From edbd61e1362978150b5681125d93cb2bffbb5c53 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Fri, 21 Nov 2014 11:11:06 -0800 Subject: [PATCH] vp9_ethread: modify VP9_COMP structure This patch modifies struct VP9_COMP. It creates a struct ThreadData to hold the data that needs to be copied for each thread. In the multi-threaded case, one thread processes one tile. All threads share one copy of VP9_COMP (refer to VP9_COMP *cpi in the code), but each thread has its own copy of ThreadData (refer to ThreadData *td in the code). Therefore, within the scope of encode_tiles(), both cpi and td need to be passed as function parameters. In the single-threaded case, the FRAME_COUNTS pointer in ThreadData points to "counts" in VP9_COMMON. Change-Id: Ib37908b2d8e2c0f4f9c18f38017df5ce60e8b13e --- vp9/encoder/vp9_aq_complexity.c | 7 +- vp9/encoder/vp9_aq_complexity.h | 4 +- vp9/encoder/vp9_bitstream.c | 73 ++--- vp9/encoder/vp9_context_tree.c | 42 +-- vp9/encoder/vp9_context_tree.h | 5 +- vp9/encoder/vp9_encodeframe.c | 551 ++++++++++++++++++++------------------ vp9/encoder/vp9_encodemv.c | 11 +- vp9/encoder/vp9_encodemv.h | 5 +- vp9/encoder/vp9_encoder.c | 69 +++-- vp9/encoder/vp9_encoder.h | 21 +- vp9/encoder/vp9_firstpass.c | 4 +- vp9/encoder/vp9_mbgraph.c | 12 +- vp9/encoder/vp9_picklpf.c | 2 +- vp9/encoder/vp9_pickmode.c | 2 +- vp9/encoder/vp9_quantize.c | 2 +- vp9/encoder/vp9_rd.c | 21 +- vp9/encoder/vp9_rd.h | 3 +- vp9/encoder/vp9_rdopt.c | 21 +- vp9/encoder/vp9_segmentation.c | 4 - vp9/encoder/vp9_speed_features.c | 2 +- vp9/encoder/vp9_temporal_filter.c | 17 +- vp9/encoder/vp9_tokenize.c | 31 ++- vp9/encoder/vp9_tokenize.h | 5 +- 23 files changed, 477 insertions(+), 437 deletions(-) diff --git a/vp9/encoder/vp9_aq_complexity.c b/vp9/encoder/vp9_aq_complexity.c index 3762c53..83f4a53 100644 --- a/vp9/encoder/vp9_aq_complexity.c +++ b/vp9/encoder/vp9_aq_complexity.c @@ -96,7 +96,8 @@ void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { // An "aq_strength" value determines how many segments 
are supported, // the set of transition points to use and the extent of the quantizer // adjustment for each segment (configured in vp9_setup_in_frame_q_adj()). -void vp9_select_in_frame_q_segment(VP9_COMP *cpi, BLOCK_SIZE bs, +void vp9_select_in_frame_q_segment(VP9_COMP *cpi, MACROBLOCK *mb, + BLOCK_SIZE bs, int mi_row, int mi_col, int output_enabled, int projected_rate) { VP9_COMMON *const cm = &cpi->common; @@ -122,8 +123,8 @@ void vp9_select_in_frame_q_segment(VP9_COMP *cpi, BLOCK_SIZE bs, const int active_segments = aq_c_active_segments[aq_strength]; double logvar; - vp9_setup_src_planes(&cpi->mb, cpi->Source, mi_row, mi_col); - logvar = vp9_log_block_var(cpi, &cpi->mb, bs); + vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); + logvar = vp9_log_block_var(cpi, mb, bs); // The number of segments considered and the transition points used to // select them is determined by the "aq_strength" value. diff --git a/vp9/encoder/vp9_aq_complexity.h b/vp9/encoder/vp9_aq_complexity.h index 6f82aac..3f885e4 100644 --- a/vp9/encoder/vp9_aq_complexity.h +++ b/vp9/encoder/vp9_aq_complexity.h @@ -17,9 +17,11 @@ extern "C" { #endif struct VP9_COMP; +struct macroblock; // Select a segment for the current SB64. 
-void vp9_select_in_frame_q_segment(struct VP9_COMP *cpi, BLOCK_SIZE bs, +void vp9_select_in_frame_q_segment(struct VP9_COMP *cpi, struct macroblock *x, + BLOCK_SIZE bs, int mi_row, int mi_col, int output_enabled, int projected_rate); diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index a3a761a..ebc633e 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -104,19 +104,21 @@ static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *xd, } } -static void update_skip_probs(VP9_COMMON *cm, vp9_writer *w) { +static void update_skip_probs(VP9_COMMON *cm, vp9_writer *w, + FRAME_COUNTS *counts) { int k; for (k = 0; k < SKIP_CONTEXTS; ++k) - vp9_cond_prob_diff_update(w, &cm->fc->skip_probs[k], cm->counts.skip[k]); + vp9_cond_prob_diff_update(w, &cm->fc->skip_probs[k], counts->skip[k]); } -static void update_switchable_interp_probs(VP9_COMMON *cm, vp9_writer *w) { +static void update_switchable_interp_probs(VP9_COMMON *cm, vp9_writer *w, + FRAME_COUNTS *counts) { int j; for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) prob_diff_update(vp9_switchable_interp_tree, cm->fc->switchable_interp_prob[j], - cm->counts.switchable_interp[j], SWITCHABLE_FILTERS, w); + counts->switchable_interp[j], SWITCHABLE_FILTERS, w); } static void pack_mb_tokens(vp9_writer *w, @@ -238,7 +240,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, vp9_writer *w) { VP9_COMMON *const cm = &cpi->common; const nmv_context *nmvc = &cm->fc->nmvc; - const MACROBLOCK *const x = &cpi->mb; + const MACROBLOCK *const x = &cpi->td.mb; const MACROBLOCKD *const xd = &x->e_mbd; const struct segmentation *const seg = &cm->seg; const MB_MODE_INFO *const mbmi = &mi->mbmi; @@ -297,7 +299,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, if (!vp9_segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { if (bsize >= BLOCK_8X8) { write_inter_mode(w, mode, inter_probs); - ++cm->counts.inter_mode[mode_ctx][INTER_OFFSET(mode)]; + 
++cpi->td.counts->inter_mode[mode_ctx][INTER_OFFSET(mode)]; } } @@ -320,7 +322,7 @@ static void pack_inter_mode_mvs(VP9_COMP *cpi, const MODE_INFO *mi, const int j = idy * 2 + idx; const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; write_inter_mode(w, b_mode, inter_probs); - ++cm->counts.inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; + ++cpi->td.counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; if (b_mode == NEWMV) { for (ref = 0; ref < 1 + is_compound; ++ref) vp9_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv, @@ -382,7 +384,7 @@ static void write_modes_b(VP9_COMP *cpi, const TileInfo *const tile, const TOKENEXTRA *const tok_end, int mi_row, int mi_col) { const VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; MODE_INFO *m; xd->mi = cm->mi + (mi_row * cm->mi_stride + mi_col); @@ -429,7 +431,7 @@ static void write_modes_sb(VP9_COMP *cpi, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; const int bsl = b_width_log2_lookup[bsize]; const int bs = (1 << bsl) / 4; @@ -485,11 +487,12 @@ static void write_modes_sb(VP9_COMP *cpi, static void write_modes(VP9_COMP *cpi, const TileInfo *const tile, vp9_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end) { + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; int mi_row, mi_col; for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { - vp9_zero(cpi->mb.e_mbd.left_seg_context); + vp9_zero(xd->left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, @@ -500,7 +503,7 @@ static void write_modes(VP9_COMP *cpi, static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size, vp9_coeff_stats *coef_branch_ct, vp9_coeff_probs_model *coef_probs) { - 
vp9_coeff_count *coef_counts = cpi->frame_counts->coef_counts[tx_size]; + vp9_coeff_count *coef_counts = cpi->td.rd_counts.coef_counts[tx_size]; unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = cpi->common.counts.eob_branch[tx_size]; int i, j, k, l, m; @@ -813,7 +816,8 @@ static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd, } } -static void encode_txfm_probs(VP9_COMMON *cm, vp9_writer *w) { +static void encode_txfm_probs(VP9_COMMON *cm, vp9_writer *w, + FRAME_COUNTS *counts) { // Mode vp9_write_literal(w, MIN(cm->tx_mode, ALLOW_32X32), 2); if (cm->tx_mode >= ALLOW_32X32) @@ -828,20 +832,20 @@ static void encode_txfm_probs(VP9_COMMON *cm, vp9_writer *w) { for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - tx_counts_to_branch_counts_8x8(cm->counts.tx.p8x8[i], ct_8x8p); + tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], ct_8x8p); for (j = 0; j < TX_SIZES - 3; j++) vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p8x8[i][j], ct_8x8p[j]); } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - tx_counts_to_branch_counts_16x16(cm->counts.tx.p16x16[i], ct_16x16p); + tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], ct_16x16p); for (j = 0; j < TX_SIZES - 2; j++) vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p16x16[i][j], ct_16x16p[j]); } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { - tx_counts_to_branch_counts_32x32(cm->counts.tx.p32x32[i], ct_32x32p); + tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], ct_32x32p); for (j = 0; j < TX_SIZES - 1; j++) vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p32x32[i][j], ct_32x32p[j]); @@ -858,7 +862,7 @@ static void write_interp_filter(INTERP_FILTER filter, vp9_wb_write_literal(wb, filter_to_literal[filter], 2); } -static void fix_interp_filter(VP9_COMMON *cm) { +static void fix_interp_filter(VP9_COMMON *cm, FRAME_COUNTS *counts) { if (cm->interp_filter == SWITCHABLE) { // Check to see if only one of the filters is actually used int count[SWITCHABLE_FILTERS]; @@ -866,7 +870,7 @@ static void 
fix_interp_filter(VP9_COMMON *cm) { for (i = 0; i < SWITCHABLE_FILTERS; ++i) { count[i] = 0; for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) - count[i] += cm->counts.switchable_interp[j][i]; + count[i] += counts->switchable_interp[j][i]; c += (count[i] > 0); } if (c == 1) { @@ -1084,6 +1088,7 @@ static void write_bitdepth_colorspace_sampling( static void write_uncompressed_header(VP9_COMP *cpi, struct vp9_write_bit_buffer *wb) { VP9_COMMON *const cm = &cpi->common; + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; vp9_wb_write_literal(wb, VP9_FRAME_MARKER, 2); @@ -1136,7 +1141,7 @@ static void write_uncompressed_header(VP9_COMP *cpi, vp9_wb_write_bit(wb, cm->allow_high_precision_mv); - fix_interp_filter(cm); + fix_interp_filter(cm, cpi->td.counts); write_interp_filter(cm->interp_filter, wb); } } @@ -1150,15 +1155,16 @@ static void write_uncompressed_header(VP9_COMP *cpi, encode_loopfilter(&cm->lf, wb); encode_quantization(cm, wb); - encode_segmentation(cm, &cpi->mb.e_mbd, wb); + encode_segmentation(cm, xd, wb); write_tile_info(cm, wb); } static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; FRAME_CONTEXT *const fc = cm->fc; + FRAME_COUNTS *counts = cpi->td.counts; vp9_writer header_bc; vp9_start_encode(&header_bc, data); @@ -1166,26 +1172,26 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { if (xd->lossless) cm->tx_mode = ONLY_4X4; else - encode_txfm_probs(cm, &header_bc); + encode_txfm_probs(cm, &header_bc, counts); update_coef_probs(cpi, &header_bc); - update_skip_probs(cm, &header_bc); + update_skip_probs(cm, &header_bc, counts); if (!frame_is_intra_only(cm)) { int i; for (i = 0; i < INTER_MODE_CONTEXTS; ++i) prob_diff_update(vp9_inter_mode_tree, cm->fc->inter_mode_probs[i], - cm->counts.inter_mode[i], INTER_MODES, &header_bc); + counts->inter_mode[i], INTER_MODES, &header_bc); - 
vp9_zero(cm->counts.inter_mode); + vp9_zero(counts->inter_mode); if (cm->interp_filter == SWITCHABLE) - update_switchable_interp_probs(cm, &header_bc); + update_switchable_interp_probs(cm, &header_bc, counts); for (i = 0; i < INTRA_INTER_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->intra_inter_prob[i], - cm->counts.intra_inter[i]); + counts->intra_inter[i]); if (cm->allow_comp_inter_inter) { const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE; @@ -1197,33 +1203,34 @@ static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { if (use_hybrid_pred) for (i = 0; i < COMP_INTER_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i], - cm->counts.comp_inter[i]); + counts->comp_inter[i]); } } if (cm->reference_mode != COMPOUND_REFERENCE) { for (i = 0; i < REF_CONTEXTS; i++) { vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0], - cm->counts.single_ref[i][0]); + counts->single_ref[i][0]); vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][1], - cm->counts.single_ref[i][1]); + counts->single_ref[i][1]); } } if (cm->reference_mode != SINGLE_REFERENCE) for (i = 0; i < REF_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i], - cm->counts.comp_ref[i]); + counts->comp_ref[i]); for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) prob_diff_update(vp9_intra_mode_tree, cm->fc->y_mode_prob[i], - cm->counts.y_mode[i], INTRA_MODES, &header_bc); + counts->y_mode[i], INTRA_MODES, &header_bc); for (i = 0; i < PARTITION_CONTEXTS; ++i) prob_diff_update(vp9_partition_tree, fc->partition_prob[i], - cm->counts.partition[i], PARTITION_TYPES, &header_bc); + counts->partition[i], PARTITION_TYPES, &header_bc); - vp9_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc); + vp9_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc, + &counts->mv); } vp9_stop_encode(&header_bc); diff --git a/vp9/encoder/vp9_context_tree.c b/vp9/encoder/vp9_context_tree.c index 12acc51..f647ab3 100644 --- 
a/vp9/encoder/vp9_context_tree.c +++ b/vp9/encoder/vp9_context_tree.c @@ -87,7 +87,7 @@ static void free_tree_contexts(PC_TREE *tree) { // partition level. There are contexts for none, horizontal, vertical, and // split. Along with a block_size value and a selected block_size which // represents the state of our search. -void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) { +void vp9_setup_pc_tree(VP9_COMMON *cm, ThreadData *td) { int i, j; const int leaf_nodes = 64; const int tree_nodes = 64 + 16 + 4 + 1; @@ -97,24 +97,24 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) { int square_index = 1; int nodes; - vpx_free(cpi->leaf_tree); - CHECK_MEM_ERROR(cm, cpi->leaf_tree, vpx_calloc(leaf_nodes, - sizeof(*cpi->leaf_tree))); - vpx_free(cpi->pc_tree); - CHECK_MEM_ERROR(cm, cpi->pc_tree, vpx_calloc(tree_nodes, - sizeof(*cpi->pc_tree))); + vpx_free(td->leaf_tree); + CHECK_MEM_ERROR(cm, td->leaf_tree, vpx_calloc(leaf_nodes, + sizeof(*td->leaf_tree))); + vpx_free(td->pc_tree); + CHECK_MEM_ERROR(cm, td->pc_tree, vpx_calloc(tree_nodes, + sizeof(*td->pc_tree))); - this_pc = &cpi->pc_tree[0]; - this_leaf = &cpi->leaf_tree[0]; + this_pc = &td->pc_tree[0]; + this_leaf = &td->leaf_tree[0]; // 4x4 blocks smaller than 8x8 but in the same 8x8 block share the same // context so we only need to allocate 1 for each 8x8 block. for (i = 0; i < leaf_nodes; ++i) - alloc_mode_context(cm, 1, &cpi->leaf_tree[i]); + alloc_mode_context(cm, 1, &td->leaf_tree[i]); // Sets up all the leaf nodes in the tree. for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) { - PC_TREE *const tree = &cpi->pc_tree[pc_tree_index]; + PC_TREE *const tree = &td->pc_tree[pc_tree_index]; tree->block_size = square[0]; alloc_tree_contexts(cm, tree, 4); tree->leaf_split[0] = this_leaf++; @@ -126,7 +126,7 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) { // from leafs to the root. 
for (nodes = 16; nodes > 0; nodes >>= 2) { for (i = 0; i < nodes; ++i) { - PC_TREE *const tree = &cpi->pc_tree[pc_tree_index]; + PC_TREE *const tree = &td->pc_tree[pc_tree_index]; alloc_tree_contexts(cm, tree, 4 << (2 * square_index)); tree->block_size = square[square_index]; for (j = 0; j < 4; j++) @@ -135,24 +135,24 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, VP9_COMP *cpi) { } ++square_index; } - cpi->pc_root = &cpi->pc_tree[tree_nodes - 1]; - cpi->pc_root[0].none.best_mode_index = 2; + td->pc_root = &td->pc_tree[tree_nodes - 1]; + td->pc_root[0].none.best_mode_index = 2; } -void vp9_free_pc_tree(VP9_COMP *cpi) { +void vp9_free_pc_tree(ThreadData *td) { const int tree_nodes = 64 + 16 + 4 + 1; int i; // Set up all 4x4 mode contexts for (i = 0; i < 64; ++i) - free_mode_context(&cpi->leaf_tree[i]); + free_mode_context(&td->leaf_tree[i]); // Sets up all the leaf nodes in the tree. for (i = 0; i < tree_nodes; ++i) - free_tree_contexts(&cpi->pc_tree[i]); + free_tree_contexts(&td->pc_tree[i]); - vpx_free(cpi->pc_tree); - cpi->pc_tree = NULL; - vpx_free(cpi->leaf_tree); - cpi->leaf_tree = NULL; + vpx_free(td->pc_tree); + td->pc_tree = NULL; + vpx_free(td->leaf_tree); + td->leaf_tree = NULL; } diff --git a/vp9/encoder/vp9_context_tree.h b/vp9/encoder/vp9_context_tree.h index 1710783..70bf032 100644 --- a/vp9/encoder/vp9_context_tree.h +++ b/vp9/encoder/vp9_context_tree.h @@ -15,6 +15,7 @@ struct VP9_COMP; struct VP9Common; +struct ThreadData; // Structure to hold snapshot of coding context during the mode picking process typedef struct { @@ -79,7 +80,7 @@ typedef struct PC_TREE { }; } PC_TREE; -void vp9_setup_pc_tree(struct VP9Common *cm, struct VP9_COMP *cpi); -void vp9_free_pc_tree(struct VP9_COMP *cpi); +void vp9_setup_pc_tree(struct VP9Common *cm, struct ThreadData *td); +void vp9_free_pc_tree(struct ThreadData *td); #endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */ diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index f220157..8ba0656 100644 --- 
a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -49,7 +49,8 @@ #define SPLIT_MV_ZBIN_BOOST 0 #define INTRA_ZBIN_BOOST 0 -static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, +static void encode_superblock(VP9_COMP *cpi, ThreadData * td, + TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); @@ -154,10 +155,10 @@ static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi, return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); } -static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, +static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col) { - unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src, + unsigned int var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64); if (var < 8) @@ -171,9 +172,10 @@ static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, } static BLOCK_SIZE get_nonrd_var_based_fixed_partition(VP9_COMP *cpi, + MACROBLOCK *x, int mi_row, int mi_col) { - unsigned int var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src, + unsigned int var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64); if (var < 4) @@ -196,8 +198,8 @@ static INLINE void set_modeinfo_offsets(VP9_COMMON *const cm, } static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, - int mi_row, int mi_col, BLOCK_SIZE bsize) { - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x, int mi_row, int mi_col, + BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi; @@ -263,10 +265,10 @@ static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd, } static void set_block_size(VP9_COMP * const cpi, + MACROBLOCKD *const xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { - MACROBLOCKD 
*const xd = &cpi->mb.e_mbd; set_modeinfo_offsets(&cpi->common, xd, mi_row, mi_col); xd->mi[0].src_mi->mbmi.sb_type = bsize; duplicate_mode_info_in_sb(&cpi->common, xd, mi_row, mi_col, bsize); @@ -387,6 +389,7 @@ static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { } static int set_vt_partitioning(VP9_COMP *cpi, + MACROBLOCKD *const xd, void *data, BLOCK_SIZE bsize, int mi_row, @@ -408,7 +411,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && vt.part_variances->none.variance < threshold) { - set_block_size(cpi, mi_row, mi_col, bsize); + set_block_size(cpi, xd, mi_row, mi_col, bsize); return 1; } @@ -419,8 +422,8 @@ static int set_vt_partitioning(VP9_COMP *cpi, vt.part_variances->vert[0].variance < threshold && vt.part_variances->vert[1].variance < threshold) { BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); - set_block_size(cpi, mi_row, mi_col, subsize); - set_block_size(cpi, mi_row, mi_col + block_width / 2, subsize); + set_block_size(cpi, xd, mi_row, mi_col, subsize); + set_block_size(cpi, xd, mi_row, mi_col + block_width / 2, subsize); return 1; } @@ -429,8 +432,8 @@ static int set_vt_partitioning(VP9_COMP *cpi, vt.part_variances->horz[0].variance < threshold && vt.part_variances->horz[1].variance < threshold) { BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); - set_block_size(cpi, mi_row, mi_col, subsize); - set_block_size(cpi, mi_row + block_height / 2, mi_col, subsize); + set_block_size(cpi, xd, mi_row, mi_col, subsize); + set_block_size(cpi, xd, mi_row + block_height / 2, mi_col, subsize); return 1; } } @@ -440,7 +443,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && vt.part_variances->none.variance < (threshold << 6)) { - set_block_size(cpi, mi_row, mi_col, bsize); + set_block_size(cpi, xd, mi_row, mi_col, bsize); return 1; } } @@ -455,10 +458,10 @@ static 
int set_vt_partitioning(VP9_COMP *cpi, // of smaller block sizes (i.e., < 16x16). static void choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, + MACROBLOCK *x, int mi_row, int mi_col) { VP9_COMMON * const cm = &cpi->common; - MACROBLOCK *x = &cpi->mb; - MACROBLOCKD *xd = &cpi->mb.e_mbd; + MACROBLOCKD *xd = &x->e_mbd; int i, j, k; v64x64 vt; @@ -472,7 +475,7 @@ static void choose_partitioning(VP9_COMP *cpi, vp9_clear_system_state(); vp9_zero(vt); - set_offsets(cpi, tile, mi_row, mi_col, BLOCK_64X64); + set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); @@ -564,11 +567,11 @@ static void choose_partitioning(VP9_COMP *cpi, // we get to one that's got a variance lower than our threshold, or we // hit 8x8. if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || - !set_vt_partitioning(cpi, &vt, BLOCK_64X64, mi_row, mi_col)) { + !set_vt_partitioning(cpi, xd, &vt, BLOCK_64X64, mi_row, mi_col)) { for (i = 0; i < 4; ++i) { const int x32_idx = ((i & 1) << 2); const int y32_idx = ((i >> 1) << 2); - if (!set_vt_partitioning(cpi, &vt.split[i], BLOCK_32X32, + if (!set_vt_partitioning(cpi, xd, &vt.split[i], BLOCK_32X32, (mi_row + y32_idx), (mi_col + x32_idx))) { for (j = 0; j < 4; ++j) { const int x16_idx = ((j & 1) << 1); @@ -578,14 +581,14 @@ static void choose_partitioning(VP9_COMP *cpi, // since we do not sufficient samples for variance. // For now, 8x8 partition is only set if the variance of the 16x16 // block is very high. This is controlled in set_vt_partitioning. 
- if (!set_vt_partitioning(cpi, &vt.split[i].split[j], + if (!set_vt_partitioning(cpi, xd, &vt.split[i].split[j], BLOCK_16X16, mi_row + y32_idx + y16_idx, mi_col + x32_idx + x16_idx)) { for (k = 0; k < 4; ++k) { const int x8_idx = (k & 1); const int y8_idx = (k >> 1); - set_block_size(cpi, + set_block_size(cpi, xd, (mi_row + y32_idx + y16_idx + y8_idx), (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8); @@ -597,13 +600,14 @@ static void choose_partitioning(VP9_COMP *cpi, } } -static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, +static void update_state(VP9_COMP *cpi, ThreadData *td, + PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled) { int i, x_idx, y; VP9_COMMON *const cm = &cpi->common; - COUNTS *const counts = cpi->frame_counts; - MACROBLOCK *const x = &cpi->mb; + RD_COUNTS *const rdc = &td->rd_counts; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; @@ -695,7 +699,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { for (i = 0; i < TX_MODES; i++) - counts->tx_select_diff[i] += ctx->tx_rd_diff[i]; + rdc->tx_select_diff[i] += ctx->tx_rd_diff[i]; } #if CONFIG_INTERNAL_STATS @@ -720,20 +724,20 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, #endif if (!frame_is_intra_only(cm)) { if (is_inter_block(mbmi)) { - vp9_update_mv_count(cm, xd); + vp9_update_mv_count(td); if (cm->interp_filter == SWITCHABLE) { const int ctx = vp9_get_pred_context_switchable_interp(xd); - ++cm->counts.switchable_interp[ctx][mbmi->interp_filter]; + ++td->counts->switchable_interp[ctx][mbmi->interp_filter]; } } - counts->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; - counts->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; - counts->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; + 
rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; + rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; + rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - counts->filter_diff[i] += ctx->best_filter_diff[i]; + rdc->filter_diff[i] += ctx->best_filter_diff[i]; } for (h = 0; h < y_mis; ++h) { @@ -795,12 +799,12 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, + MACROBLOCK *const x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi; struct macroblock_plane *const p = x->plane; @@ -813,7 +817,7 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, // Use the lower precision, but faster, 32x32 fdct for mode selection. 
x->use_lp32x32fdct = 1; - set_offsets(cpi, tile_info, mi_row, mi_col, bsize); + set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); mbmi = &xd->mi[0].src_mi->mbmi; mbmi->sb_type = bsize; @@ -908,7 +912,8 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, ctx->dist = rd_cost->dist; } -static void update_stats(VP9_COMMON *cm, const MACROBLOCK *x) { +static void update_stats(VP9_COMMON *cm, ThreadData *td) { + const MACROBLOCK *x = &td->mb; const MACROBLOCKD *const xd = &x->e_mbd; const MODE_INFO *const mi = xd->mi[0].src_mi; const MB_MODE_INFO *const mbmi = &mi->mbmi; @@ -917,7 +922,7 @@ static void update_stats(VP9_COMMON *cm, const MACROBLOCK *x) { const int seg_ref_active = vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME); if (!seg_ref_active) { - FRAME_COUNTS *const counts = &cm->counts; + FRAME_COUNTS *const counts = td->counts; const int inter_block = is_inter_block(mbmi); counts->intra_inter[vp9_get_intra_inter_context(xd)][inter_block]++; @@ -947,12 +952,11 @@ static void update_stats(VP9_COMMON *cm, const MACROBLOCK *x) { } } -static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, +static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], BLOCK_SIZE bsize) { - MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; int p; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; @@ -978,12 +982,11 @@ static void restore_context(VP9_COMP *cpi, int mi_row, int mi_col, sizeof(xd->left_seg_context[0]) * mi_height); } -static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, +static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], BLOCK_SIZE bsize) { - const MACROBLOCK *const x = &cpi->mb; const MACROBLOCKD *const xd = &x->e_mbd; 
int p; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; @@ -1012,27 +1015,30 @@ static void save_context(VP9_COMP *cpi, int mi_row, int mi_col, } static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, + ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { - set_offsets(cpi, tile, mi_row, mi_col, bsize); - update_state(cpi, ctx, mi_row, mi_col, bsize, output_enabled); - encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx); + MACROBLOCK *const x = &td->mb; + set_offsets(cpi, tile, x, mi_row, mi_col, bsize); + update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled); + encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); if (output_enabled) { - update_stats(&cpi->common, &cpi->mb); + update_stats(&cpi->common, td); (*tp)->token = EOSB_TOKEN; (*tp)++; } } -static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, +static void encode_sb(VP9_COMP *cpi, ThreadData *td, + const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; @@ -1053,41 +1059,41 @@ static void encode_sb(VP9_COMP *cpi, const TileInfo *const tile, partition = partition_lookup[bsl][subsize]; if (output_enabled && bsize != BLOCK_4X4) - cm->counts.partition[ctx][partition]++; + td->counts->partition[ctx][partition]++; switch (partition) { case PARTITION_NONE: - encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, + encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->none); break; case PARTITION_VERT: - encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, + encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, 
&pc_tree->vertical[0]); if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { - encode_b(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize, - &pc_tree->vertical[1]); + encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, + subsize, &pc_tree->vertical[1]); } break; case PARTITION_HORZ: - encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, + encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->horizontal[0]); if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { - encode_b(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize, - &pc_tree->horizontal[1]); + encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, + subsize, &pc_tree->horizontal[1]); } break; case PARTITION_SPLIT: if (bsize == BLOCK_8X8) { - encode_b(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, + encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, pc_tree->leaf_split[0]); } else { - encode_sb(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, + encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, pc_tree->split[0]); - encode_sb(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize, - pc_tree->split[1]); - encode_sb(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize, - pc_tree->split[2]); - encode_sb(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, + encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, + subsize, pc_tree->split[1]); + encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, + subsize, pc_tree->split[2]); + encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, subsize, pc_tree->split[3]); } break; @@ -1188,10 +1194,10 @@ const struct { static void set_source_var_based_partition(VP9_COMP *cpi, const TileInfo *const tile, + MACROBLOCK *const x, MODE_INFO *mi_8x8, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; const int mis = cm->mi_stride; 
const int row8x8_remaining = tile->mi_row_end - mi_row; const int col8x8_remaining = tile->mi_col_end - mi_col; @@ -1272,10 +1278,11 @@ static void set_source_var_based_partition(VP9_COMP *cpi, } } -static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, +static void update_state_rt(VP9_COMP *cpi, ThreadData *td, + PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, int bsize) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0].src_mi; MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; @@ -1308,11 +1315,11 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } if (is_inter_block(mbmi)) { - vp9_update_mv_count(cm, xd); + vp9_update_mv_count(td); if (cm->interp_filter == SWITCHABLE) { const int pred_ctx = vp9_get_pred_context_switchable_interp(xd); - ++cm->counts.switchable_interp[pred_ctx][mbmi->interp_filter]; + ++td->counts->switchable_interp[pred_ctx][mbmi->interp_filter]; } } @@ -1331,33 +1338,36 @@ static void update_state_rt(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, x->skip_txfm[0] = mbmi->segment_id ? 
0 : ctx->skip_txfm[0]; } -static void encode_b_rt(VP9_COMP *cpi, const TileInfo *const tile, +static void encode_b_rt(VP9_COMP *cpi, ThreadData *td, + const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, int mi_col, - int output_enabled, BLOCK_SIZE bsize, - PICK_MODE_CONTEXT *ctx) { - set_offsets(cpi, tile, mi_row, mi_col, bsize); - update_state_rt(cpi, ctx, mi_row, mi_col, bsize); + int output_enabled, BLOCK_SIZE bsize, + PICK_MODE_CONTEXT *ctx) { + MACROBLOCK *const x = &td->mb; + set_offsets(cpi, tile, x, mi_row, mi_col, bsize); + update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize); #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && output_enabled) { - vp9_denoiser_denoise(&cpi->denoiser, &cpi->mb, mi_row, mi_col, + vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col, MAX(BLOCK_8X8, bsize), ctx); } #endif - encode_superblock(cpi, tp, output_enabled, mi_row, mi_col, bsize, ctx); - update_stats(&cpi->common, &cpi->mb); + encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); + update_stats(&cpi->common, td); (*tp)->token = EOSB_TOKEN; (*tp)++; } -static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, +static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, + const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; @@ -1380,39 +1390,39 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, partition = partition_lookup[bsl][subsize]; if (output_enabled && bsize != BLOCK_4X4) - cm->counts.partition[ctx][partition]++; + td->counts->partition[ctx][partition]++; switch (partition) { case PARTITION_NONE: - encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, + encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, 
output_enabled, subsize, &pc_tree->none); break; case PARTITION_VERT: - encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, + encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->vertical[0]); if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { - encode_b_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, + encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize, &pc_tree->vertical[1]); } break; case PARTITION_HORZ: - encode_b_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, + encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->horizontal[0]); if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { - encode_b_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, + encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize, &pc_tree->horizontal[1]); } break; case PARTITION_SPLIT: subsize = get_subsize(bsize, PARTITION_SPLIT); - encode_sb_rt(cpi, tile, tp, mi_row, mi_col, output_enabled, subsize, + encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, pc_tree->split[0]); - encode_sb_rt(cpi, tile, tp, mi_row, mi_col + hbs, output_enabled, + encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize, pc_tree->split[1]); - encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col, output_enabled, + encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize, pc_tree->split[2]); - encode_sb_rt(cpi, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, - subsize, pc_tree->split[3]); + encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, + output_enabled, subsize, pc_tree->split[3]); break; default: assert("Invalid partition type."); @@ -1424,6 +1434,7 @@ static void encode_sb_rt(VP9_COMP *cpi, const TileInfo *const tile, } static void rd_use_partition(VP9_COMP *cpi, + ThreadData *td, TileDataEnc *tile_data, MODE_INFO *mi_8x8, TOKENEXTRA **tp, int mi_row, int mi_col, @@ -1432,7 +1443,7 @@ 
static void rd_use_partition(VP9_COMP *cpi, int do_recon, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int mis = cm->mi_stride; const int bsl = b_width_log2_lookup[bsize]; @@ -1464,10 +1475,10 @@ static void rd_use_partition(VP9_COMP *cpi, subsize = get_subsize(bsize, partition); pc_tree->partitioning = partition; - save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) { - set_offsets(cpi, tile_info, mi_row, mi_col, bsize); + set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); x->mb_energy = vp9_block_energy(cpi, x, bsize); } @@ -1493,7 +1504,7 @@ static void rd_use_partition(VP9_COMP *cpi, mi_row + (mi_step >> 1) < cm->mi_rows && mi_col + (mi_step >> 1) < cm->mi_cols) { pc_tree->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &none_rdc, bsize, + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx, INT64_MAX); pl = partition_plane_context(xd, mi_row, mi_col, bsize); @@ -1504,7 +1515,7 @@ static void rd_use_partition(VP9_COMP *cpi, none_rdc.dist); } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); mi_8x8[0].src_mi->mbmi.sb_type = bs_type; pc_tree->partitioning = partition; } @@ -1512,11 +1523,11 @@ static void rd_use_partition(VP9_COMP *cpi, switch (partition) { case PARTITION_NONE: - rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &last_part_rdc, + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize, ctx, INT64_MAX); break; case PARTITION_HORZ: - rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &last_part_rdc, + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, &pc_tree->horizontal[0], INT64_MAX); if 
(last_part_rdc.rate != INT_MAX && @@ -1524,9 +1535,9 @@ static void rd_use_partition(VP9_COMP *cpi, RD_COST tmp_rdc; PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; vp9_rd_cost_init(&tmp_rdc); - update_state(cpi, ctx, mi_row, mi_col, subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); - rd_pick_sb_modes(cpi, tile_data, + update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); + encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); + rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, &tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { @@ -1539,16 +1550,16 @@ static void rd_use_partition(VP9_COMP *cpi, } break; case PARTITION_VERT: - rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &last_part_rdc, + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, &pc_tree->vertical[0], INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { RD_COST tmp_rdc; PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0]; vp9_rd_cost_init(&tmp_rdc); - update_state(cpi, ctx, mi_row, mi_col, subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); - rd_pick_sb_modes(cpi, tile_data, + update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); + encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX); @@ -1563,7 +1574,7 @@ static void rd_use_partition(VP9_COMP *cpi, break; case PARTITION_SPLIT: if (bsize == BLOCK_8X8) { - rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &last_part_rdc, + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, pc_tree->leaf_split[0], INT64_MAX); break; } @@ -1579,7 +1590,7 @@ static void rd_use_partition(VP9_COMP *cpi, continue; vp9_rd_cost_init(&tmp_rdc); - rd_use_partition(cpi, tile_data, 
+ rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss, tp, mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, &tmp_rdc.dist, @@ -1615,7 +1626,7 @@ static void rd_use_partition(VP9_COMP *cpi, BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rdc.rate = 0; chosen_rdc.dist = 0; - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); pc_tree->partitioning = PARTITION_SPLIT; // Split partition. @@ -1629,13 +1640,13 @@ static void rd_use_partition(VP9_COMP *cpi, if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; - save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); pc_tree->split[i]->partitioning = PARTITION_NONE; - rd_pick_sb_modes(cpi, tile_data, + rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, split_subsize, &pc_tree->split[i]->none, INT64_MAX); - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp9_rd_cost_reset(&chosen_rdc); @@ -1646,7 +1657,7 @@ static void rd_use_partition(VP9_COMP *cpi, chosen_rdc.dist += tmp_rdc.dist; if (i != 3) - encode_sb(cpi, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0, + encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0, split_subsize, pc_tree->split[i]); pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, @@ -1675,7 +1686,7 @@ static void rd_use_partition(VP9_COMP *cpi, chosen_rdc = none_rdc; } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); // We must have chosen a partitioning and encoding or we'll fail later on. // No other opportunities for success. 
@@ -1689,10 +1700,10 @@ static void rd_use_partition(VP9_COMP *cpi, // and and if necessary apply a Q delta using segmentation to get // closer to the target. if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - vp9_select_in_frame_q_segment(cpi, bsize, mi_row, mi_col, + vp9_select_in_frame_q_segment(cpi, x, bsize, mi_row, mi_col, output_enabled, chosen_rdc.rate); } - encode_sb(cpi, tile_info, tp, mi_row, mi_col, output_enabled, bsize, + encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } @@ -1757,11 +1768,11 @@ static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { // Look at neighboring blocks and set a min and max partition size based on // what they chose. static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, + MACROBLOCKD *const xd, int mi_row, int mi_col, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; MODE_INFO *mi = xd->mi[0].src_mi; const int left_in_image = xd->left_available && mi[-1].src_mi; const int above_in_image = xd->up_available && mi[-xd->mi_stride].src_mi; @@ -1853,11 +1864,11 @@ static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, } static void auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, + MACROBLOCKD *const xd, int mi_row, int mi_col, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; MODE_INFO *mi_8x8 = xd->mi; const int left_in_image = xd->left_available && mi_8x8[-1].src_mi; const int above_in_image = xd->up_available && @@ -2025,14 +2036,14 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv, // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are // unlikely to be selected depending on previous rate-distortion optimization // results, for encoding speed-up. 
-static void rd_pick_partition(VP9_COMP *cpi, +static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, RD_COST *rd_cost, int64_t best_rd, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; @@ -2074,7 +2085,7 @@ static void rd_pick_partition(VP9_COMP *cpi, vp9_rd_cost_reset(&best_rdc); best_rdc.rdcost = best_rd; - set_offsets(cpi, tile_info, mi_row, mi_col, bsize); + set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) x->mb_energy = vp9_block_energy(cpi, x, bsize); @@ -2102,12 +2113,12 @@ static void rd_pick_partition(VP9_COMP *cpi, partition_vert_allowed &= force_vert_split; } - save_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { - set_offsets(cpi, tile_info, mi_row, mi_col, bsize); - src_diff_var = get_sby_perpixel_diff_variance(cpi, &cpi->mb.plane[0].src, + set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); + src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row, mi_col, bsize); } #endif @@ -2165,7 +2176,7 @@ static void rd_pick_partition(VP9_COMP *cpi, // PARTITION_NONE if (partition_none_allowed) { - rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx, best_rdc.rdcost); if (this_rdc.rate != INT_MAX) { if (bsize >= BLOCK_8X8) { @@ -2235,9 +2246,9 @@ static void rd_pick_partition(VP9_COMP *cpi, } if (skip) { if (src_diff_var == UINT_MAX) { - set_offsets(cpi, tile_info, mi_row, mi_col, bsize); + set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); 
src_diff_var = get_sby_perpixel_diff_variance( - cpi, &cpi->mb.plane[0].src, mi_row, mi_col, bsize); + cpi, &x->plane[0].src, mi_row, mi_col, bsize); } if (src_diff_var < 8) { do_split = 0; @@ -2248,7 +2259,7 @@ static void rd_pick_partition(VP9_COMP *cpi, #endif } } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } // store estimated motion vector @@ -2265,7 +2276,7 @@ static void rd_pick_partition(VP9_COMP *cpi, if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) pc_tree->leaf_split[0]->pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize, + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, pc_tree->leaf_split[0], best_rdc.rdcost); if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX; @@ -2281,7 +2292,7 @@ static void rd_pick_partition(VP9_COMP *cpi, load_pred_mv(x, ctx); pc_tree->split[i]->index = i; - rd_pick_partition(cpi, tile_data, tp, + rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rdc, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]); @@ -2313,7 +2324,7 @@ static void rd_pick_partition(VP9_COMP *cpi, if (cpi->sf.less_rectangular_check) do_rect &= !partition_none_allowed; } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } // PARTITION_HORZ @@ -2325,14 +2336,14 @@ static void rd_pick_partition(VP9_COMP *cpi, partition_none_allowed) pc_tree->horizontal[0].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize, + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->horizontal[0], best_rdc.rdcost); if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows && bsize > BLOCK_8X8) { PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; - update_state(cpi, ctx, mi_row, 
mi_col, subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, ctx); + update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); + encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx); @@ -2340,7 +2351,7 @@ static void rd_pick_partition(VP9_COMP *cpi, partition_none_allowed) pc_tree->horizontal[1].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile_data, mi_row + mi_step, mi_col, + rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1], best_rdc.rdcost - sum_rdc.rdcost); if (this_rdc.rate == INT_MAX) { @@ -2361,7 +2372,7 @@ static void rd_pick_partition(VP9_COMP *cpi, pc_tree->partitioning = PARTITION_HORZ; } } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } // PARTITION_VERT if (partition_vert_allowed && do_rect) { @@ -2373,12 +2384,12 @@ static void rd_pick_partition(VP9_COMP *cpi, partition_none_allowed) pc_tree->vertical[0].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize, + rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->vertical[0], best_rdc.rdcost); if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && bsize > BLOCK_8X8) { - update_state(cpi, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); - encode_superblock(cpi, tp, 0, mi_row, mi_col, subsize, + update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); + encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, &pc_tree->vertical[0]); if (cpi->sf.adaptive_motion_search) @@ -2387,7 +2398,7 @@ static void rd_pick_partition(VP9_COMP *cpi, partition_none_allowed) pc_tree->vertical[1].pred_interp_filter = ctx->mic.mbmi.interp_filter; - rd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + mi_step, + rd_pick_sb_modes(cpi, tile_data, x, mi_row, 
mi_col + mi_step, &this_rdc, subsize, &pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost); if (this_rdc.rate == INT_MAX) { @@ -2409,7 +2420,7 @@ static void rd_pick_partition(VP9_COMP *cpi, pc_tree->partitioning = PARTITION_VERT; } } - restore_context(cpi, mi_row, mi_col, a, l, sa, sl, bsize); + restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } // TODO(jbb): This code added so that we avoid static analysis @@ -2428,9 +2439,9 @@ static void rd_pick_partition(VP9_COMP *cpi, // and and if necessary apply a Q delta using segmentation to get // closer to the target. if ((cpi->oxcf.aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) - vp9_select_in_frame_q_segment(cpi, bsize, mi_row, mi_col, output_enabled, - best_rdc.rate); - encode_sb(cpi, tile_info, tp, mi_row, mi_col, output_enabled, + vp9_select_in_frame_q_segment(cpi, x, bsize, mi_row, mi_col, + output_enabled, best_rdc.rate); + encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } @@ -2444,12 +2455,13 @@ static void rd_pick_partition(VP9_COMP *cpi, } static void encode_rd_sb_row(VP9_COMP *cpi, + ThreadData *td, TileDataEnc *tile_data, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; SPEED_FEATURES *const sf = &cpi->sf; int mi_col; @@ -2471,54 +2483,54 @@ static void encode_rd_sb_row(VP9_COMP *cpi, if (sf->adaptive_pred_interp_filter) { for (i = 0; i < 64; ++i) - cpi->leaf_tree[i].pred_interp_filter = SWITCHABLE; + td->leaf_tree[i].pred_interp_filter = SWITCHABLE; for (i = 0; i < 64; ++i) { - cpi->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; - cpi->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; - cpi->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; - cpi->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE; + td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; + 
td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; + td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; + td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE; } } - vp9_zero(cpi->mb.pred_mv); - cpi->pc_root->index = 0; + vp9_zero(x->pred_mv); + td->pc_root->index = 0; - cpi->mb.source_variance = UINT_MAX; + x->source_variance = UINT_MAX; if (sf->partition_search_type == FIXED_PARTITION) { - set_offsets(cpi, tile_info, mi_row, mi_col, BLOCK_64X64); + set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, sf->always_this_block_size); - rd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); + rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, + BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); } else if (cpi->partition_search_skippable_frame) { BLOCK_SIZE bsize; - set_offsets(cpi, tile_info, mi_row, mi_col, BLOCK_64X64); - bsize = get_rd_var_based_fixed_partition(cpi, mi_row, mi_col); + set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); + bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); - rd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); + rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, + BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); } else if (sf->partition_search_type == VAR_BASED_PARTITION && cm->frame_type != KEY_FRAME ) { - choose_partitioning(cpi, tile_info, mi_row, mi_col); - rd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, &dummy_rate, &dummy_dist, 1, cpi->pc_root); + choose_partitioning(cpi, tile_info, x, mi_row, mi_col); + rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, + BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); } else { // If required set upper and lower 
partition size limits if (sf->auto_min_max_partition_size) { - set_offsets(cpi, tile_info, mi_row, mi_col, BLOCK_64X64); - rd_auto_partition_range(cpi, tile_info, mi_row, mi_col, + set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); + rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, &x->min_partition_size, &x->max_partition_size); } - rd_pick_partition(cpi, tile_data, tp, mi_row, mi_col, BLOCK_64X64, - &dummy_rdc, INT64_MAX, cpi->pc_root); + rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, + &dummy_rdc, INT64_MAX, td->pc_root); } } } static void init_encode_frame_mb_context(VP9_COMP *cpi) { - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->td.mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); @@ -2572,8 +2584,8 @@ static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) { return LAST_FRAME; } -static TX_MODE select_tx_mode(const VP9_COMP *cpi) { - if (cpi->mb.e_mbd.lossless) +static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) { + if (xd->lossless) return ONLY_4X4; if (cpi->sf.tx_size_search_method == USE_LARGESTALL) return ALLOW_32X32; @@ -2585,15 +2597,14 @@ static TX_MODE select_tx_mode(const VP9_COMP *cpi) { } static void nonrd_pick_sb_modes(VP9_COMP *cpi, - TileDataEnc *tile_data, + TileDataEnc *tile_data, MACROBLOCK *const x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *mbmi; - set_offsets(cpi, tile_info, mi_row, mi_col, bsize); + set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); mbmi = &xd->mi[0].src_mi->mbmi; mbmi->sb_type = bsize; @@ -2691,7 +2702,7 @@ static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) { } } -static void nonrd_pick_partition(VP9_COMP 
*cpi, +static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, RD_COST *rd_cost, @@ -2701,7 +2712,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, const VP9EncoderConfig *const oxcf = &cpi->oxcf; VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; TOKENEXTRA *tp_orig = *tp; @@ -2755,7 +2766,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, // PARTITION_NONE if (partition_none_allowed) { - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx); ctx->mic.mbmi = xd->mi[0].src_mi->mbmi; ctx->skip_txfm[0] = x->skip_txfm[0]; @@ -2805,7 +2816,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; load_pred_mv(x, ctx); - nonrd_pick_partition(cpi, tile_data, tp, + nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rdc, 0, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]); @@ -2836,7 +2847,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, if (sf->adaptive_motion_search) load_pred_mv(x, ctx); pc_tree->horizontal[0].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; @@ -2846,7 +2857,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) { load_pred_mv(x, ctx); pc_tree->horizontal[1].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row + ms, mi_col, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, 
&this_rdc, subsize, &pc_tree->horizontal[1]); @@ -2880,7 +2891,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, if (sf->adaptive_motion_search) load_pred_mv(x, ctx); pc_tree->vertical[0].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, &sum_rdc, subsize, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; @@ -2889,7 +2900,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) { load_pred_mv(x, ctx); pc_tree->vertical[1].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + ms, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; @@ -2935,10 +2946,10 @@ static void nonrd_pick_partition(VP9_COMP *cpi, // and and if necessary apply a Q delta using segmentation to get // closer to the target. 
if ((oxcf->aq_mode == COMPLEXITY_AQ) && cm->seg.update_map) { - vp9_select_in_frame_q_segment(cpi, bsize, mi_row, mi_col, output_enabled, - best_rdc.rate); + vp9_select_in_frame_q_segment(cpi, x, bsize, mi_row, mi_col, + output_enabled, best_rdc.rate); } - encode_sb_rt(cpi, tile_info, tp, mi_row, mi_col, output_enabled, + encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } @@ -2952,6 +2963,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, } static void nonrd_select_partition(VP9_COMP *cpi, + ThreadData *td, TileDataEnc *tile_data, MODE_INFO *mi, TOKENEXTRA **tp, @@ -2960,7 +2972,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, RD_COST *rd_cost, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; const int mis = cm->mi_stride; @@ -2979,18 +2991,18 @@ static void nonrd_select_partition(VP9_COMP *cpi, subsize >= BLOCK_16X16) { x->max_partition_size = BLOCK_32X32; x->min_partition_size = BLOCK_8X8; - nonrd_pick_partition(cpi, tile_data, tp, mi_row, mi_col, bsize, + nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, 0, INT64_MAX, pc_tree); } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) { x->max_partition_size = BLOCK_16X16; x->min_partition_size = BLOCK_8X8; - nonrd_pick_partition(cpi, tile_data, tp, mi_row, mi_col, bsize, + nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, 0, INT64_MAX, pc_tree); } else { switch (partition) { case PARTITION_NONE: pc_tree->none.pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->none); pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; @@ 
-2998,14 +3010,14 @@ static void nonrd_select_partition(VP9_COMP *cpi, break; case PARTITION_VERT: pc_tree->vertical[0].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; if (mi_col + hbs < cm->mi_cols) { pc_tree->vertical[1].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; @@ -3019,14 +3031,14 @@ static void nonrd_select_partition(VP9_COMP *cpi, break; case PARTITION_HORZ: pc_tree->horizontal[0].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; if (mi_row + hbs < cm->mi_rows) { pc_tree->horizontal[1].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &this_rdc, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; @@ -3040,10 +3052,10 @@ static void nonrd_select_partition(VP9_COMP *cpi, break; case PARTITION_SPLIT: subsize = get_subsize(bsize, PARTITION_SPLIT); - nonrd_select_partition(cpi, tile_data, mi, tp, mi_row, mi_col, + nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize, output_enabled, rd_cost, pc_tree->split[0]); - nonrd_select_partition(cpi, 
tile_data, mi + hbs, tp, + nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row, mi_col + hbs, subsize, output_enabled, &this_rdc, pc_tree->split[1]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && @@ -3051,7 +3063,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, rd_cost->rate += this_rdc.rate; rd_cost->dist += this_rdc.dist; } - nonrd_select_partition(cpi, tile_data, mi + hbs * mis, tp, + nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp, mi_row + hbs, mi_col, subsize, output_enabled, &this_rdc, pc_tree->split[2]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && @@ -3059,7 +3071,7 @@ static void nonrd_select_partition(VP9_COMP *cpi, rd_cost->rate += this_rdc.rate; rd_cost->dist += this_rdc.dist; } - nonrd_select_partition(cpi, tile_data, mi + hbs * mis + hbs, tp, + nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp, mi_row + hbs, mi_col + hbs, subsize, output_enabled, &this_rdc, pc_tree->split[3]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && @@ -3075,11 +3087,12 @@ static void nonrd_select_partition(VP9_COMP *cpi, } if (bsize == BLOCK_64X64 && output_enabled) - encode_sb_rt(cpi, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree); + encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree); } static void nonrd_use_partition(VP9_COMP *cpi, + ThreadData *td, TileDataEnc *tile_data, MODE_INFO *mi, TOKENEXTRA **tp, @@ -3088,7 +3101,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, RD_COST *rd_cost, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; TileInfo *tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; const int mis = cm->mi_stride; @@ -3105,37 +3118,37 @@ static void nonrd_use_partition(VP9_COMP *cpi, if (output_enabled && bsize != BLOCK_4X4) { int ctx = partition_plane_context(xd, mi_row, mi_col, 
bsize); - cm->counts.partition[ctx][partition]++; + td->counts->partition[ctx][partition]++; } switch (partition) { case PARTITION_NONE: pc_tree->none.pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->none); pc_tree->none.mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; pc_tree->none.skip = x->skip; - encode_b_rt(cpi, tile_info, tp, mi_row, mi_col, output_enabled, subsize, - &pc_tree->none); + encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, + subsize, &pc_tree->none); break; case PARTITION_VERT: pc_tree->vertical[0].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; - encode_b_rt(cpi, tile_info, tp, mi_row, mi_col, output_enabled, subsize, - &pc_tree->vertical[0]); + encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, + subsize, &pc_tree->vertical[0]); if (mi_col + hbs < cm->mi_cols) { pc_tree->vertical[1].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col + hbs, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; - encode_b_rt(cpi, tile_info, tp, mi_row, mi_col + hbs, + encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs, output_enabled, subsize, &pc_tree->vertical[1]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && @@ -3147,22 +3160,22 @@ static void nonrd_use_partition(VP9_COMP *cpi, break; case PARTITION_HORZ: pc_tree->horizontal[0].pred_pixel_ready = 1; - 
nonrd_pick_sb_modes(cpi, tile_data, mi_row, mi_col, rd_cost, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; - encode_b_rt(cpi, tile_info, tp, mi_row, mi_col, output_enabled, subsize, - &pc_tree->horizontal[0]); + encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, + subsize, &pc_tree->horizontal[0]); if (mi_row + hbs < cm->mi_rows) { pc_tree->horizontal[1].pred_pixel_ready = 1; - nonrd_pick_sb_modes(cpi, tile_data, mi_row + hbs, mi_col, + nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &this_rdc, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[1].mic.mbmi = xd->mi[0].src_mi->mbmi; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; - encode_b_rt(cpi, tile_info, tp, mi_row + hbs, mi_col, + encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col, output_enabled, subsize, &pc_tree->horizontal[1]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && @@ -3174,10 +3187,10 @@ static void nonrd_use_partition(VP9_COMP *cpi, break; case PARTITION_SPLIT: subsize = get_subsize(bsize, PARTITION_SPLIT); - nonrd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col, + nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize, output_enabled, rd_cost, pc_tree->split[0]); - nonrd_use_partition(cpi, tile_data, mi + hbs, tp, + nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row, mi_col + hbs, subsize, output_enabled, &this_rdc, pc_tree->split[1]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && @@ -3185,7 +3198,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, rd_cost->rate += this_rdc.rate; rd_cost->dist += this_rdc.dist; } - nonrd_use_partition(cpi, tile_data, mi + hbs * mis, tp, + nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp, mi_row + hbs, 
mi_col, subsize, output_enabled, &this_rdc, pc_tree->split[2]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && @@ -3193,7 +3206,7 @@ static void nonrd_use_partition(VP9_COMP *cpi, rd_cost->rate += this_rdc.rate; rd_cost->dist += this_rdc.dist; } - nonrd_use_partition(cpi, tile_data, mi + hbs * mis + hbs, tp, + nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp, mi_row + hbs, mi_col + hbs, subsize, output_enabled, &this_rdc, pc_tree->split[3]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && @@ -3212,13 +3225,14 @@ static void nonrd_use_partition(VP9_COMP *cpi, } static void encode_nonrd_sb_row(VP9_COMP *cpi, + ThreadData *td, TileDataEnc *tile_data, int mi_row, TOKENEXTRA **tp) { SPEED_FEATURES *const sf = &cpi->sf; VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; int mi_col; @@ -3240,37 +3254,37 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, // Set the partition type of the 64X64 block switch (sf->partition_search_type) { case VAR_BASED_PARTITION: - choose_partitioning(cpi, tile_info, mi_row, mi_col); - nonrd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, 1, &dummy_rdc, cpi->pc_root); + choose_partitioning(cpi, tile_info, x, mi_row, mi_col); + nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rdc, td->pc_root); break; case SOURCE_VAR_BASED_PARTITION: - set_source_var_based_partition(cpi, tile_info, mi, mi_row, mi_col); - nonrd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, 1, &dummy_rdc, cpi->pc_root); + set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col); + nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rdc, td->pc_root); break; case FIXED_PARTITION: bsize = sf->partition_search_type == FIXED_PARTITION ? 
sf->always_this_block_size : - get_nonrd_var_based_fixed_partition(cpi, mi_row, mi_col); + get_nonrd_var_based_fixed_partition(cpi, x, mi_row, mi_col); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); - nonrd_use_partition(cpi, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, 1, &dummy_rdc, cpi->pc_root); + nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rdc, td->pc_root); break; case REFERENCE_PARTITION: - set_offsets(cpi, tile_info, mi_row, mi_col, BLOCK_64X64); + set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && xd->mi[0].src_mi->mbmi.segment_id) { - auto_partition_range(cpi, tile_info, mi_row, mi_col, + auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, &x->min_partition_size, &x->max_partition_size); - nonrd_pick_partition(cpi, tile_data, tp, mi_row, mi_col, + nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rdc, 1, - INT64_MAX, cpi->pc_root); + INT64_MAX, td->pc_root); } else { - choose_partitioning(cpi, tile_info, mi_row, mi_col); - nonrd_select_partition(cpi, tile_data, mi, tp, mi_row, mi_col, - BLOCK_64X64, 1, &dummy_rdc, cpi->pc_root); + choose_partitioning(cpi, tile_info, x, mi_row, mi_col); + nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, + BLOCK_64X64, 1, &dummy_rdc, td->pc_root); } break; @@ -3394,13 +3408,13 @@ static void source_var_based_partition_search_method(VP9_COMP *cpi) { } } -static int get_skip_encode_frame(const VP9_COMMON *cm) { +static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) { unsigned int intra_count = 0, inter_count = 0; int j; for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { - intra_count += cm->counts.intra_inter[j][0]; - inter_count += cm->counts.intra_inter[j][1]; + intra_count += td->counts->intra_inter[j][0]; + inter_count += td->counts->intra_inter[j][1]; } return (intra_count << 2) < inter_count && @@ -3459,9 
+3473,11 @@ static void encode_tiles(VP9_COMP *cpi) { for (mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end; mi_row += MI_BLOCK_SIZE) { if (cpi->sf.use_nonrd_pick_mode) - encode_nonrd_sb_row(cpi, this_tile, mi_row, &tok[tile_row][tile_col]); + encode_nonrd_sb_row(cpi, &cpi->td, this_tile, mi_row, + &tok[tile_row][tile_col]); else - encode_rd_sb_row(cpi, this_tile, mi_row, &tok[tile_row][tile_col]); + encode_rd_sb_row(cpi, &cpi->td, this_tile, mi_row, + &tok[tile_row][tile_col]); } cpi->tok_count[tile_row][tile_col] = (unsigned int)(tok[tile_row][tile_col] - old_tok); @@ -3488,19 +3504,20 @@ static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats, static void encode_frame_internal(VP9_COMP *cpi) { SPEED_FEATURES *const sf = &cpi->sf; RD_OPT *const rd_opt = &cpi->rd; - MACROBLOCK *const x = &cpi->mb; + ThreadData *const td = &cpi->td; + MACROBLOCK *const x = &td->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; - COUNTS *const counts = cpi->frame_counts; + RD_COUNTS *const rdc = &cpi->td.rd_counts; xd->mi = cm->mi; xd->mi[0].src_mi = &xd->mi[0]; - vp9_zero(cm->counts); - vp9_zero(counts->coef_counts); - vp9_zero(counts->comp_pred_diff); - vp9_zero(counts->filter_diff); - vp9_zero(counts->tx_select_diff); + vp9_zero(*td->counts); + vp9_zero(rdc->coef_counts); + vp9_zero(rdc->comp_pred_diff); + vp9_zero(rdc->filter_diff); + vp9_zero(rdc->tx_select_diff); vp9_zero(rd_opt->tx_select_threshes); xd->lossless = cm->base_qindex == 0 && @@ -3508,7 +3525,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; - cm->tx_mode = select_tx_mode(cpi); + cm->tx_mode = select_tx_mode(cpi, xd); #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) @@ -3548,7 +3565,7 @@ static void encode_frame_internal(VP9_COMP *cpi) { int i; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; - PICK_MODE_CONTEXT *ctx = &cpi->pc_root->none; + PICK_MODE_CONTEXT 
*ctx = &cpi->td.pc_root->none; for (i = 0; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][0]; @@ -3579,7 +3596,8 @@ static void encode_frame_internal(VP9_COMP *cpi) { cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); } - sf->skip_encode_frame = sf->skip_encode_sb ? get_skip_encode_frame(cm) : 0; + sf->skip_encode_frame = sf->skip_encode_sb ? + get_skip_encode_frame(cm, td) : 0; #if 0 // Keep record of the total distortion this time around for future use @@ -3607,7 +3625,8 @@ static INTERP_FILTER get_interp_filter( void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RD_OPT *const rd_opt = &cpi->rd; - COUNTS *const counts = cpi->frame_counts; + FRAME_COUNTS *counts = cpi->td.counts; + RD_COUNTS *const rdc = &cpi->td.rd_counts; // In the longer term the encoder should be generalized to match the // decoder such that we allow compound where one of the 3 buffers has a @@ -3665,15 +3684,16 @@ void vp9_encode_frame(VP9_COMP *cpi) { encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) - mode_thrs[i] = (mode_thrs[i] + counts->comp_pred_diff[i] / cm->MBs) / 2; + mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - filter_thrs[i] = (filter_thrs[i] + counts->filter_diff[i] / cm->MBs) / 2; + filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2; for (i = 0; i < TX_MODES; ++i) { - int64_t pd = counts->tx_select_diff[i]; + int64_t pd = rdc->tx_select_diff[i]; if (i == TX_MODE_SELECT) - pd -= RDCOST(cpi->mb.rdmult, cpi->mb.rddiv, 2048 * (TX_SIZES - 1), 0); + pd -= RDCOST(cpi->td.mb.rdmult, cpi->td.mb.rddiv, 2048 * (TX_SIZES - 1), + 0); tx_thrs[i] = (tx_thrs[i] + (int)(pd / cm->MBs)) / 2; } @@ -3682,16 +3702,16 @@ void vp9_encode_frame(VP9_COMP *cpi) { int comp_count_zero = 0; for (i = 0; i < COMP_INTER_CONTEXTS; i++) { - single_count_zero += cm->counts.comp_inter[i][0]; - comp_count_zero += cm->counts.comp_inter[i][1]; + single_count_zero 
+= counts->comp_inter[i][0]; + comp_count_zero += counts->comp_inter[i][1]; } if (comp_count_zero == 0) { cm->reference_mode = SINGLE_REFERENCE; - vp9_zero(cm->counts.comp_inter); + vp9_zero(counts->comp_inter); } else if (single_count_zero == 0) { cm->reference_mode = COMPOUND_REFERENCE; - vp9_zero(cm->counts.comp_inter); + vp9_zero(counts->comp_inter); } } @@ -3702,17 +3722,17 @@ void vp9_encode_frame(VP9_COMP *cpi) { int count32x32 = 0; for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { - count4x4 += cm->counts.tx.p32x32[i][TX_4X4]; - count4x4 += cm->counts.tx.p16x16[i][TX_4X4]; - count4x4 += cm->counts.tx.p8x8[i][TX_4X4]; + count4x4 += counts->tx.p32x32[i][TX_4X4]; + count4x4 += counts->tx.p16x16[i][TX_4X4]; + count4x4 += counts->tx.p8x8[i][TX_4X4]; - count8x8_lp += cm->counts.tx.p32x32[i][TX_8X8]; - count8x8_lp += cm->counts.tx.p16x16[i][TX_8X8]; - count8x8_8x8p += cm->counts.tx.p8x8[i][TX_8X8]; + count8x8_lp += counts->tx.p32x32[i][TX_8X8]; + count8x8_lp += counts->tx.p16x16[i][TX_8X8]; + count8x8_8x8p += counts->tx.p8x8[i][TX_8X8]; - count16x16_16x16p += cm->counts.tx.p16x16[i][TX_16X16]; - count16x16_lp += cm->counts.tx.p32x32[i][TX_16X16]; - count32x32 += cm->counts.tx.p32x32[i][TX_32X32]; + count16x16_16x16p += counts->tx.p16x16[i][TX_16X16]; + count16x16_lp += counts->tx.p32x32[i][TX_16X16]; + count32x32 += counts->tx.p32x32[i][TX_32X32]; } if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && @@ -3773,11 +3793,12 @@ static int get_zbin_mode_boost(const MB_MODE_INFO *mbmi, int enabled) { } } -static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, +static void encode_superblock(VP9_COMP *cpi, ThreadData *td, + TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi_8x8 = xd->mi; MODE_INFO *mi = mi_8x8; @@ -3819,8 +3840,8 @@ 
static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, for (plane = 0; plane < MAX_MB_PLANE; ++plane) vp9_encode_intra_block_plane(x, MAX(bsize, BLOCK_8X8), plane); if (output_enabled) - sum_intra_stats(&cm->counts, mi); - vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8)); + sum_intra_stats(td->counts, mi); + vp9_tokenize_sb(cpi, td, t, !output_enabled, MAX(bsize, BLOCK_8X8)); } else { int ref; const int is_compound = has_second_ref(mbmi); @@ -3836,7 +3857,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, MAX(bsize, BLOCK_8X8)); vp9_encode_sb(x, MAX(bsize, BLOCK_8X8)); - vp9_tokenize_sb(cpi, t, !output_enabled, MAX(bsize, BLOCK_8X8)); + vp9_tokenize_sb(cpi, td, t, !output_enabled, MAX(bsize, BLOCK_8X8)); } if (output_enabled) { @@ -3844,7 +3865,7 @@ static void encode_superblock(VP9_COMP *cpi, TOKENEXTRA **t, int output_enabled, mbmi->sb_type >= BLOCK_8X8 && !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) { ++get_tx_counts(max_txsize_lookup[bsize], vp9_get_tx_size_context(xd), - &cm->counts.tx)[mbmi->tx_size]; + &td->counts->tx)[mbmi->tx_size]; } else { int x, y; TX_SIZE tx_size; diff --git a/vp9/encoder/vp9_encodemv.c b/vp9/encoder/vp9_encodemv.c index f36d76e..f2c4efc 100644 --- a/vp9/encoder/vp9_encodemv.c +++ b/vp9/encoder/vp9_encodemv.c @@ -161,10 +161,10 @@ static void write_mv_update(const vp9_tree_index *tree, update_mv(w, branch_ct[i], &probs[i], MV_UPDATE_PROB); } -void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w) { +void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w, + nmv_context_counts *const counts) { int i, j; nmv_context *const mvc = &cm->fc->nmvc; - nmv_context_counts *const counts = &cm->counts.mv; write_mv_update(vp9_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, w); @@ -241,7 +241,8 @@ static void inc_mvs(const MB_MODE_INFO *mbmi, const int_mv mvs[2], } } -void 
vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd) { +void vp9_update_mv_count(ThreadData *td) { + const MACROBLOCKD *xd = &td->mb.e_mbd; const MODE_INFO *mi = xd->mi[0].src_mi; const MB_MODE_INFO *const mbmi = &mi->mbmi; @@ -254,12 +255,12 @@ void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd) { for (idx = 0; idx < 2; idx += num_4x4_w) { const int i = idy * 2 + idx; if (mi->bmi[i].as_mode == NEWMV) - inc_mvs(mbmi, mi->bmi[i].as_mv, &cm->counts.mv); + inc_mvs(mbmi, mi->bmi[i].as_mv, &td->counts->mv); } } } else { if (mbmi->mode == NEWMV) - inc_mvs(mbmi, mbmi->mv, &cm->counts.mv); + inc_mvs(mbmi, mbmi->mv, &td->counts->mv); } } diff --git a/vp9/encoder/vp9_encodemv.h b/vp9/encoder/vp9_encodemv.h index e67f9e3..0ae4737 100644 --- a/vp9/encoder/vp9_encodemv.h +++ b/vp9/encoder/vp9_encodemv.h @@ -20,7 +20,8 @@ extern "C" { void vp9_entropy_mv_init(); -void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w); +void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vp9_writer *w, + nmv_context_counts *const counts); void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref, const nmv_context* mvctx, int usehp); @@ -28,7 +29,7 @@ void vp9_encode_mv(VP9_COMP *cpi, vp9_writer* w, const MV* mv, const MV* ref, void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], const nmv_context* mvctx, int usehp); -void vp9_update_mv_count(VP9_COMMON *cm, const MACROBLOCKD *xd); +void vp9_update_mv_count(ThreadData *td); #ifdef __cplusplus } // extern "C" diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 877e2c3..7a08be1 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -103,7 +103,7 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { } void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { - MACROBLOCK *const mb = &cpi->mb; + MACROBLOCK *const mb = &cpi->td.mb; cpi->common.allow_high_precision_mv = allow_high_precision_mv; if 
(cpi->common.allow_high_precision_mv) { mb->mvcost = mb->nmvcost_hp; @@ -235,9 +235,6 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { cpi->nmvsadcosts_hp[0] = NULL; cpi->nmvsadcosts_hp[1] = NULL; - vpx_free(cpi->frame_counts); - cpi->frame_counts = NULL; - vp9_cyclic_refresh_free(cpi->cyclic_refresh); cpi->cyclic_refresh = NULL; @@ -253,7 +250,7 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { vpx_free(cpi->tok); cpi->tok = 0; - vp9_free_pc_tree(cpi); + vp9_free_pc_tree(&cpi->td); for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i]; @@ -285,7 +282,7 @@ static void save_coding_context(VP9_COMP *cpi) { // restored with a call to vp9_restore_coding_context. These functions are // intended for use in a re-code loop in vp9_compress_frame where the // quantizer value is adjusted between loop iterations. - vp9_copy(cc->nmvjointcost, cpi->mb.nmvjointcost); + vp9_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost); vpx_memcpy(cc->nmvcosts[0], cpi->nmvcosts[0], MV_VALS * sizeof(*cpi->nmvcosts[0])); @@ -313,7 +310,7 @@ static void restore_coding_context(VP9_COMP *cpi) { // Restore key state variables to the snapshot state stored in the // previous call to vp9_save_coding_context. 
- vp9_copy(cpi->mb.nmvjointcost, cc->nmvjointcost); + vp9_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost); vpx_memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0])); @@ -553,12 +550,12 @@ void vp9_alloc_compressor_data(VP9_COMP *cpi) { CHECK_MEM_ERROR(cm, cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); } - vp9_setup_pc_tree(&cpi->common, cpi); + vp9_setup_pc_tree(&cpi->common, &cpi->td); } static void update_frame_size(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; vp9_set_mb_mi(cm, cm->width, cm->height); vp9_init_context_buffers(cm); @@ -616,6 +613,9 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cm->height = oxcf->height; vp9_alloc_compressor_data(cpi); + // Single thread case: use counts in common. + cpi->td.counts = &cm->counts; + // Spatial scalability. cpi->svc.number_spatial_layers = oxcf->ss_number_layers; // Temporal scalability. @@ -1272,7 +1272,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { cpi->oxcf = *oxcf; #if CONFIG_VP9_HIGHBITDEPTH - cpi->mb.e_mbd.bd = (int)cm->bit_depth; + cpi->td.mb.e_mbd.bd = (int)cm->bit_depth; #endif // CONFIG_VP9_HIGHBITDEPTH rc->baseline_gf_interval = DEFAULT_GF_INTERVAL; @@ -1473,9 +1473,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1], vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1]))); - CHECK_MEM_ERROR(cm, cpi->frame_counts, vpx_calloc(1, - sizeof(*cpi->frame_counts))); - for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0])); i++) { CHECK_MEM_ERROR(cm, cpi->mbgraph_stats[i].mb_stats, @@ -1537,18 +1534,18 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { cpi->first_time_stamp_ever = INT64_MAX; - cal_nmvjointsadcost(cpi->mb.nmvjointsadcost); - cpi->mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX]; - cpi->mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX]; - 
cpi->mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX]; - cpi->mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX]; - cal_nmvsadcosts(cpi->mb.nmvsadcost); + cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost); + cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX]; + cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX]; + cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX]; + cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX]; + cal_nmvsadcosts(cpi->td.mb.nmvsadcost); - cpi->mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX]; - cpi->mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX]; - cpi->mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX]; - cpi->mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX]; - cal_nmvsadcosts_hp(cpi->mb.nmvsadcost_hp); + cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX]; + cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX]; + cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX]; + cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX]; + cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp); #if CONFIG_VP9_TEMPORAL_DENOISING #ifdef OUTPUT_YUV_DENOISED @@ -2039,7 +2036,7 @@ static void generate_psnr_packet(VP9_COMP *cpi) { PSNR_STATS psnr; #if CONFIG_VP9_HIGHBITDEPTH calc_highbd_psnr(cpi->Source, cpi->common.frame_to_show, &psnr, - cpi->mb.e_mbd.bd, cpi->oxcf.input_bit_depth); + cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); #else calc_psnr(cpi->Source, cpi->common.frame_to_show, &psnr); #endif @@ -2420,7 +2417,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { } static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { - MACROBLOCKD *xd = &cpi->mb.e_mbd; + MACROBLOCKD *xd = &cpi->td.mb.e_mbd; struct loopfilter *lf = &cm->lf; if (xd->lossless) { lf->filter_level = 0; @@ -2685,7 +2682,7 @@ void set_frame_size(VP9_COMP *cpi) { int ref_frame; VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; if 
(oxcf->pass == 2 && cm->current_video_frame == 0 && @@ -3281,7 +3278,8 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_update_reference_frames(cpi); for (t = TX_4X4; t <= TX_32X32; t++) - full_to_model_counts(cm->counts.coef[t], cpi->frame_counts->coef_counts[t]); + full_to_model_counts(cpi->td.counts->coef[t], + cpi->td.rd_counts.coef_counts[t]); if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) vp9_adapt_coef_probs(cm); @@ -3728,15 +3726,16 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, const int lossless = is_lossless_requested(oxcf); #if CONFIG_VP9_HIGHBITDEPTH if (cpi->oxcf.use_highbitdepth) - cpi->mb.fwd_txm4x4 = lossless ? vp9_highbd_fwht4x4 : vp9_highbd_fdct4x4; + cpi->td.mb.fwd_txm4x4 = lossless ? + vp9_highbd_fwht4x4 : vp9_highbd_fdct4x4; else - cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4; - cpi->mb.highbd_itxm_add = lossless ? vp9_highbd_iwht4x4_add : + cpi->td.mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4; + cpi->td.mb.highbd_itxm_add = lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add; #else - cpi->mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4; + cpi->td.mb.fwd_txm4x4 = lossless ? vp9_fwht4x4 : vp9_fdct4x4; #endif // CONFIG_VP9_HIGHBITDEPTH - cpi->mb.itxm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; + cpi->td.mb.itxm_add = lossless ? 
vp9_iwht4x4_add : vp9_idct4x4_add; vp9_first_pass(cpi, source); } else if (oxcf->pass == 2 && (!cpi->use_svc || is_two_pass_svc(cpi))) { @@ -3789,7 +3788,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer; PSNR_STATS psnr; #if CONFIG_VP9_HIGHBITDEPTH - calc_highbd_psnr(orig, recon, &psnr, cpi->mb.e_mbd.bd, + calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); #else calc_psnr(orig, recon, &psnr); @@ -3814,7 +3813,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, vp9_clear_system_state(); #if CONFIG_VP9_HIGHBITDEPTH - calc_highbd_psnr(orig, pp, &psnr, cpi->mb.e_mbd.bd, + calc_highbd_psnr(orig, pp, &psnr, cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); #else calc_psnr(orig, pp, &psnr2); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 1e67316..81ad6b1 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -241,16 +241,26 @@ typedef struct TileDataEnc { int mode_map[BLOCK_SIZES][MAX_MODES]; } TileDataEnc; -typedef struct { +typedef struct RD_COUNTS { vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; int64_t comp_pred_diff[REFERENCE_MODES]; int64_t tx_select_diff[TX_MODES]; int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS]; -} COUNTS; +} RD_COUNTS; + +typedef struct ThreadData { + MACROBLOCK mb; + RD_COUNTS rd_counts; + FRAME_COUNTS *counts; + + PICK_MODE_CONTEXT *leaf_tree; + PC_TREE *pc_tree; + PC_TREE *pc_root; +} ThreadData; typedef struct VP9_COMP { QUANTS quants; - MACROBLOCK mb; + ThreadData td; VP9_COMMON common; VP9EncoderConfig oxcf; struct lookahead_ctx *lookahead; @@ -294,7 +304,6 @@ typedef struct VP9_COMP { int ambient_err; RD_OPT rd; - COUNTS *frame_counts; CODING_CONTEXT coding_context; @@ -424,10 +433,6 @@ typedef struct VP9_COMP { int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES]; int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int 
switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; - - PICK_MODE_CONTEXT *leaf_tree; - PC_TREE *pc_tree; - PC_TREE *pc_root; int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; int multi_arf_allowed; diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index 8f14d4c..74f5efb 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -450,13 +450,13 @@ static void set_first_pass_params(VP9_COMP *cpi) { void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { int mb_row, mb_col; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->td.mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; TileInfo tile; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; - const PICK_MODE_CONTEXT *ctx = &cpi->pc_root->none; + const PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none; int i; int recon_yoffset, recon_uvoffset; diff --git a/vp9/encoder/vp9_mbgraph.c b/vp9/encoder/vp9_mbgraph.c index bd04c56..18a8c72 100644 --- a/vp9/encoder/vp9_mbgraph.c +++ b/vp9/encoder/vp9_mbgraph.c @@ -24,7 +24,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, MV *dst_mv, int mb_row, int mb_col) { - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; const vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; @@ -80,7 +80,7 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, int_mv *dst_mv, int mb_row, int mb_col) { - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err, tmp_err; MV tmp_mv; @@ -117,7 +117,7 @@ static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, } static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) { - MACROBLOCK *const x = &cpi->mb; + 
MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err; @@ -131,7 +131,7 @@ static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) { return err; } static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) { - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; PREDICTION_MODE best_mode = -1, mode; unsigned int best_err = INT_MAX; @@ -174,7 +174,7 @@ static void update_mbgraph_mb_stats int mb_row, int mb_col ) { - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; int intra_error; VP9_COMMON *cm = &cpi->common; @@ -229,7 +229,7 @@ static void update_mbgraph_frame_stats(VP9_COMP *cpi, YV12_BUFFER_CONFIG *buf, YV12_BUFFER_CONFIG *golden_ref, YV12_BUFFER_CONFIG *alt_ref) { - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; VP9_COMMON *const cm = &cpi->common; diff --git a/vp9/encoder/vp9_picklpf.c b/vp9/encoder/vp9_picklpf.c index 85984fd..5559f8a 100644 --- a/vp9/encoder/vp9_picklpf.c +++ b/vp9/encoder/vp9_picklpf.c @@ -38,7 +38,7 @@ static int try_filter_frame(const YV12_BUFFER_CONFIG *sd, VP9_COMP *const cpi, VP9_COMMON *const cm = &cpi->common; int filt_err; - vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->mb.e_mbd, filt_level, 1, + vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level, 1, partial_frame); #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { diff --git a/vp9/encoder/vp9_pickmode.c b/vp9/encoder/vp9_pickmode.c index fb07866..6bb9794 100644 --- a/vp9/encoder/vp9_pickmode.c +++ b/vp9/encoder/vp9_pickmode.c @@ -744,7 +744,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter], &pf_var[filter], &pf_sse[filter]); cost = RDCOST(x->rdmult, x->rddiv, - vp9_get_switchable_rate(cpi) + pf_rate[filter], + vp9_get_switchable_rate(cpi, xd) + 
pf_rate[filter], pf_dist[filter]); pf_tx_size[filter] = mbmi->tx_size; if (cost < best_cost) { diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c index ce0ae79..e7a20c4 100644 --- a/vp9/encoder/vp9_quantize.c +++ b/vp9/encoder/vp9_quantize.c @@ -701,7 +701,7 @@ void vp9_update_zbin_extra(VP9_COMP *cpi, MACROBLOCK *x) { void vp9_frame_init_quantizer(VP9_COMP *cpi) { cpi->zbin_mode_boost = 0; - vp9_init_plane_quantizers(cpi, &cpi->mb); + vp9_init_plane_quantizers(cpi, &cpi->td.mb); } void vp9_set_quantizer(VP9_COMMON *cm, int q) { diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 4023255..5b49bfc 100644 --- a/vp9/encoder/vp9_rd.c +++ b/vp9/encoder/vp9_rd.c @@ -208,23 +208,23 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, int qindex) { #if CONFIG_VP9_HIGHBITDEPTH switch (cpi->common.bit_depth) { case VPX_BITS_8: - cpi->mb.sadperbit16 = sad_per_bit16lut_8[qindex]; - cpi->mb.sadperbit4 = sad_per_bit4lut_8[qindex]; + cpi->td.mb.sadperbit16 = sad_per_bit16lut_8[qindex]; + cpi->td.mb.sadperbit4 = sad_per_bit4lut_8[qindex]; break; case VPX_BITS_10: - cpi->mb.sadperbit16 = sad_per_bit16lut_10[qindex]; - cpi->mb.sadperbit4 = sad_per_bit4lut_10[qindex]; + cpi->td.mb.sadperbit16 = sad_per_bit16lut_10[qindex]; + cpi->td.mb.sadperbit4 = sad_per_bit4lut_10[qindex]; break; case VPX_BITS_12: - cpi->mb.sadperbit16 = sad_per_bit16lut_12[qindex]; - cpi->mb.sadperbit4 = sad_per_bit4lut_12[qindex]; + cpi->td.mb.sadperbit16 = sad_per_bit16lut_12[qindex]; + cpi->td.mb.sadperbit4 = sad_per_bit4lut_12[qindex]; break; default: assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); } #else - cpi->mb.sadperbit16 = sad_per_bit16lut_8[qindex]; - cpi->mb.sadperbit4 = sad_per_bit4lut_8[qindex]; + cpi->td.mb.sadperbit16 = sad_per_bit16lut_8[qindex]; + cpi->td.mb.sadperbit4 = sad_per_bit4lut_8[qindex]; #endif // CONFIG_VP9_HIGHBITDEPTH } @@ -262,7 +262,7 @@ static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) { void 
vp9_initialize_rd_consts(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->td.mb; RD_OPT *const rd = &cpi->rd; int i; @@ -524,8 +524,7 @@ const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, return (scaled_idx != ref_idx) ? &cm->frame_bufs[scaled_idx].buf : NULL; } -int vp9_get_switchable_rate(const VP9_COMP *cpi) { - const MACROBLOCKD *const xd = &cpi->mb.e_mbd; +int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) { const MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; const int ctx = vp9_get_pred_context_switchable_interp(xd); return SWITCHABLE_INTERP_RATE_FACTOR * diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index 33670d3..e1593af 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -138,7 +138,8 @@ void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n, unsigned int qstep, int *rate, int64_t *dist); -int vp9_get_switchable_rate(const struct VP9_COMP *cpi); +int vp9_get_switchable_rate(const struct VP9_COMP *cpi, + const MACROBLOCKD *const xd); const YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi, int ref_frame); diff --git a/vp9/encoder/vp9_rdopt.c b/vp9/encoder/vp9_rdopt.c index 98524c2..f86e21c 100644 --- a/vp9/encoder/vp9_rdopt.c +++ b/vp9/encoder/vp9_rdopt.c @@ -1220,13 +1220,12 @@ static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } -static void choose_intra_uv_mode(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, +static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x, + PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize, TX_SIZE max_tx_size, int *rate_uv, int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv, PREDICTION_MODE *mode_uv) { - MACROBLOCK *const x = &cpi->mb; - // Use an estimated rd for uv_intra based on DC_PRED if the // appropriate speed flag is set. 
if (cpi->sf.use_uv_intra_rd_estimate) { @@ -2519,7 +2518,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, int64_t tmp_skip_sse = INT64_MAX; mbmi->interp_filter = i; - rs = vp9_get_switchable_rate(cpi); + rs = vp9_get_switchable_rate(cpi, xd); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (i > 0 && intpel_mv) { @@ -2603,7 +2602,7 @@ static int64_t handle_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, // Set the appropriate filter mbmi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter; - rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi) : 0; + rs = cm->interp_filter == SWITCHABLE ? vp9_get_switchable_rate(cpi, xd) : 0; if (pred_exists) { if (best_needs_copy) { @@ -3146,7 +3145,7 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, uv_tx = get_uv_tx_size_impl(mbmi->tx_size, bsize, pd->subsampling_x, pd->subsampling_y); if (rate_uv_intra[uv_tx] == INT_MAX) { - choose_intra_uv_mode(cpi, ctx, bsize, uv_tx, + choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]); } @@ -3517,7 +3516,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, int best_rs = INT_MAX; for (i = 0; i < SWITCHABLE_FILTERS; ++i) { mbmi->interp_filter = i; - rs = vp9_get_switchable_rate(cpi); + rs = vp9_get_switchable_rate(cpi, xd); if (rs < best_rs) { best_rs = rs; best_filter = mbmi->interp_filter; @@ -3528,7 +3527,7 @@ void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, // Set the appropriate filter if (cm->interp_filter == SWITCHABLE) { mbmi->interp_filter = best_filter; - rate2 += vp9_get_switchable_rate(cpi); + rate2 += vp9_get_switchable_rate(cpi, xd); } else { mbmi->interp_filter = cm->interp_filter; } @@ -3780,7 +3779,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, distortion2 += distortion_y; if (rate_uv_intra == INT_MAX) { - choose_intra_uv_mode(cpi, ctx, bsize, TX_4X4, + choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, 
&rate_uv_intra, &rate_uv_tokenonly, &dist_uv, &skip_uv, @@ -3844,7 +3843,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, if (tmp_rd == INT64_MAX) continue; - rs = vp9_get_switchable_rate(cpi); + rs = vp9_get_switchable_rate(cpi, xd); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); filter_cache[switchable_filter_index] = tmp_rd; filter_cache[SWITCHABLE_FILTERS] = @@ -3922,7 +3921,7 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, distortion2 += distortion; if (cm->interp_filter == SWITCHABLE) - rate2 += vp9_get_switchable_rate(cpi); + rate2 += vp9_get_switchable_rate(cpi, xd); if (!mode_excluded) mode_excluded = comp_pred ? cm->reference_mode == SINGLE_REFERENCE diff --git a/vp9/encoder/vp9_segmentation.c b/vp9/encoder/vp9_segmentation.c index f1d5177..c9874f7 100644 --- a/vp9/encoder/vp9_segmentation.c +++ b/vp9/encoder/vp9_segmentation.c @@ -37,10 +37,6 @@ void vp9_set_segment_data(struct segmentation *seg, seg->abs_delta = abs_delta; vpx_memcpy(seg->feature_data, feature_data, sizeof(seg->feature_data)); - - // TBD ?? 
Set the feature mask - // vpx_memcpy(cpi->mb.e_mbd.segment_feature_mask, 0, - // sizeof(cpi->mb.e_mbd.segment_feature_mask)); } void vp9_disable_segfeature(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id) { diff --git a/vp9/encoder/vp9_speed_features.c b/vp9/encoder/vp9_speed_features.c index 1801b20..9afc7b2 100644 --- a/vp9/encoder/vp9_speed_features.c +++ b/vp9/encoder/vp9_speed_features.c @@ -382,7 +382,7 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) { void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) { SPEED_FEATURES *const sf = &cpi->sf; VP9_COMMON *const cm = &cpi->common; - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->td.mb; const VP9EncoderConfig *const oxcf = &cpi->oxcf; int i; diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 5599227..a4051f0 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -213,7 +213,7 @@ static int temporal_filter_find_matching_mb_c(VP9_COMP *cpi, uint8_t *arf_frame_buf, uint8_t *frame_ptr_buf, int stride) { - MACROBLOCK *const x = &cpi->mb; + MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; const MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; int step_param; @@ -282,7 +282,7 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, int mb_uv_offset = 0; DECLARE_ALIGNED_ARRAY(16, unsigned int, accumulator, 16 * 16 * 3); DECLARE_ALIGNED_ARRAY(16, uint16_t, count, 16 * 16 * 3); - MACROBLOCKD *mbd = &cpi->mb.e_mbd; + MACROBLOCKD *mbd = &cpi->td.mb.e_mbd; YV12_BUFFER_CONFIG *f = frames[alt_ref_index]; uint8_t *dst1, *dst2; #if CONFIG_VP9_HIGHBITDEPTH @@ -321,8 +321,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, // 8 - VP9_INTERP_EXTEND. // To keep the mv in play for both Y and UV planes the max that it // can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1). 
- cpi->mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND)); - cpi->mb.mv_row_max = ((mb_rows - 1 - mb_row) * 16) + cpi->td.mb.mv_row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND)); + cpi->td.mb.mv_row_max = ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * VP9_INTERP_EXTEND); for (mb_col = 0; mb_col < mb_cols; mb_col++) { @@ -332,8 +332,8 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi, vpx_memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0])); vpx_memset(count, 0, 16 * 16 * 3 * sizeof(count[0])); - cpi->mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND)); - cpi->mb.mv_col_max = ((mb_cols - 1 - mb_col) * 16) + cpi->td.mb.mv_col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND)); + cpi->td.mb.mv_col_max = ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * VP9_INTERP_EXTEND); for (frame = 0; frame < frame_count; frame++) { @@ -653,6 +653,7 @@ static void adjust_arnr_filter(VP9_COMP *cpi, void vp9_temporal_filter(VP9_COMP *cpi, int distance) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; + MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; int frame; int frames_to_blur; int start_frame; @@ -720,8 +721,8 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { } } cm->mi = cm->mip + cm->mi_stride + 1; - cpi->mb.e_mbd.mi = cm->mi; - cpi->mb.e_mbd.mi[0].src_mi = &cpi->mb.e_mbd.mi[0]; + xd->mi = cm->mi; + xd->mi[0].src_mi = &xd->mi[0]; } else { // ARF is produced at the native frame size and resized when coded. 
#if CONFIG_VP9_HIGHBITDEPTH diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index fa6b1d9..393eb1a 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -244,15 +244,17 @@ void vp9_tokenize_initialize() { struct tokenize_b_args { VP9_COMP *cpi; - MACROBLOCKD *xd; + ThreadData *td; TOKENEXTRA **tp; }; static void set_entropy_context_b(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args* const args = arg; - MACROBLOCKD *const xd = args->xd; - struct macroblock_plane *p = &args->cpi->mb.plane[plane]; + ThreadData *const td = args->td; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; + struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; int aoff, loff; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); @@ -294,10 +296,12 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args* const args = arg; VP9_COMP *cpi = args->cpi; - MACROBLOCKD *xd = args->xd; + ThreadData *const td = args->td; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; TOKENEXTRA **tp = args->tp; uint8_t token_cache[32 * 32]; - struct macroblock_plane *p = &cpi->mb.plane[plane]; + struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; MB_MODE_INFO *mbmi = &xd->mi[0].src_mi->mbmi; int pt; /* near block/prev token context index */ @@ -311,11 +315,11 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, const scan_order *so; const int ref = is_inter_block(mbmi); unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] = - cpi->frame_counts->coef_counts[tx_size][type][ref]; + td->rd_counts.coef_counts[tx_size][type][ref]; vp9_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = cpi->common.fc->coef_probs[tx_size][type][ref]; unsigned int (*const eob_branch)[COEFF_CONTEXTS] = - 
cpi->common.counts.eob_branch[tx_size][type][ref]; + td->counts->eob_branch[tx_size][type][ref]; const uint8_t *const band = get_band_translate(tx_size); const int seg_eob = get_tx_eob(&cpi->common.seg, segment_id, tx_size); const TOKENVALUE *dct_value_tokens; @@ -421,19 +425,20 @@ int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { return result; } -void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, - BLOCK_SIZE bsize) { +void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, + int dry_run, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; - MACROBLOCKD *const xd = &cpi->mb.e_mbd; + MACROBLOCK *const x = &td->mb; + MACROBLOCKD *const xd = &x->e_mbd; MB_MODE_INFO *const mbmi = &xd->mi[0].src_mi->mbmi; TOKENEXTRA *t_backup = *t; const int ctx = vp9_get_skip_context(xd); const int skip_inc = !vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP); - struct tokenize_b_args arg = {cpi, xd, t}; + struct tokenize_b_args arg = {cpi, td, t}; if (mbmi->skip) { if (!dry_run) - cm->counts.skip[ctx][1] += skip_inc; + td->counts->skip[ctx][1] += skip_inc; reset_skip_context(xd, bsize); if (dry_run) *t = t_backup; @@ -441,7 +446,7 @@ void vp9_tokenize_sb(VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, } if (!dry_run) { - cm->counts.skip[ctx][0] += skip_inc; + td->counts->skip[ctx][0] += skip_inc; vp9_foreach_transformed_block(xd, bsize, tokenize_b, &arg); } else { vp9_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg); diff --git a/vp9/encoder/vp9_tokenize.h b/vp9/encoder/vp9_tokenize.h index 825252b..00afb72 100644 --- a/vp9/encoder/vp9_tokenize.h +++ b/vp9/encoder/vp9_tokenize.h @@ -52,9 +52,10 @@ int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); struct VP9_COMP; +struct ThreadData; -void vp9_tokenize_sb(struct VP9_COMP *cpi, TOKENEXTRA **t, int dry_run, - BLOCK_SIZE bsize); +void 
vp9_tokenize_sb(struct VP9_COMP *cpi, struct ThreadData *td, + TOKENEXTRA **t, int dry_run, BLOCK_SIZE bsize); extern const int16_t *vp9_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to -- 2.7.4