From d0b547c6760270e07571f5a592549fbdabcd08c4 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Fri, 14 Nov 2014 16:04:15 -0800 Subject: [PATCH] vp9_ethread: combine encoder counts in separate struct Several frame counters in encoder are updated at SB level. Combine those counters and put them in a separate struct, which allows us to allocate one copy for each thread. Change-Id: I00366296a13c0ada4d8fa12f5e07728388b6cab7 --- vp9/encoder/vp9_bitstream.c | 2 +- vp9/encoder/vp9_encodeframe.c | 28 +++++++++++++++------------- vp9/encoder/vp9_encoder.c | 8 +++++++- vp9/encoder/vp9_encoder.h | 9 ++++++++- vp9/encoder/vp9_rd.h | 3 --- vp9/encoder/vp9_tokenize.c | 2 +- 6 files changed, 32 insertions(+), 20 deletions(-) diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c index 4a3e79e..73ce789 100644 --- a/vp9/encoder/vp9_bitstream.c +++ b/vp9/encoder/vp9_bitstream.c @@ -500,7 +500,7 @@ static void write_modes(VP9_COMP *cpi, static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size, vp9_coeff_stats *coef_branch_ct, vp9_coeff_probs_model *coef_probs) { - vp9_coeff_count *coef_counts = cpi->coef_counts[tx_size]; + vp9_coeff_count *coef_counts = cpi->frame_counts->coef_counts[tx_size]; unsigned int (*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = cpi->common.counts.eob_branch[tx_size]; int i, j, k, l, m; diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 3beceb4..f01f25f 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -602,7 +602,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, int output_enabled) { int i, x_idx, y; VP9_COMMON *const cm = &cpi->common; - RD_OPT *const rd_opt = &cpi->rd; + COUNTS *const counts = cpi->frame_counts; MACROBLOCK *const x = cpi->mb; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = x->plane; @@ -695,7 +695,7 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, if (!vp9_segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) { for (i = 0; i < TX_MODES; i++) - rd_opt->tx_select_diff[i] += ctx->tx_rd_diff[i]; + counts->tx_select_diff[i] += ctx->tx_rd_diff[i]; } #if CONFIG_INTERNAL_STATS @@ -728,12 +728,12 @@ static void update_state(VP9_COMP *cpi, PICK_MODE_CONTEXT *ctx, } } - rd_opt->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; - rd_opt->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; - rd_opt->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; + counts->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; + counts->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; + counts->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - rd_opt->filter_diff[i] += ctx->best_filter_diff[i]; + counts->filter_diff[i] += ctx->best_filter_diff[i]; } for (h = 0; h < y_mis; ++h) { @@ -3495,15 +3495,16 @@ static void encode_frame_internal(VP9_COMP *cpi) { MACROBLOCK *const x = cpi->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; + COUNTS *const counts = cpi->frame_counts; xd->mi = cm->mi; xd->mi[0].src_mi = &xd->mi[0]; vp9_zero(cm->counts); - vp9_zero(cpi->coef_counts); - vp9_zero(rd_opt->comp_pred_diff); - vp9_zero(rd_opt->filter_diff); - vp9_zero(rd_opt->tx_select_diff); + vp9_zero(counts->coef_counts); + vp9_zero(counts->comp_pred_diff); + vp9_zero(counts->filter_diff); + vp9_zero(counts->tx_select_diff); vp9_zero(rd_opt->tx_select_threshes); xd->lossless = cm->base_qindex == 0 && @@ -3610,6 +3611,7 @@ static INTERP_FILTER get_interp_filter( void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RD_OPT *const rd_opt = &cpi->rd; + COUNTS *const counts = cpi->frame_counts; // In the longer term the encoder should be generalized to match the // decoder such that we allow compound where one of the 3 buffers has a @@ -3667,13 +3669,13 @@ void vp9_encode_frame(VP9_COMP *cpi) { encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) - mode_thrs[i] = (mode_thrs[i] + rd_opt->comp_pred_diff[i] / cm->MBs) / 2; + mode_thrs[i] = (mode_thrs[i] + counts->comp_pred_diff[i] / cm->MBs) / 2; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) - filter_thrs[i] = (filter_thrs[i] + rd_opt->filter_diff[i] / cm->MBs) / 2; + filter_thrs[i] = (filter_thrs[i] + counts->filter_diff[i] / cm->MBs) / 2; for (i = 0; i < TX_MODES; ++i) { - int64_t pd = rd_opt->tx_select_diff[i]; + int64_t pd = counts->tx_select_diff[i]; if (i == TX_MODE_SELECT) pd -= RDCOST(cpi->mb->rdmult, cpi->mb->rddiv, 2048 * (TX_SIZES - 1), 0); tx_thrs[i] = (tx_thrs[i] + (int)(pd / cm->MBs)) / 2; diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 4425d2f..06b153d 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -235,6 +235,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) { cpi->nmvsadcosts_hp[0] = NULL; cpi->nmvsadcosts_hp[1] = NULL; + vpx_free(cpi->frame_counts); + cpi->frame_counts = NULL; + vp9_cyclic_refresh_free(cpi->cyclic_refresh); cpi->cyclic_refresh = NULL; @@ -1476,6 +1479,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf) { CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1], vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1]))); + CHECK_MEM_ERROR(cm, cpi->frame_counts, vpx_calloc(1, + sizeof(*cpi->frame_counts))); + for (i = 0; i < (sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0])); i++) { CHECK_MEM_ERROR(cm, cpi->mbgraph_stats[i].mb_stats, @@ -3289,7 +3295,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, vp9_update_reference_frames(cpi); for (t = TX_4X4; t <= TX_32X32; t++) - full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]); + full_to_model_counts(cm->counts.coef[t], cpi->frame_counts->coef_counts[t]); if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) vp9_adapt_coef_probs(cm); diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index ccac59d..cd9ac3a 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -241,6 +241,13 @@ typedef struct TileDataEnc { int mode_map[BLOCK_SIZES][MAX_MODES]; } TileDataEnc; +typedef struct { + vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; + int64_t comp_pred_diff[REFERENCE_MODES]; + int64_t tx_select_diff[TX_MODES]; + int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS]; +} COUNTS; + typedef struct VP9_COMP { QUANTS quants; MACROBLOCK *mb; @@ -287,6 +294,7 @@ typedef struct VP9_COMP { int ambient_err; RD_OPT rd; + COUNTS *frame_counts; CODING_CONTEXT coding_context; @@ -305,7 +313,6 @@ typedef struct VP9_COMP { RATE_CONTROL rc; double framerate; - vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]; struct vpx_codec_pkt_list *output_pkt_list; diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index ebbe821..835882d 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -105,13 +105,10 @@ typedef struct RD_OPT { int mode_map[BLOCK_SIZES][MAX_MODES]; - int64_t comp_pred_diff[REFERENCE_MODES]; int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES]; - int64_t tx_select_diff[TX_MODES]; // TODO(agrange): can this overflow? int tx_select_threshes[MAX_REF_FRAMES][TX_MODES]; - int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS]; int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS]; int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; int64_t mask_filter; diff --git a/vp9/encoder/vp9_tokenize.c b/vp9/encoder/vp9_tokenize.c index 74c9cfd..dfde606 100644 --- a/vp9/encoder/vp9_tokenize.c +++ b/vp9/encoder/vp9_tokenize.c @@ -311,7 +311,7 @@ static void tokenize_b(int plane, int block, BLOCK_SIZE plane_bsize, const scan_order *so; const int ref = is_inter_block(mbmi); unsigned int (*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] = - cpi->coef_counts[tx_size][type][ref]; + cpi->frame_counts->coef_counts[tx_size][type][ref]; vp9_prob (*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = cpi->common.fc->coef_probs[tx_size][type][ref]; unsigned int (*const eob_branch)[COEFF_CONTEXTS] = -- 2.7.4