From b41c17d625dc269eef8d0fbcc971f17523057e3b Mon Sep 17 00:00:00 2001 From: Attila Nagy Date: Tue, 17 Apr 2012 10:40:56 +0300 Subject: [PATCH] Shares one set of RD costs tables between all encoding threads RD costs were local to MACROBLOCK data and had to be copied all the time to each thread's MACROBLOCK data. Tables moved to a common place and only pointers are set up for each encoding thread. vp8_cost_tokens() generates 'int' costs so changed all types to be int (i.e. removed unsigned). NOTE: Could do some more cleaning in vp8cx_init_mbrthread_data(). Change-Id: Ifa4de4c6286dffaca7ed3082041fe5af1345ddc0 --- vp8/encoder/block.h | 18 +++++++++--------- vp8/encoder/ethreading.c | 43 +++++++++++++++++++------------------------ vp8/encoder/modecosts.c | 21 ++++++++++++++------- vp8/encoder/onyx_if.c | 25 +++++++++++++++++-------- vp8/encoder/onyx_int.h | 11 +++++++++++ vp8/encoder/pickinter.c | 4 ++-- vp8/encoder/ratectrl.c | 4 ++-- vp8/encoder/rdopt.c | 31 +++++++++++++++++-------------- 8 files changed, 91 insertions(+), 66 deletions(-) diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h index 6165d04..1be1021 100644 --- a/vp8/encoder/block.h +++ b/vp8/encoder/block.h @@ -90,16 +90,17 @@ typedef struct macroblock signed int act_zbin_adj; signed int last_act_zbin_adj; - int mvcosts[2][MVvals+1]; int *mvcost[2]; - int mvsadcosts[2][MVfpvals+1]; int *mvsadcost[2]; - int mbmode_cost[2][MB_MODE_COUNT]; - int intra_uv_mode_cost[2][MB_MODE_COUNT]; - unsigned int bmode_costs[10][10][10]; - unsigned int inter_bmode_costs[B_MODE_COUNT]; - - // These define limits to motion vector components to prevent them from extending outside the UMV borders + int (*mbmode_cost)[MB_MODE_COUNT]; + int (*intra_uv_mode_cost)[MB_MODE_COUNT]; + int (*bmode_costs)[10][10]; + int *inter_bmode_costs; + int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS] + [MAX_ENTROPY_TOKENS]; + + // These define limits to motion vector components to prevent + // them from extending outside the UMV borders int
mv_col_min; int mv_col_max; int mv_row_min; @@ -115,7 +116,6 @@ typedef struct macroblock unsigned char *active_ptr; MV_CONTEXT *mvc; - unsigned int token_costs[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; int optimize; int q_index; diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c index b549a7d..555a699 100644 --- a/vp8/encoder/ethreading.c +++ b/vp8/encoder/ethreading.c @@ -337,21 +337,16 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) z->src.v_buffer = x->src.v_buffer; */ + z->mvcost[0] = x->mvcost[0]; + z->mvcost[1] = x->mvcost[1]; + z->mvsadcost[0] = x->mvsadcost[0]; + z->mvsadcost[1] = x->mvsadcost[1]; - vpx_memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); - z->mvcost[0] = &z->mvcosts[0][mv_max+1]; - z->mvcost[1] = &z->mvcosts[1][mv_max+1]; - z->mvsadcost[0] = &z->mvsadcosts[0][mvfp_max+1]; - z->mvsadcost[1] = &z->mvsadcosts[1][mvfp_max+1]; - - - vpx_memcpy(z->token_costs, x->token_costs, sizeof(x->token_costs)); - vpx_memcpy(z->inter_bmode_costs, x->inter_bmode_costs, sizeof(x->inter_bmode_costs)); - //memcpy(z->mvcosts, x->mvcosts, sizeof(x->mvcosts)); - //memcpy(z->mvcost, x->mvcost, sizeof(x->mvcost)); - vpx_memcpy(z->mbmode_cost, x->mbmode_cost, sizeof(x->mbmode_cost)); - vpx_memcpy(z->intra_uv_mode_cost, x->intra_uv_mode_cost, sizeof(x->intra_uv_mode_cost)); - vpx_memcpy(z->bmode_costs, x->bmode_costs, sizeof(x->bmode_costs)); + z->token_costs = x->token_costs; + z->inter_bmode_costs = x->inter_bmode_costs; + z->mbmode_cost = x->mbmode_cost; + z->intra_uv_mode_cost = x->intra_uv_mode_cost; + z->bmode_costs = x->bmode_costs; for (i = 0; i < 25; i++) { @@ -359,17 +354,15 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) z->block[i].quant_fast = x->block[i].quant_fast; z->block[i].quant_shift = x->block[i].quant_shift; z->block[i].zbin = x->block[i].zbin; - z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; + z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; 
z->block[i].round = x->block[i].round; - z->q_index = x->q_index; - z->act_zbin_adj = x->act_zbin_adj; - z->last_act_zbin_adj = x->last_act_zbin_adj; - /* - z->block[i].src = x->block[i].src; - */ - z->block[i].src_stride = x->block[i].src_stride; + z->block[i].src_stride = x->block[i].src_stride; } + z->q_index = x->q_index; + z->act_zbin_adj = x->act_zbin_adj; + z->last_act_zbin_adj = x->last_act_zbin_adj; + { MACROBLOCKD *xd = &x->e_mbd; MACROBLOCKD *zd = &z->e_mbd; @@ -401,9 +394,11 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) zd->subpixel_predict16x16 = xd->subpixel_predict16x16; zd->segmentation_enabled = xd->segmentation_enabled; zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; - vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); + vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, + sizeof(xd->segment_feature_data)); - vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); + vpx_memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, + sizeof(xd->dequant_y1_dc)); vpx_memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); vpx_memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); vpx_memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c index c752a0a..c61563c 100644 --- a/vp8/encoder/modecosts.c +++ b/vp8/encoder/modecosts.c @@ -18,6 +18,8 @@ void vp8_init_mode_costs(VP8_COMP *c) { VP8_COMMON *x = &c->common; + struct rd_costs_struct *rd_costs = &c->rd_costs; + { const vp8_tree_p T = vp8_bmode_tree; @@ -29,19 +31,24 @@ void vp8_init_mode_costs(VP8_COMP *c) do { - vp8_cost_tokens((int *)c->mb.bmode_costs[i][j], vp8_kf_bmode_prob[i][j], T); + vp8_cost_tokens(rd_costs->bmode_costs[i][j], + vp8_kf_bmode_prob[i][j], T); } while (++j < VP8_BINTRAMODES); } while (++i < VP8_BINTRAMODES); - vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.bmode_prob, T); + 
vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.bmode_prob, T); } - vp8_cost_tokens((int *)c->mb.inter_bmode_costs, x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_tree); + vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.sub_mv_ref_prob, + vp8_sub_mv_ref_tree); - vp8_cost_tokens(c->mb.mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); - vp8_cost_tokens(c->mb.mbmode_cost[0], vp8_kf_ymode_prob, vp8_kf_ymode_tree); + vp8_cost_tokens(rd_costs->mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); + vp8_cost_tokens(rd_costs->mbmode_cost[0], vp8_kf_ymode_prob, + vp8_kf_ymode_tree); - vp8_cost_tokens(c->mb.intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree); - vp8_cost_tokens(c->mb.intra_uv_mode_cost[0], vp8_kf_uv_mode_prob, vp8_uv_mode_tree); + vp8_cost_tokens(rd_costs->intra_uv_mode_cost[1], x->fc.uv_mode_prob, + vp8_uv_mode_tree); + vp8_cost_tokens(rd_costs->intra_uv_mode_cost[0], vp8_kf_uv_mode_prob, + vp8_uv_mode_tree); } diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index 06887f8..8819633 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -1883,13 +1883,6 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->gf_rate_correction_factor = 1.0; cpi->twopass.est_max_qcorrection_factor = 1.0; - cpi->mb.mvcost[0] = &cpi->mb.mvcosts[0][mv_max+1]; - cpi->mb.mvcost[1] = &cpi->mb.mvcosts[1][mv_max+1]; - cpi->mb.mvsadcost[0] = &cpi->mb.mvsadcosts[0][mvfp_max+1]; - cpi->mb.mvsadcost[1] = &cpi->mb.mvsadcosts[1][mvfp_max+1]; - - cal_mvsadcosts(cpi->mb.mvsadcost); - for (i = 0; i < KEY_FRAME_CONTEXT; i++) { cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate; @@ -2023,13 +2016,29 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf) cpi->common.error.setjmp = 0; #if CONFIG_MULTI_RES_ENCODING + /* Calculate # of MBs in a row in lower-resolution level image. 
*/ if (cpi->oxcf.mr_encoder_id > 0) vp8_cal_low_res_mb_cols(cpi); + #endif - return cpi; + /* setup RD costs to MACROBLOCK struct */ + cpi->mb.mvcost[0] = &cpi->rd_costs.mvcosts[0][mv_max+1]; + cpi->mb.mvcost[1] = &cpi->rd_costs.mvcosts[1][mv_max+1]; + cpi->mb.mvsadcost[0] = &cpi->rd_costs.mvsadcosts[0][mvfp_max+1]; + cpi->mb.mvsadcost[1] = &cpi->rd_costs.mvsadcosts[1][mvfp_max+1]; + + cal_mvsadcosts(cpi->mb.mvsadcost); + + cpi->mb.mbmode_cost = cpi->rd_costs.mbmode_cost; + cpi->mb.intra_uv_mode_cost = cpi->rd_costs.intra_uv_mode_cost; + cpi->mb.bmode_costs = cpi->rd_costs.bmode_costs; + cpi->mb.inter_bmode_costs = cpi->rd_costs.inter_bmode_costs; + cpi->mb.token_costs = cpi->rd_costs.token_costs; + + return cpi; } diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index c7a1de8..ac6f6c2 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -693,6 +693,17 @@ typedef struct VP8_COMP int mr_low_res_mb_cols; #endif + struct rd_costs_struct + { + int mvcosts[2][MVvals+1]; + int mvsadcosts[2][MVfpvals+1]; + int mbmode_cost[2][MB_MODE_COUNT]; + int intra_uv_mode_cost[2][MB_MODE_COUNT]; + int bmode_costs[10][10][10]; + int inter_bmode_costs[B_MODE_COUNT]; + int token_costs[BLOCK_TYPES][COEF_BANDS] + [PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS]; + } rd_costs; } VP8_COMP; void control_data_rate(VP8_COMP *cpi); diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index dafb645..618a296 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -132,7 +132,7 @@ static int pick_intra4x4block( MACROBLOCK *x, int ib, B_PREDICTION_MODE *best_mode, - unsigned int *mode_costs, + const int *mode_costs, int *bestrate, int *bestdistortion) @@ -185,7 +185,7 @@ static int pick_intra4x4mby_modes int cost = mb->mbmode_cost [xd->frame_type] [B_PRED]; int error; int distortion = 0; - unsigned int *bmode_costs; + const int *bmode_costs; intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16); diff --git a/vp8/encoder/ratectrl.c 
b/vp8/encoder/ratectrl.c index 1caea2c..967a752 100644 --- a/vp8/encoder/ratectrl.c +++ b/vp8/encoder/ratectrl.c @@ -235,7 +235,7 @@ void vp8_save_coding_context(VP8_COMP *cpi) cc->frames_since_golden = cpi->common.frames_since_golden; vp8_copy(cc->mvc, cpi->common.fc.mvc); - vp8_copy(cc->mvcosts, cpi->mb.mvcosts); + vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts); vp8_copy(cc->ymode_prob, cpi->common.fc.ymode_prob); vp8_copy(cc->uv_mode_prob, cpi->common.fc.uv_mode_prob); @@ -272,7 +272,7 @@ void vp8_restore_coding_context(VP8_COMP *cpi) vp8_copy(cpi->common.fc.mvc, cc->mvc); - vp8_copy(cpi->mb.mvcosts, cc->mvcosts); + vp8_copy(cpi->rd_costs.mvcosts, cc->mvcosts); vp8_copy(cpi->common.fc.ymode_prob, cc->ymode_prob); vp8_copy(cpi->common.fc.uv_mode_prob, cc->uv_mode_prob); diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index 2b706ba..167192d 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -149,8 +149,8 @@ const int vp8_ref_frame_order[MAX_MODES] = }; static void fill_token_costs( - unsigned int c [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS], - const vp8_prob p [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [ENTROPY_NODES] + int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], + const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] ) { int i, j, k; @@ -159,21 +159,24 @@ static void fill_token_costs( for (i = 0; i < BLOCK_TYPES; i++) for (j = 0; j < COEF_BANDS; j++) for (k = 0; k < PREV_COEF_CONTEXTS; k++) + // check for pt=0 and band > 1 if block type 0 and 0 if blocktype 1 - if(k==0 && j>(i==0) ) - vp8_cost_tokens2((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree,2); + if (k == 0 && j > (i == 0)) + vp8_cost_tokens2(c[i][j][k], p [i][j][k], vp8_coef_tree, 2); else - vp8_cost_tokens((int *)(c [i][j][k]), p [i][j][k], vp8_coef_tree); + vp8_cost_tokens(c[i][j][k], p [i][j][k], vp8_coef_tree); } -static int rd_iifactor [ 32 ] = { 4, 4, 3, 2, 1, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 
0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - }; +static const int rd_iifactor[32] = +{ + 4, 4, 3, 2, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 +}; /* values are now correlated to quantizer */ -static int sad_per_bit16lut[QINDEX_RANGE] = +static const int sad_per_bit16lut[QINDEX_RANGE] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -192,7 +195,7 @@ static int sad_per_bit16lut[QINDEX_RANGE] = 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14 }; -static int sad_per_bit4lut[QINDEX_RANGE] = +static const int sad_per_bit4lut[QINDEX_RANGE] = { 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, @@ -637,7 +640,7 @@ static int rd_pick_intra4x4block( BLOCK *be, BLOCKD *b, B_PREDICTION_MODE *best_mode, - unsigned int *bmode_costs, + const int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, @@ -717,7 +720,7 @@ static int rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; - unsigned int *bmode_costs; + const int *bmode_costs; vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); -- 2.7.4