From 6cf62dcf8a560e6a5c1025c9b7bcd7ca2f923d56 Mon Sep 17 00:00:00 2001 From: sdeng Date: Fri, 31 May 2019 15:11:53 -0700 Subject: [PATCH] Update rdcost using the rd_mult in current block This CL is a preparation for implementing hierarchical SSIM rdmult scaling. There is very little impact on metrics and speed. Metrics change (midres): avg_psnr 0.009, ovr_psnr 0.009, ssim 0.015. Instruction counts from "perf stat -e instructions:u ./vpxenc park_joy_480p.y4m --limit=50": with this cl: 317,722,808,461; before: 317,700,108,619. Change-Id: I7b1d1482ac69f7bc87065a93223a0274bcbe8ce3 --- vp9/encoder/vp9_encodeframe.c | 26 ++++++++++++-------------- vp9/encoder/vp9_rd.c | 8 ++++++++ vp9/encoder/vp9_rd.h | 2 ++ 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index 976d5f7..e7cc609 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -3822,10 +3822,12 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, // RD search. It may be used to prune ref frame selection of rect partitions. 
uint8_t ref_frames_used[4] = { 0, 0, 0, 0 }; + const int rd_div = x->rddiv; int partition_mul = x->cb_rdmult; if (oxcf->tuning == VP8_TUNE_SSIM) { set_ssim_rdmult(cpi, x, mi_row, mi_col, &partition_mul); } + vp9_rd_cost_update(partition_mul, rd_div, &best_rdc); (void)*tp_orig; @@ -3983,9 +3985,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } } if (bsize >= BLOCK_8X8) { - this_rdc.rdcost += RDCOST(partition_mul, x->rddiv, - cpi->partition_cost[pl][PARTITION_NONE], 0); this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; + vp9_rd_cost_update(partition_mul, rd_div, &this_rdc); } if (this_rdc.rdcost < best_rdc.rdcost) { @@ -4155,7 +4156,9 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, mi_col + x_idx, subsize, &this_rdc, best_rdc_split, pc_tree->split[i]); - if (this_rdc.rdcost == best_rdc_split.rdcost) { + if (this_rdc.dist == INT64_MAX || this_rdc.rate == INT_MAX || + (this_rdc.dist == best_rdc_split.dist && + this_rdc.rate == best_rdc_split.rate)) { sum_rdc.rdcost = INT64_MAX; break; } else { @@ -4168,15 +4171,14 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; + vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc); } } } if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) { - sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv, - cpi->partition_cost[pl][PARTITION_SPLIT], 0); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; + vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc); if ((sum_rdc.rdcost < best_rdc.rdcost) || (must_split && (sum_rdc.dist < best_rdc.dist))) { @@ -4234,8 +4236,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (partition_horz_allowed && (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { const int part_mode_rate = cpi->partition_cost[pl][PARTITION_HORZ]; - const int64_t part_mode_rdcost = - RDCOST(partition_mul, x->rddiv, part_mode_rate, 0); 
subsize = get_subsize(bsize, PARTITION_HORZ); load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && @@ -4245,8 +4245,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, &pc_tree->horizontal[0], best_rdc.rate - part_mode_rate, best_rdc.dist); if (sum_rdc.rdcost < INT64_MAX) { - sum_rdc.rdcost += part_mode_rdcost; sum_rdc.rate += part_mode_rate; + vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc); } if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows && @@ -4266,7 +4266,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; + vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc); } } @@ -4286,8 +4286,6 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, if (partition_vert_allowed && (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) { const int part_mode_rate = cpi->partition_cost[pl][PARTITION_VERT]; - const int64_t part_mode_rdcost = - RDCOST(partition_mul, x->rddiv, part_mode_rate, 0); subsize = get_subsize(bsize, PARTITION_VERT); load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && @@ -4297,8 +4295,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, &pc_tree->vertical[0], best_rdc.rate - part_mode_rate, best_rdc.dist); if (sum_rdc.rdcost < INT64_MAX) { - sum_rdc.rdcost += part_mode_rdcost; sum_rdc.rate += part_mode_rate; + vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc); } if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && @@ -4318,7 +4316,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td, } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; - sum_rdc.rdcost += this_rdc.rdcost; + vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc); } } diff --git a/vp9/encoder/vp9_rd.c b/vp9/encoder/vp9_rd.c index 3e38799..2a02c10 100644 --- a/vp9/encoder/vp9_rd.c +++ 
b/vp9/encoder/vp9_rd.c @@ -69,6 +69,14 @@ int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) { } return -RDCOST(mult, div, -rate, -dist); } +void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) { + if (rd_cost->rate < INT_MAX && rd_cost->dist < INT64_MAX) { + rd_cost->rdcost = + vp9_calculate_rd_cost(mult, div, rd_cost->rate, rd_cost->dist); + } else { + vp9_rd_cost_reset(rd_cost); + } +} // The baseline rd thresholds for breaking out of the rd loop for // certain modes are assumed to be based on 8x8 blocks. diff --git a/vp9/encoder/vp9_rd.h b/vp9/encoder/vp9_rd.h index b778f6a..df6ea90 100644 --- a/vp9/encoder/vp9_rd.h +++ b/vp9/encoder/vp9_rd.h @@ -138,6 +138,8 @@ void vp9_rd_cost_reset(RD_COST *rd_cost); void vp9_rd_cost_init(RD_COST *rd_cost); // It supports negative rate and dist, which is different from RDCOST(). int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist); +// Update the cost value based on its rate and distortion. +void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost); struct TileInfo; struct TileDataEnc; -- 2.7.4