From d6da7b8ea1092d3c99591b2087811ad22d667d1b Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Thu, 14 Oct 2010 11:06:37 -0400 Subject: [PATCH] Improve bounds checking in vp8_diamond_search_sadx4() In order to know if all 4/8 neighbor points are within the bounds, 4 bounds checks are enough instead of checking 4 bounds for each point (16/32 checks). This improvement reduces the cost of vp8_diamond_search_sadx4() by 30%, and gives the encoder a 1.5% performance gain (test options: 1 pass, good, speed=4). Change-Id: Ie8da29d18a6ecfc9829e74ac02f6fa70e042331a --- vp8/encoder/mcomp.c | 81 +++++++++++++++++++++++------------------------------ 1 file changed, 35 insertions(+), 46 deletions(-) diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index b89354e..4d60b92 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -1035,84 +1035,73 @@ int vp8_diamond_search_sadx4 for (step = 0; step < tot_steps ; step++) { - int check_row_min, check_col_min, check_row_max, check_col_max; + int all_in = 1, t; - check_row_min = x->mv_row_min - best_mv->row; - check_row_max = x->mv_row_max - best_mv->row; - check_col_min = x->mv_col_min - best_mv->col; - check_col_max = x->mv_col_max - best_mv->col; + // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of + // checking 4 bounds for each points. 
+ all_in &= ((best_mv->row + ss[i].mv.row)> x->mv_row_min); + all_in &= ((best_mv->row + ss[i+1].mv.row) < x->mv_row_max); + all_in &= ((best_mv->col + ss[i+2].mv.col) > x->mv_col_min); + all_in &= ((best_mv->col + ss[i+3].mv.col) < x->mv_col_max); - for (j = 0 ; j < x->searches_per_step ; j += 4) + if (all_in) { - unsigned char *block_offset[4]; - unsigned int valid_block[4]; - int all_in = 1, t; + unsigned int sad_array[4]; - for (t = 0; t < 4; t++) + for (j = 0 ; j < x->searches_per_step ; j += 4) { - valid_block [t] = (ss[t+i].mv.col > check_col_min); - valid_block [t] &= (ss[t+i].mv.col < check_col_max); - valid_block [t] &= (ss[t+i].mv.row > check_row_min); - valid_block [t] &= (ss[t+i].mv.row < check_row_max); + unsigned char *block_offset[4]; - all_in &= valid_block[t]; - block_offset[t] = ss[i+t].offset + best_address; - } - - if (all_in) - { - unsigned int sad_array[4]; + for (t = 0; t < 4; t++) + block_offset[t] = ss[i+t].offset + best_address; fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); for (t = 0; t < 4; t++, i++) { - thissad = sad_array[t]; - - if (thissad < bestsad) + if (sad_array[t] < bestsad) { this_mv.row = (best_mv->row + ss[i].mv.row) << 3; this_mv.col = (best_mv->col + ss[i].mv.col) << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); + sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - if (thissad < bestsad) + if (sad_array[t] < bestsad) { - bestsad = thissad; + bestsad = sad_array[t]; best_site = i; } } } } - else + } + else + { + for (j = 0 ; j < x->searches_per_step ; j++) { - int t; + // Trap illegal vectors + this_row_offset = best_mv->row + ss[i].mv.row; + this_col_offset = best_mv->col + ss[i].mv.col; - for (t = 0; t < 4; i++, t++) + if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && + (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { - // Trap illegal vectors - if (valid_block[t]) + check_here = 
ss[i].offset + best_address; + thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + if (thissad < bestsad) { - check_here = block_offset[t]; - thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); + this_mv.row = this_row_offset << 3; + this_mv.col = this_col_offset << 3; + thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { - this_row_offset = best_mv->row + ss[i].mv.row; - this_col_offset = best_mv->col + ss[i].mv.col; - - this_mv.row = this_row_offset << 3; - this_mv.col = this_col_offset << 3; - thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); - - if (thissad < bestsad) - { - bestsad = thissad; - best_site = i; - } + bestsad = thissad; + best_site = i; } } } + i++; } } -- 2.7.4