From 3c18a2bb2e5f6cde8189643345e33a1c27189ff8 Mon Sep 17 00:00:00 2001 From: Scott LaVarnway Date: Wed, 26 Jan 2011 16:42:56 -0500 Subject: [PATCH] Performance improvement of first pass Improved the performance of the first pass only (~6% on 720p test clip) by making use of LUT instead of the float calculations. Might try a SIMD version later. Also started to make use of int_mv instead of MV. Change-Id: If2a217c7d6b59cd2c25c5553e0ca7e0502403af8 --- vp8/encoder/firstpass.c | 99 ++++++++++++++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 38 deletions(-) diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c index 06e26be..fc6f043 100644 --- a/vp8/encoder/firstpass.c +++ b/vp8/encoder/firstpass.c @@ -8,7 +8,6 @@ * be found in the AUTHORS file in the root of the source tree. */ - #include "math.h" #include "limits.h" #include "block.h" @@ -178,40 +177,68 @@ static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) return modified_err; } +static const double weight_table[256] = { +0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, +0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, +0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, +0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, +0.020000, 0.031250, 0.062500, 0.093750, 0.125000, 0.156250, 0.187500, 0.218750, +0.250000, 0.281250, 0.312500, 0.343750, 0.375000, 0.406250, 0.437500, 0.468750, +0.500000, 0.531250, 0.562500, 0.593750, 0.625000, 0.656250, 0.687500, 0.718750, +0.750000, 0.781250, 0.812500, 0.843750, 0.875000, 0.906250, 0.937500, 0.968750, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000 +}; + double vp8_simple_weight(YV12_BUFFER_CONFIG *source) { int i, j; unsigned char *src = source->y_buffer; - unsigned char value; double sum_weights = 0.0; - double Weight; // Loop throught the Y plane raw examining levels and creating a weight for the image - for (i = 0; i < source->y_height; i++) + i = source->y_height; + do { - for (j = 0; j < source->y_width; j++) + j = source->y_width; + do { - value = src[j]; - - if (value >= 64) - Weight = 1.0; - else if (value > 32) - Weight = (value - 32.0f) / 32.0f; - else - Weight = 0.02; - - sum_weights += Weight; - } - + sum_weights += weight_table[ *src]; + src++; + }while(--j); + src -= source->y_width; src += source->y_stride; - } + }while(--i); sum_weights /= (source->y_height * source->y_width); return sum_weights; } + // This function returns the current per frame maximum bitrate target int frame_max_bits(VP8_COMP *cpi) { @@ -440,7 +467,6 @@ void vp8_end_first_pass(VP8_COMP *cpi) vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats); } - void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset ) { MACROBLOCKD * const xd = & x->e_mbd; @@ -460,7 +486,6 @@ void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * r VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16) ( src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err)); } - void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset ) { MACROBLOCKD *const xd = & x->e_mbd; @@ -548,7 +573,6 @@ void vp8_first_pass(VP8_COMP *cpi) int sum_in_vectors = 0; - MV best_ref_mv = {0, 0}; MV zero_ref_mv = {0, 0}; unsigned char *fp_motion_map_ptr = cpi->fp_motion_map; @@ -586,13 +610,20 @@ void vp8_first_pass(VP8_COMP *cpi) // for each macroblock row in image for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { - MV best_ref_mv = {0, 0}; + int_mv best_ref_mv; + + best_ref_mv.as_int = 0; // reset above block coeffs xd->up_available = (mb_row != 0); recon_yoffset = (mb_row * recon_y_stride * 16); recon_uvoffset = (mb_row * recon_uv_stride * 8); + // Set up limit values for motion vectors to prevent them extending outside the UMV borders + x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); + x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); + + // for each macroblock col in image for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { @@ -625,8 +656,6 @@ void vp8_first_pass(VP8_COMP *cpi) // Set up limit values for motion vectors to prevent them extending outside the UMV borders x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); - x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); - x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); // Other than for the first frame do a motion search if (cm->current_video_frame > 0) @@ -647,12 +676,12 @@ void vp8_first_pass(VP8_COMP *cpi) // Test last reference frame using the previous best mv as the // starting point (best reference) for the search - vp8_first_pass_motion_search(cpi, x, &best_ref_mv, + vp8_first_pass_motion_search(cpi, x, &best_ref_mv.as_mv, &d->bmi.mv.as_mv, lst_yv12, &motion_error, recon_yoffset); // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well - if ((best_ref_mv.col != 0) || (best_ref_mv.row != 0)) + if (best_ref_mv.as_int) { tmp_err = INT_MAX; vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, @@ -664,7 +693,6 @@ void vp8_first_pass(VP8_COMP *cpi) d->bmi.mv.as_mv.row = tmp_mv.row; d->bmi.mv.as_mv.col = tmp_mv.col; } - } // Experimental search in a second reference frame ((0,0) based only) @@ -693,6 +721,9 @@ void vp8_first_pass(VP8_COMP *cpi) xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; } + /* Intra assumed best */ + best_ref_mv.as_int = 0; + if (motion_error <= this_error) { d->bmi.mv.as_mv.row <<= 3; @@ -708,13 +739,10 @@ void vp8_first_pass(VP8_COMP *cpi) sum_mvcs += d->bmi.mv.as_mv.col * d->bmi.mv.as_mv.col; intercount++; - best_ref_mv.row = d->bmi.mv.as_mv.row; - best_ref_mv.col = d->bmi.mv.as_mv.col; - //best_ref_mv.row = 0; - //best_ref_mv.col = 0; + best_ref_mv.as_int = d->bmi.mv.as_int; // Was the vector non-zero - if (d->bmi.mv.as_mv.row || d->bmi.mv.as_mv.col) + if (d->bmi.mv.as_int) { mvcount++; @@ -770,12 +798,6 @@ void vp8_first_pass(VP8_COMP *cpi) *fp_motion_map_ptr = 1; } } - else - { - // Intra was best - best_ref_mv.row = 0; - best_ref_mv.col = 0; - } } coded_error += this_error; @@ -813,6 +835,7 @@ void vp8_first_pass(VP8_COMP *cpi) fps.coded_error = coded_error >> 8; weight = vp8_simple_weight(cpi->Source); + if (weight < 0.1) weight = 0.1; -- 2.7.4