From: Yunqing Wang Date: Mon, 18 Apr 2011 19:48:34 +0000 (-0400) Subject: Use sub-pixel search's SSE in mode selection X-Git-Tag: v0.9.7~211^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b8f0b599853e02993ac6894d06511c695ca4bf3a;p=platform%2Fupstream%2Flibvpx.git Use sub-pixel search's SSE in mode selection Pass the SSE from the sub-pixel search back to the pick_inter_mode function so that it can be compared with encode_breakout to decide whether evaluating the remaining modes can be skipped. Change-Id: I4a86442834f0d1b880a19e21ea52d17d505f941d --- diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c index eb840d7..651890d 100644 --- a/vp8/encoder/mcomp.c +++ b/vp8/encoder/mcomp.c @@ -194,13 +194,13 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) #define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function. #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost -#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse;}}, v=INT_MAX;)// checks if (r,c) has better score than previous best +#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best #define MIN(x,y) (((x)<(y))?(x):(y)) #define MAX(x,y) (((x)>(y))?(x):(y)) //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; } -int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion) +int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int 
error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1) { unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col; unsigned char *z = (*(b->base_src) + b->src); @@ -226,7 +226,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, bestmv->col <<= 3; // calculate central point error - besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse); + besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); @@ -316,7 +316,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #undef CHECK_BETTER #undef MIN #undef MAX -int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion) +int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1) { int bestmse = INT_MAX; MV startmv; @@ -345,7 +345,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, startmv = *bestmv; // calculate central point error - bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse); + bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1); *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); @@ -360,6 +360,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, *bestmv = this_mv; bestmse = left; *distortion = thismse; + *sse1 = sse; } this_mv.col += 8; @@ -371,6 +372,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, *bestmv = this_mv; bestmse = right; *distortion = thismse; + *sse1 = sse; } // go up then down and check error @@ -384,6 +386,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK 
*x, BLOCK *b, BLOCKD *d, MV *bestmv, *bestmv = this_mv; bestmse = up; *distortion = thismse; + *sse1 = sse; } this_mv.row += 8; @@ -395,6 +398,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, *bestmv = this_mv; bestmse = down; *distortion = thismse; + *sse1 = sse; } @@ -436,6 +440,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, *bestmv = this_mv; bestmse = diag; *distortion = thismse; + *sse1 = sse; } // } @@ -473,6 +478,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, *bestmv = this_mv; bestmse = left; *distortion = thismse; + *sse1 = sse; } this_mv.col += 4; @@ -484,6 +490,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, *bestmv = this_mv; bestmse = right; *distortion = thismse; + *sse1 = sse; } // go up then down and check error @@ -507,6 +514,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, *bestmv = this_mv; bestmse = up; *distortion = thismse; + *sse1 = sse; } this_mv.row += 4; @@ -518,6 +526,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, *bestmv = this_mv; bestmse = down; *distortion = thismse; + *sse1 = sse; } @@ -608,12 +617,13 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, *bestmv = this_mv; bestmse = diag; *distortion = thismse; + *sse1 = sse; } return bestmse; } -int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion) +int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1) { int bestmse = INT_MAX; MV startmv; @@ -640,7 +650,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm startmv = *bestmv; // 
calculate central point error - bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse); + bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1); *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); @@ -655,6 +665,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm *bestmv = this_mv; bestmse = left; *distortion = thismse; + *sse1 = sse; } this_mv.col += 8; @@ -666,6 +677,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm *bestmv = this_mv; bestmse = right; *distortion = thismse; + *sse1 = sse; } // go up then down and check error @@ -679,6 +691,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm *bestmv = this_mv; bestmse = up; *distortion = thismse; + *sse1 = sse; } this_mv.row += 8; @@ -690,6 +703,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm *bestmv = this_mv; bestmse = down; *distortion = thismse; + *sse1 = sse; } // somewhat strangely not doing all the diagonals for half pel is slower than doing them. 
@@ -741,6 +755,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm *bestmv = this_mv; bestmse = diag; *distortion = thismse; + *sse1 = sse; } this_mv.col += 8; @@ -752,6 +767,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm *bestmv = this_mv; bestmse = diag; *distortion = thismse; + *sse1 = sse; } this_mv.col = (this_mv.col - 8) | 4; @@ -764,6 +780,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm *bestmv = this_mv; bestmse = diag; *distortion = thismse; + *sse1 = sse; } this_mv.col += 8; @@ -775,6 +792,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm *bestmv = this_mv; bestmse = diag; *distortion = thismse; + *sse1 = sse; } #endif diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h index 72faf8e..b14cbcb 100644 --- a/vp8/encoder/mcomp.h +++ b/vp8/encoder/mcomp.h @@ -49,7 +49,7 @@ extern int vp8_hex_search typedef int (fractional_mv_step_fp) (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, - int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion); + int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse); extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively; extern fractional_mv_step_fp vp8_find_best_sub_pixel_step; extern fractional_mv_step_fp vp8_find_best_half_pixel_step; diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c index 66b975b..a547f77 100644 --- a/vp8/encoder/pickinter.c +++ b/vp8/encoder/pickinter.c @@ -50,7 +50,7 @@ extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]); extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv); -int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion) +int 
vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse) { (void) b; (void) d; @@ -59,6 +59,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, (void) vfp; (void) mvcost; (void) distortion; + (void) sse; bestmv->row <<= 3; bestmv->col <<= 3; return 0; @@ -443,7 +444,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re int bestsme; //int all_rds[MAX_MODES]; // Experimental debug code. int best_mode_index = 0; - int sse = INT_MAX; + unsigned int sse = INT_MAX; MV mvp; int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7}; @@ -791,7 +792,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re } if (bestsme < INT_MAX) - cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2); + cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2, &sse); mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; mode_mv[NEWMV].col = d->bmi.mv.as_mv.col; @@ -822,7 +823,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int; if((this_mode != NEWMV) || !(have_subp_search)) - distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse)); + distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse); this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c index c97feed..b6e61eb 100644 --- a/vp8/encoder/rdopt.c +++ b/vp8/encoder/rdopt.c @@ -1206,13 +1206,14 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, if (bestsme < INT_MAX) { int distortion; + unsigned int sse; if (!cpi->common.full_pixel) 
cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], - bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost, &distortion); + bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost, &distortion, &sse); else vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], - bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, &distortion); + bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, &distortion, &sse); } } /* NEW4X4 */ @@ -2190,9 +2191,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int x->mv_row_max = tmp_row_max; if (bestsme < INT_MAX) - { - int dis; /* TODO: use dis in distortion calculation later. */ - cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis); + { + int dis; /* TODO: use dis in distortion calculation later. */ + unsigned int sse; + cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse); } mode_mv[NEWMV].row = d->bmi.mv.as_mv.row; @@ -2239,7 +2241,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int } else if (x->encode_breakout) { - int sum, sse; + int sum; + unsigned int sse; int threshold = (xd->block[0].dequant[1] * xd->block[0].dequant[1] >>4); @@ -2248,7 +2251,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var) (x->src.y_buffer, x->src.y_stride, - x->e_mbd.predictor, 16, (unsigned int *)(&sse), &sum); + x->e_mbd.predictor, 16, &sse, &sum); if (sse < threshold) { @@ -2272,8 +2275,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int distortion_uv = sse2; disable_skip = 1; - this_rd = RDCOST(x->rdmult, x->rddiv, rate2, - distortion2); + this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); break; } diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c index 5ede33f..b771955 100644 
--- a/vp8/encoder/temporal_filter.c +++ b/vp8/encoder/temporal_filter.c @@ -209,10 +209,11 @@ static int vp8_temporal_filter_find_matching_mb_c //if (bestsme > error_thresh && bestsme < INT_MAX) { int distortion; + unsigned int sse; bestsme = cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], - mvcost, &distortion); + mvcost, &distortion, &sse); } #endif