From: Yunqing Wang <yunqingwang@google.com>
Date: Mon, 18 Apr 2011 19:48:34 +0000 (-0400)
Subject: Use sub-pixel search's SSE in mode selection
X-Git-Tag: v0.9.7~211^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b8f0b599853e02993ac6894d06511c695ca4bf3a;p=platform%2Fupstream%2Flibvpx.git

Use sub-pixel search's SSE in mode selection

Passed SSE from sub-pixel search back to pick_inter_mode
function, which is compared with the encode_breakout to
see if we could skip evaluating the remaining modes.

Change-Id: I4a86442834f0d1b880a19e21ea52d17d505f941d
---

diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index eb840d7..651890d 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -194,13 +194,13 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
 #define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse;}}, v=INT_MAX;)// checks if (r,c) has better score than previous best
+#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{thismse = DIST(r,c); if((v = (MVC(r,c)+thismse)) < besterr) { besterr = v; br=r; bc=c; *distortion = thismse; *sse1 = sse; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
 #define MIN(x,y) (((x)<(y))?(x):(y))
 #define MAX(x,y) (((x)>(y))?(x):(y))
 
 //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
 
-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)
+int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1)
 {
     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
     unsigned char *z = (*(b->base_src) + b->src);
@@ -226,7 +226,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     bestmv->col <<= 3;
 
     // calculate central point error
-    besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
+    besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
     *distortion = besterr;
     besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 
@@ -316,7 +316,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 #undef CHECK_BETTER
 #undef MIN
 #undef MAX
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)
+int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1)
 {
     int bestmse = INT_MAX;
     MV startmv;
@@ -345,7 +345,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     startmv = *bestmv;
 
     // calculate central point error
-    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
     *distortion = bestmse;
     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 
@@ -360,6 +360,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = left;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     this_mv.col += 8;
@@ -371,6 +372,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = right;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     // go up then down and check error
@@ -384,6 +386,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = up;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     this_mv.row += 8;
@@ -395,6 +398,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = down;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
 
@@ -436,6 +440,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
 //  }
@@ -473,6 +478,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = left;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     this_mv.col += 4;
@@ -484,6 +490,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = right;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     // go up then down and check error
@@ -507,6 +514,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = up;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     this_mv.row += 4;
@@ -518,6 +526,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = down;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
 
@@ -608,12 +617,13 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     return bestmse;
 }
 
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)
+int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1)
 {
     int bestmse = INT_MAX;
     MV startmv;
@@ -640,7 +650,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     startmv = *bestmv;
 
     // calculate central point error
-    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, sse1);
     *distortion = bestmse;
     bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 
@@ -655,6 +665,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = left;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     this_mv.col += 8;
@@ -666,6 +677,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = right;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     // go up then down and check error
@@ -679,6 +691,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = up;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     this_mv.row += 8;
@@ -690,6 +703,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = down;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
@@ -741,6 +755,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     this_mv.col += 8;
@@ -752,6 +767,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     this_mv.col = (this_mv.col - 8) | 4;
@@ -764,6 +780,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
     this_mv.col += 8;
@@ -775,6 +792,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
         *bestmv = this_mv;
         bestmse = diag;
         *distortion = thismse;
+        *sse1 = sse;
     }
 
 #endif
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 72faf8e..b14cbcb 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -49,7 +49,7 @@ extern int vp8_hex_search
 
 typedef int (fractional_mv_step_fp)
     (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv,
-     int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion);
+     int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse);
 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively;
 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step;
 extern fractional_mv_step_fp vp8_find_best_half_pixel_step;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 66b975b..a547f77 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -50,7 +50,7 @@ extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
 extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv);
 
 
-int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion)
+int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse)
 {
     (void) b;
     (void) d;
@@ -59,6 +59,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv,
     (void) vfp;
     (void) mvcost;
     (void) distortion;
+    (void) sse;
     bestmv->row <<= 3;
     bestmv->col <<= 3;
     return 0;
@@ -443,7 +444,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re
     int bestsme;
     //int all_rds[MAX_MODES];         // Experimental debug code.
     int best_mode_index = 0;
-    int sse = INT_MAX;
+    unsigned int sse = INT_MAX;
 
     MV mvp;
     int near_sadidx[8] = {0, 1, 2, 3, 4, 5, 6, 7};
@@ -791,7 +792,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re
             }
 
             if (bestsme < INT_MAX)
-                cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2);
+                cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2, &sse);
 
             mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
             mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -822,7 +823,7 @@ void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int re
             x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;
 
             if((this_mode != NEWMV) || !(have_subp_search))
-                distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));
+                distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse);
 
             this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
 
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index c97feed..b6e61eb 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1206,13 +1206,14 @@ static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x,
                 if (bestsme < INT_MAX)
                 {
                     int distortion;
+                    unsigned int sse;
 
                     if (!cpi->common.full_pixel)
                         cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
-                                                     bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost, &distortion);
+                                                     bsi->ref_mv, x->errorperbit / 2, v_fn_ptr, x->mvcost, &distortion, &sse);
                     else
                         vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4],
-                                                    bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, &distortion);
+                                                    bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, &distortion, &sse);
                 }
             } /* NEW4X4 */
 
@@ -2190,9 +2191,10 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                 x->mv_row_max = tmp_row_max;
 
                 if (bestsme < INT_MAX)
-                    {
-                        int dis; /* TODO: use dis in distortion calculation later. */
-                        cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis);
+                {
+                    int dis; /* TODO: use dis in distortion calculation later. */
+                    unsigned int sse;
+                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
                 }
 
                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
@@ -2239,7 +2241,8 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             }
             else if (x->encode_breakout)
             {
-                int sum, sse;
+                int sum;
+                unsigned int sse;
                 int threshold = (xd->block[0].dequant[1]
                             * xd->block[0].dequant[1] >>4);
 
@@ -2248,7 +2251,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
                 VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)
                     (x->src.y_buffer, x->src.y_stride,
-                     x->e_mbd.predictor, 16, (unsigned int *)(&sse), &sum);
+                     x->e_mbd.predictor, 16, &sse, &sum);
 
                 if (sse < threshold)
                 {
@@ -2272,8 +2275,7 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                             distortion_uv = sse2;
 
                             disable_skip = 1;
-                            this_rd = RDCOST(x->rdmult, x->rddiv, rate2,
-                                             distortion2);
+                            this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2);
 
                             break;
                         }
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 5ede33f..b771955 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -209,10 +209,11 @@ static int vp8_temporal_filter_find_matching_mb_c
     //if (bestsme > error_thresh && bestsme < INT_MAX)
     {
         int distortion;
+        unsigned int sse;
         bestsme = cpi->find_fractional_mv_step(x, b, d,
                     &d->bmi.mv.as_mv, &best_ref_mv1,
                     x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
-                    mvcost, &distortion);
+                    mvcost, &distortion, &sse);
     }
 #endif