Use diamond search to replace full search in full-pixel refining search
authorYunqing Wang <yunqingwang@google.com>
Fri, 6 May 2011 16:51:31 +0000 (12:51 -0400)
committerYunqing Wang <yunqingwang@google.com>
Mon, 9 May 2011 18:07:06 +0000 (14:07 -0400)
In NEWMV mode, full search is currently used as the refining search
after the n-step search. Replacing it with an iterative diamond search
of radius 1 greatly reduces the computational complexity while still
maintaining the same encoding quality, since the refining search is
now done for every macroblock instead of only the small percentage of
macroblocks that were refined when using full search.

Tests on the test set showed a 3.4% encoding speed increase with no
PSNR or SSIM loss.

Change-Id: Ife907d7eb9544d15c34f17dc6e4cfd97cb743d41

vp8/encoder/generic/csystemdependent.c
vp8/encoder/mcomp.c
vp8/encoder/mcomp.h
vp8/encoder/onyx_if.c
vp8/encoder/onyx_int.h
vp8/encoder/rdopt.c
vp8/encoder/x86/mcomp_x86.h
vp8/encoder/x86/x86_csystemdependent.c

index d48c95b..58f810d 100644 (file)
@@ -90,6 +90,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
     cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
     cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
     cpi->rtcd.search.full_search             = vp8_full_search_sad;
+    cpi->rtcd.search.refining_search         = vp8_refining_search_sad;
     cpi->rtcd.search.diamond_search          = vp8_diamond_search_sad;
 #if !(CONFIG_REALTIME_ONLY)
     cpi->rtcd.temporal.apply                 = vp8_temporal_filter_apply_c;
index 9d447b2..90e3983 100644 (file)
@@ -1621,6 +1621,187 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
         return INT_MAX;
 }
 
+int vp8_refining_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int search_range, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], MV *center_mv) /* Iterative 1-away refining search starting at *ref_mv: *ref_mv is updated in place to the best position found; returns fn_ptr->vf() at that position plus mv_err_cost(), or INT_MAX. */
+{
+    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}}; /* the four 1-away offsets tried on every iteration */
+    MV tempmv; /* copy of *ref_mv taken at the start of an iteration, used to detect "no move" */
+    int i, j;
+    short this_row_offset, this_col_offset;
+
+    int what_stride = b->src_stride;
+    int in_what_stride = d->pre_stride;
+    unsigned char *what = (*(b->base_src) + b->src); /* source block being matched */
+    unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre + (ref_mv->row * (d->pre_stride)) + ref_mv->col); /* reference block at the starting ref_mv offset */
+    unsigned char *check_here;
+    unsigned int thissad;
+    MV this_mv;
+    unsigned int bestsad = INT_MAX; /* overwritten below with the cost of the starting position */
+
+    int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
+    MV fcenter_mv;
+
+    fcenter_mv.row = center_mv->row >> 3; /* NOTE(review): presumably converts center_mv to the same (full-pel) units as ref_mv -- confirm */
+    fcenter_mv.col = center_mv->col >> 3;
+
+    bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); /* cost of the starting position: sdf() result plus the mvsad_err_cost() term */
+
+    for (i=0; i<search_range; i++) /* at most search_range refinement steps */
+    {
+        tempmv.row = ref_mv->row;
+        tempmv.col = ref_mv->col;
+
+        for (j = 0 ; j < 4 ; j++)
+        {
+            this_row_offset = ref_mv->row + neighbors[j].row; /* note: ref_mv may already have moved earlier in this same j loop */
+            this_col_offset = ref_mv->col + neighbors[j].col;
+
+            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && /* only candidates strictly inside the x->mv_* limits are evaluated */
+            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
+            {
+                check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
+                thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); /* NOTE(review): bestsad is passed as the last argument, presumably as an early-out limit -- confirm against the sdf implementations */
+
+                if (thissad < bestsad) /* the mv cost term is only added when the raw sdf() result already beats the best */
+                {
+                    this_mv.row = this_row_offset;
+                    this_mv.col = this_col_offset;
+                    thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
+
+                    if (thissad < bestsad)
+                    {
+                        bestsad = thissad;
+                        ref_mv->row = this_row_offset; /* accept the move: caller's vector and best_address advance together */
+                        ref_mv->col = this_col_offset;
+                        best_address = check_here;
+                    }
+                }
+            }
+        }
+
+        if (tempmv.row == ref_mv->row && tempmv.col == ref_mv->col ) /* no neighbour improved on the centre: stop early */
+            break;
+    }
+
+    this_mv.row = ref_mv->row << 3; /* shifted left by 3 before being costed against the unshifted center_mv */
+    this_mv.col = ref_mv->col << 3;
+
+    if (bestsad < INT_MAX)
+        return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) /* thissad only receives vf()'s output argument here; its value is not used afterwards */
++ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+    else
+        return INT_MAX;
+}
+
+int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int search_range, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], MV *center_mv) /* Same 1-away refining search as vp8_refining_search_sad, but when all four neighbours are inside the mv limits their SADs are obtained with a single fn_ptr->sdx4df() call. */
+{
+    MV neighbors[4] = {{-1, 0}, {0, -1}, {0, 1}, {1, 0}}; /* the four 1-away offsets tried on every iteration */
+    MV tempmv; /* copy of *ref_mv taken at the start of an iteration, used to detect "no move" */
+    int i, j;
+    short this_row_offset, this_col_offset;
+
+    int what_stride = b->src_stride;
+    int in_what_stride = d->pre_stride;
+    unsigned char *what = (*(b->base_src) + b->src); /* source block being matched */
+    unsigned char *best_address = (unsigned char *)(*(d->base_pre) + d->pre + (ref_mv->row * (d->pre_stride)) + ref_mv->col); /* reference block at the starting ref_mv offset */
+    unsigned char *check_here;
+    unsigned int thissad;
+    MV this_mv;
+    unsigned int bestsad = INT_MAX; /* overwritten below with the cost of the starting position */
+
+    int *mvsadcost[2] = {x->mvsadcost[0], x->mvsadcost[1]};
+    MV fcenter_mv;
+
+    fcenter_mv.row = center_mv->row >> 3; /* NOTE(review): presumably converts center_mv to the same (full-pel) units as ref_mv -- confirm */
+    fcenter_mv.col = center_mv->col >> 3;
+
+    bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride, 0x7fffffff) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); /* cost of the starting position: sdf() result plus the mvsad_err_cost() term */
+
+    for (i=0; i<search_range; i++) /* at most search_range refinement steps */
+    {
+        int all_in = 1; /* cleared below if any of the four neighbours is on or outside an mv limit */
+
+        tempmv.row = ref_mv->row;
+        tempmv.col = ref_mv->col;
+
+        all_in &= ((ref_mv->row - 1) > x->mv_row_min);
+        all_in &= ((ref_mv->row + 1) < x->mv_row_max);
+        all_in &= ((ref_mv->col - 1) > x->mv_col_min);
+        all_in &= ((ref_mv->col + 1) < x->mv_col_max);
+
+        if(all_in) /* fast path: no per-candidate range checks needed */
+        {
+            unsigned int sad_array[4];
+            unsigned char *block_offset[4]; /* block_offset[j] is the address matching neighbors[j] */
+            block_offset[0] = best_address - in_what_stride;
+            block_offset[1] = best_address - 1;
+            block_offset[2] = best_address + 1;
+            block_offset[3] = best_address + in_what_stride;
+
+            fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); /* sad_array[] is read below as the per-neighbour SADs */
+
+            for (j = 0; j < 4; j++)
+            {
+                if (sad_array[j] < bestsad) /* the mv cost term is only added when the raw SAD already beats the best */
+                {
+                    this_mv.row = ref_mv->row + neighbors[j].row; /* note: ref_mv may already have moved earlier in this j loop, while sad_array[]/block_offset[] still refer to the iteration's starting centre */
+                    this_mv.col = ref_mv->col + neighbors[j].col;
+                    sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
+
+                    if (sad_array[j] < bestsad)
+                    {
+                        bestsad = sad_array[j];
+                        ref_mv->row = this_mv.row;
+                        ref_mv->col = this_mv.col;
+                        best_address = block_offset[j];
+                    }
+                }
+            }
+        }
+        else /* at least one neighbour is out of range: check candidates one at a time, as vp8_refining_search_sad does */
+        {
+            for (j = 0 ; j < 4 ; j++)
+            {
+                this_row_offset = ref_mv->row + neighbors[j].row;
+                this_col_offset = ref_mv->col + neighbors[j].col;
+
+                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && /* only candidates strictly inside the x->mv_* limits are evaluated */
+                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
+                {
+                    check_here = (neighbors[j].row)*in_what_stride + neighbors[j].col + best_address;
+                    thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad); /* NOTE(review): bestsad is passed as the last argument, presumably as an early-out limit -- confirm against the sdf implementations */
+
+                    if (thissad < bestsad)
+                    {
+                        this_mv.row = this_row_offset;
+                        this_mv.col = this_col_offset;
+                        thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit);
+
+                        if (thissad < bestsad)
+                        {
+                            bestsad = thissad;
+                            ref_mv->row = this_row_offset; /* accept the move: caller's vector and best_address advance together */
+                            ref_mv->col = this_col_offset;
+                            best_address = check_here;
+                        }
+                    }
+                }
+            }
+        }
+
+        if (tempmv.row == ref_mv->row && tempmv.col == ref_mv->col ) /* no neighbour improved on the centre: stop early */
+              break;
+    }
+
+    this_mv.row = ref_mv->row << 3; /* shifted left by 3 before being costed against the unshifted center_mv */
+    this_mv.col = ref_mv->col << 3;
+
+    if (bestsad < INT_MAX)
+        return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad)) /* thissad only receives vf()'s output argument here; its value is not used afterwards */
++ mv_err_cost(&this_mv, center_mv, mvcost, error_per_bit);
+    else
+        return INT_MAX;
+}
+
 #ifdef ENTROPY_STATS
 void print_mode_context(void)
 {
index b14cbcb..d655b83 100644 (file)
@@ -69,6 +69,20 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
      MV *center_mv \
     )
 
+#define prototype_refining_search_sad(sym)\
+    int (sym)\
+    (\
+     MACROBLOCK *x, \
+     BLOCK *b, \
+     BLOCKD *d, \
+     MV *ref_mv, /* start vector; the implementations update it in place */ \
+     int error_per_bit, \
+     int distance, /* named search_range in the implementations: max number of refinement iterations */ \
+     vp8_variance_fn_ptr_t *fn_ptr, \
+     int *mvcost[2], \
+     MV *center_mv \
+    )
+
 #define prototype_diamond_search_sad(sym)\
     int (sym)\
     (\
@@ -94,6 +108,10 @@ extern prototype_full_search_sad(vp8_full_search_sad);
 extern prototype_full_search_sad(vp8_full_search_sadx3);
 extern prototype_full_search_sad(vp8_full_search_sadx8);
 
+typedef prototype_refining_search_sad(*vp8_refining_search_fn_t);
+extern prototype_refining_search_sad(vp8_refining_search_sad);
+extern prototype_refining_search_sad(vp8_refining_search_sadx4);
+
 typedef prototype_diamond_search_sad(*vp8_diamond_search_fn_t);
 extern prototype_diamond_search_sad(vp8_diamond_search_sad);
 extern prototype_diamond_search_sad(vp8_diamond_search_sadx4);
@@ -103,6 +121,11 @@ extern prototype_diamond_search_sad(vp8_diamond_search_sadx4);
 #endif
 extern prototype_full_search_sad(vp8_search_full_search);
 
+#ifndef vp8_search_refining_search
+#define vp8_search_refining_search vp8_refining_search_sad
+#endif
+extern prototype_refining_search_sad(vp8_search_refining_search);
+
 #ifndef vp8_search_diamond_search
 #define vp8_search_diamond_search vp8_diamond_search_sad
 #endif
@@ -111,6 +134,7 @@ extern prototype_diamond_search_sad(vp8_search_diamond_search);
 typedef struct
 {
     prototype_full_search_sad(*full_search);
+    prototype_refining_search_sad(*refining_search);
     prototype_diamond_search_sad(*diamond_search);
 } vp8_search_rtcd_vtable_t;
 
index 1738e56..fe5ec46 100644 (file)
@@ -570,9 +570,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
     sf->max_step_search_steps = MAX_MVSEARCH_STEPS;
     sf->improved_mv_pred = 1;
 
-    cpi->do_full[0] = 0;
-    cpi->do_full[1] = 0;
-
     // default thresholds to 0
     for (i = 0; i < MAX_MODES; i++)
         sf->thresh_mult[i] = 0;
@@ -1976,9 +1973,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
         cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
     }
 
-    cpi->check_freq[0] = 15;
-    cpi->check_freq[1] = 15;
-
 #ifdef OUTPUT_YUV_SRC
     yuv_file = fopen("bd.yuv", "ab");
 #endif
@@ -2084,6 +2078,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
 
     cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);
     cpi->diamond_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, diamond_search);
+    cpi->refining_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, refining_search);
 
     cpi->ready_for_new_frame = 1;
 
index e2e6b36..607e57f 100644 (file)
@@ -311,9 +311,6 @@ typedef struct
     unsigned int mode_chosen_counts[MAX_MODES];
     unsigned int mbs_tested_so_far;
 
-    unsigned int check_freq[2];
-    unsigned int do_full[2];
-
     int rd_thresh_mult[MAX_MODES];
     int rd_baseline_thresh[MAX_MODES];
     int rd_threshes[MAX_MODES];
@@ -599,6 +596,7 @@ typedef struct
 
     fractional_mv_step_fp *find_fractional_mv_step;
     vp8_full_search_fn_t full_search_sad;
+    vp8_refining_search_fn_t refining_search_sad;
     vp8_diamond_search_fn_t diamond_search_sad;
     vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS];
     unsigned int time_receive_data;
index 2789cff..a49f76c 100644 (file)
@@ -2006,168 +2006,138 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             break;
 
         case NEWMV:
+        {
+            int thissme;
+            int bestsme = INT_MAX;
+            int step_param = cpi->sf.first_step;
+            int further_steps;
+            int n;
+            int do_refine=1;   /* If last step (1-away) of n-step search doesn't pick the center point as the best match,
+                                  we will do a final 1-away diamond refining search  */
 
-            // Decrement full search counter
-            if (cpi->check_freq[lf_or_gf] > 0)
-                cpi->check_freq[lf_or_gf] --;
+            int sadpb = x->sadperbit16;
 
-            {
-                int thissme;
-                int bestsme = INT_MAX;
-                int step_param = cpi->sf.first_step;
-                int search_range;
-                int further_steps;
-                int n;
+            int col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3;
+            int col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3;
+            int row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3;
+            int row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3;
 
-                int col_min = (best_ref_mv.col - MAX_FULL_PEL_VAL) >>3;
-                int col_max = (best_ref_mv.col + MAX_FULL_PEL_VAL) >>3;
-                int row_min = (best_ref_mv.row - MAX_FULL_PEL_VAL) >>3;
-                int row_max = (best_ref_mv.row + MAX_FULL_PEL_VAL) >>3;
-
-                int tmp_col_min = x->mv_col_min;
-                int tmp_col_max = x->mv_col_max;
-                int tmp_row_min = x->mv_row_min;
-                int tmp_row_max = x->mv_row_max;
-
-                // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
-                if (x->mv_col_min < col_min )
-                    x->mv_col_min = col_min;
-                if (x->mv_col_max > col_max )
-                    x->mv_col_max = col_max;
-                if (x->mv_row_min < row_min )
-                    x->mv_row_min = row_min;
-                if (x->mv_row_max > row_max )
-                    x->mv_row_max = row_max;
-
-                //adjust search range according to sr from mv prediction
-                if(sr > step_param)
-                    step_param = sr;
-
-                // Work out how long a search we should do
-                search_range = MAXF(abs(best_ref_mv.col), abs(best_ref_mv.row)) >> 3;
-
-                if (search_range >= x->vector_range)
-                    x->vector_range = search_range;
-                else if (x->vector_range > cpi->sf.min_fs_radius)
-                    x->vector_range--;
-
-                // Initial step/diamond search
-                {
-                    int sadpb = x->sadperbit16;
-
-                    if (cpi->sf.search_method == HEX)
-                    {
-                        bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv);
-                        mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
-                        mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
-                    }
-                    else
-                    {
-                        bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb < 9
-                        mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
-                        mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
+            int tmp_col_min = x->mv_col_min;
+            int tmp_col_max = x->mv_col_max;
+            int tmp_row_min = x->mv_row_min;
+            int tmp_row_max = x->mv_row_max;
 
-                        // Further step/diamond searches as necessary
-                        n = 0;
-                        further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
+            // Get intersection of UMV window and valid MV window to reduce # of checks in diamond search.
+            if (x->mv_col_min < col_min )
+                x->mv_col_min = col_min;
+            if (x->mv_col_max > col_max )
+                x->mv_col_max = col_max;
+            if (x->mv_row_min < row_min )
+                x->mv_row_min = row_min;
+            if (x->mv_row_max > row_max )
+                x->mv_row_max = row_max;
 
-                        n = num00;
-                        num00 = 0;
+            //adjust search range according to sr from mv prediction
+            if(sr > step_param)
+                step_param = sr;
 
-                        while (n < further_steps)
-                        {
-                            n++;
+            // Initial step/diamond search
+            if (cpi->sf.search_method == HEX)
+            {
+                bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost, &best_ref_mv);
+                mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
+                mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
+            }
+            else
+            {
+                bestsme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb < 9
+                mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
+                mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
 
-                            if (num00)
-                                num00--;
-                            else
-                            {
-                                thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb = 9
+                // Further step/diamond searches as necessary
+                n = 0;
+                further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
 
-                                if (thissme < bestsme)
-                                {
-                                    bestsme = thissme;
-                                    mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
-                                    mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
-                                }
-                                else
-                                {
-                                    d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
-                                    d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
-                                }
-                            }
-                        }
-                    }
+                n = num00;
+                num00 = 0;
 
-                }
+                /* If there won't be more n-step search, check to see if refining search is needed. */
+                if (n > further_steps)
+                    do_refine = 0;
 
-                // Should we do a full search
-                if (!cpi->check_freq[lf_or_gf] || cpi->do_full[lf_or_gf])
+                while (n < further_steps)
                 {
-                    int thissme;
-                    int full_flag_thresh = 0;
+                    n++;
 
-                    // Update x->vector_range based on best vector found in step search
-                    search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col));
-                    //search_range *= 1.4;  //didn't improve PSNR
-
-                    if (search_range > x->vector_range)
-                        x->vector_range = search_range;
+                    if (num00)
+                        num00--;
                     else
-                        search_range = x->vector_range;
-
-                    // Apply limits
-                    search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range;
-
-                    //add this to reduce full search range.
-                    if(sr<=3 && search_range > 8) search_range = 8;
-
                     {
-                        int sadpb = x->sadperbit16 >> 2;
-                        /* use diamond search result as full search staring point */
-                        thissme = cpi->full_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
-                    }
+                        thissme = cpi->diamond_search_sad(x, b, d, &mvp, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); //sadpb = 9
 
-                    // Barrier threshold to initiating full search
-                    // full_flag_thresh = 10 + (thissme >> 7);
-                    if ((thissme + full_flag_thresh) < bestsme)
-                    {
-                        cpi->do_full[lf_or_gf] ++;
-                        bestsme = thissme;
-                    }
-                    else if (thissme < bestsme)
-                        bestsme = thissme;
-                    else
-                    {
-                        cpi->do_full[lf_or_gf] = cpi->do_full[lf_or_gf] >> 1;
-                        cpi->check_freq[lf_or_gf] = cpi->sf.full_freq[lf_or_gf];
+                        /* check to see if refining search is needed. */
+                        if (num00 > (further_steps-n))
+                            do_refine = 0;
 
-                        // The full search result is actually worse so re-instate the previous best vector
-                        d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
-                        d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
+                        if (thissme < bestsme)
+                        {
+                            bestsme = thissme;
+                            mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
+                            mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
+                        }
+                        else
+                        {
+                            d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
+                            d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
+                        }
                     }
                 }
+            }
+
+            /* final 1-away diamond refining search */
+            if (do_refine == 1)
+            {
+                int search_range;
 
-                x->mv_col_min = tmp_col_min;
-                x->mv_col_max = tmp_col_max;
-                x->mv_row_min = tmp_row_min;
-                x->mv_row_max = tmp_row_max;
+                //It seems not a good way to set search_range. Need further investigation.
+                //search_range = MAXF(abs((mvp.row>>3) - d->bmi.mv.as_mv.row), abs((mvp.col>>3) - d->bmi.mv.as_mv.col));
+                search_range = 8;
 
-                if (bestsme < INT_MAX)
+                //thissme = cpi->full_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
+                thissme = cpi->refining_search_sad(x, b, d, &d->bmi.mv.as_mv, sadpb/4, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv);
+
+                if (thissme < bestsme)
                 {
-                    int dis; /* TODO: use dis in distortion calculation later. */
-                    unsigned int sse;
-                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
+                    bestsme = thissme;
+                    mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
+                    mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
                 }
+                else
+                {
+                    d->bmi.mv.as_mv.row = mode_mv[NEWMV].row;
+                    d->bmi.mv.as_mv.col = mode_mv[NEWMV].col;
+                }
+            }
 
-                mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
-                mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
-
-                // Add the new motion vector cost to our rolling cost variable
-                rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
+            x->mv_col_min = tmp_col_min;
+            x->mv_col_max = tmp_col_max;
+            x->mv_row_min = tmp_row_min;
+            x->mv_row_max = tmp_row_max;
 
+            if (bestsme < INT_MAX)
+            {
+                int dis; /* TODO: use dis in distortion calculation later. */
+                unsigned int sse;
+                cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse);
             }
 
+            mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
+            mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
+
+            // Add the new motion vector cost to our rolling cost variable
+            rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96);
+        }
+
         case NEARESTMV:
         case NEARMV:
 
@@ -2403,17 +2373,6 @@ void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
     }
 
-    // If we have chosen new mv or split then decay the full search check count more quickly.
-    if ((vp8_mode_order[best_mode_index] == NEWMV) || (vp8_mode_order[best_mode_index] == SPLITMV))
-    {
-        int lf_or_gf = (vp8_ref_frame_order[best_mode_index] == LAST_FRAME) ? 0 : 1;
-
-        if (cpi->check_freq[lf_or_gf] && !cpi->do_full[lf_or_gf])
-        {
-            cpi->check_freq[lf_or_gf] --;
-        }
-    }
-
     // Keep a record of best mode index that we chose
     cpi->last_best_mode_index = best_mode_index;
 
index 3b7b29c..efe7241 100644 (file)
@@ -18,6 +18,9 @@
 #undef  vp8_search_full_search
 #define vp8_search_full_search vp8_full_search_sadx3
 
+#undef  vp8_search_refining_search
+#define vp8_search_refining_search vp8_refining_search_sadx4
+
 #undef  vp8_search_diamond_search
 #define vp8_search_diamond_search vp8_diamond_search_sadx4
 
index b01319f..014e076 100644 (file)
@@ -278,6 +278,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.sad8x8x4d             = vp8_sad8x8x4d_sse3;
         cpi->rtcd.variance.sad4x4x4d             = vp8_sad4x4x4d_sse3;
         cpi->rtcd.search.diamond_search          = vp8_diamond_search_sadx4;
+        cpi->rtcd.search.refining_search         = vp8_refining_search_sadx4;
     }
 #endif