Improve bounds checking in vp8_diamond_search_sadx4()
authorYunqing Wang <yunqingwang@google.com>
Thu, 14 Oct 2010 15:06:37 +0000 (11:06 -0400)
committerYunqing Wang <yunqingwang@google.com>
Thu, 14 Oct 2010 15:06:37 +0000 (11:06 -0400)
In order to know if all 4/8 neighbor points are within the bounds,
4 bounds checking are enough instead of checking 4 bounds for
each points (16/32 checkings). This improvement reduces cost of
vp8_diamond_search_sadx4() by 30%, and gives encoder a 1.5%
performance gain (test options: 1 pass, good, speed=4).

Change-Id: Ie8da29d18a6ecfc9829e74ac02f6fa70e042331a

vp8/encoder/mcomp.c

index b89354e..4d60b92 100644 (file)
@@ -1035,84 +1035,73 @@ int vp8_diamond_search_sadx4
 
     for (step = 0; step < tot_steps ; step++)
     {
-        int check_row_min, check_col_min, check_row_max, check_col_max;
+        int all_in = 1, t;
 
-        check_row_min = x->mv_row_min - best_mv->row;
-        check_row_max = x->mv_row_max - best_mv->row;
-        check_col_min = x->mv_col_min - best_mv->col;
-        check_col_max = x->mv_col_max - best_mv->col;
+        // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
+        // checking 4 bounds for each points.
+        all_in &= ((best_mv->row + ss[i].mv.row)> x->mv_row_min);
+        all_in &= ((best_mv->row + ss[i+1].mv.row) < x->mv_row_max);
+        all_in &= ((best_mv->col + ss[i+2].mv.col) > x->mv_col_min);
+        all_in &= ((best_mv->col + ss[i+3].mv.col) < x->mv_col_max);
 
-        for (j = 0 ; j < x->searches_per_step ; j += 4)
+        if (all_in)
         {
-            unsigned char *block_offset[4];
-            unsigned int valid_block[4];
-            int all_in = 1, t;
+            unsigned int sad_array[4];
 
-            for (t = 0; t < 4; t++)
+            for (j = 0 ; j < x->searches_per_step ; j += 4)
             {
-                valid_block [t]  = (ss[t+i].mv.col > check_col_min);
-                valid_block [t] &= (ss[t+i].mv.col < check_col_max);
-                valid_block [t] &= (ss[t+i].mv.row > check_row_min);
-                valid_block [t] &= (ss[t+i].mv.row < check_row_max);
+                unsigned char *block_offset[4];
 
-                all_in &= valid_block[t];
-                block_offset[t] = ss[i+t].offset + best_address;
-            }
-
-            if (all_in)
-            {
-                unsigned int sad_array[4];
+                for (t = 0; t < 4; t++)
+                    block_offset[t] = ss[i+t].offset + best_address;
 
                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
 
                 for (t = 0; t < 4; t++, i++)
                 {
-                    thissad = sad_array[t];
-
-                    if (thissad < bestsad)
+                    if (sad_array[t] < bestsad)
                     {
                         this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
                         this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
-                        thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+                        sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
 
-                        if (thissad < bestsad)
+                        if (sad_array[t] < bestsad)
                         {
-                            bestsad = thissad;
+                            bestsad = sad_array[t];
                             best_site = i;
                         }
                     }
                 }
             }
-            else
+        }
+        else
+        {
+            for (j = 0 ; j < x->searches_per_step ; j++)
             {
-                int t;
+                // Trap illegal vectors
+                this_row_offset = best_mv->row + ss[i].mv.row;
+                this_col_offset = best_mv->col + ss[i].mv.col;
 
-                for (t = 0; t < 4; i++, t++)
+                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
+                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
                 {
-                    // Trap illegal vectors
-                    if (valid_block[t])
+                    check_here = ss[i].offset + best_address;
+                    thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
 
+                    if (thissad < bestsad)
                     {
-                        check_here = block_offset[t];
-                        thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+                        this_mv.row = this_row_offset << 3;
+                        this_mv.col = this_col_offset << 3;
+                        thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
 
                         if (thissad < bestsad)
                         {
-                            this_row_offset = best_mv->row + ss[i].mv.row;
-                            this_col_offset = best_mv->col + ss[i].mv.col;
-
-                            this_mv.row = this_row_offset << 3;
-                            this_mv.col = this_col_offset << 3;
-                            thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
-                            if (thissad < bestsad)
-                            {
-                                bestsad = thissad;
-                                best_site = i;
-                            }
+                            bestsad = thissad;
+                            best_site = i;
                         }
                     }
                 }
+                i++;
             }
         }