From 2987fa1dc130256562e661bafb0136ac9b704abf Mon Sep 17 00:00:00 2001
From: John Koleszar
Date: Mon, 15 Apr 2013 13:18:24 -0700
Subject: [PATCH] Removing rounding from UV MV calculation

Consider the previous behavior for the MV 1 3/8 (11/8 pel). In the
existing code, the fractional part of the MV is considered separately,
and rounding is applied, giving a result of 6/8. Rounding is not
required in this case: since we are increasing the precision from q3
to q4, the correct value 11/16 can be represented exactly.
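To make the arithmetic concrete, here is a small standalone sketch
(added for illustration, not part of the original change; the helper
old_chroma_mv_q3() is a hypothetical name for the halving code this
patch deletes):

    #include <stdio.h>

    /* Old path: halve the q3 (1/8-pel) luma MV to get a q3 chroma MV.
     * Adding +/-1 before dividing rounds exact halves away from zero,
     * so sub-q3 precision is lost. */
    static int old_chroma_mv_q3(int luma_mv_q3) {
      int mv = luma_mv_q3;
      if (mv < 0)
        mv -= 1;
      else
        mv += 1;
      return mv / 2;  /* C division truncates toward zero */
    }

    int main(void) {
      const int luma_mv_q3 = 11;  /* 1 3/8 luma pel */

      /* Old: 11/8 luma pel -> 6/8 chroma pel (i.e. 12/16), rounded. */
      printf("old: %d/8 chroma pel\n", old_chroma_mv_q3(luma_mv_q3));

      /* New: chroma is at half resolution, so the q3 luma value is
       * numerically the q4 chroma value; 11/16 is represented exactly. */
      printf("new: %d/16 chroma pel\n", luma_mv_q3);
      return 0;
    }

This prints 6/8 for the old path and 11/16 for the new one, matching
the figures above.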
Slight gain observed (+.033 average on derf)

Change-Id: I320e160e8b12f1dd66aa0ce7966b5088870fe9f8
---
 vp9/common/vp9_reconinter.c       | 69 ++++++++++-----------------------------
 vp9/common/vp9_reconinter.h       |  3 +-
 vp9/encoder/vp9_temporal_filter.c | 16 ++++-----
 3 files changed, 24 insertions(+), 64 deletions(-)

diff --git a/vp9/common/vp9_reconinter.c b/vp9/common/vp9_reconinter.c
index 002e6eb..64929c1 100644
--- a/vp9/common/vp9_reconinter.c
+++ b/vp9/common/vp9_reconinter.c
@@ -363,21 +363,18 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
  */
 void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
                                   uint8_t *dst, int dst_stride,
-                                  const int_mv *fullpel_mv_q3,
-                                  const int_mv *frac_mv_q4,
+                                  const int_mv *mv_q4,
                                   const struct scale_factors *scale,
                                   int w, int h, int weight,
                                   const struct subpix_fn_table *subpix) {
-  const int mv_row_q4 = ((fullpel_mv_q3->as_mv.row >> 3) << 4)
-                        + (frac_mv_q4->as_mv.row & 0xf);
-  const int mv_col_q4 = ((fullpel_mv_q3->as_mv.col >> 3) << 4)
-                        + (frac_mv_q4->as_mv.col & 0xf);
   const int scaled_mv_row_q4 =
-      scale->scale_motion_vector_component_q4(mv_row_q4, scale->y_num,
-                                              scale->y_den, scale->y_offset_q4);
+      scale->scale_motion_vector_component_q4(mv_q4->as_mv.row,
+                                              scale->y_num, scale->y_den,
+                                              scale->y_offset_q4);
   const int scaled_mv_col_q4 =
-      scale->scale_motion_vector_component_q4(mv_col_q4, scale->x_num,
-                                              scale->x_den, scale->x_offset_q4);
+      scale->scale_motion_vector_component_q4(mv_q4->as_mv.col,
+                                              scale->x_num, scale->x_den,
+                                              scale->x_offset_q4);
   const int subpel_x = scaled_mv_col_q4 & 15;
   const int subpel_y = scaled_mv_row_q4 & 15;
 
@@ -973,30 +970,14 @@ static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd,
   uint8_t *uptr, *vptr;
   int pre_stride = which_mv ? xd->second_pre.uv_stride
                             : xd->pre.uv_stride;
-  int_mv _o16x16mv;
-  int_mv _16x16mv;
+  int_mv mv;
 
   struct scale_factors *scale = &xd->scale_factor_uv[which_mv];
+  mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
 
-  _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
   if (clamp_mvs)
-    clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
-
-  _o16x16mv = _16x16mv;
-  /* calc uv motion vectors */
-  if (_16x16mv.as_mv.row < 0)
-    _16x16mv.as_mv.row -= 1;
-  else
-    _16x16mv.as_mv.row += 1;
-
-  if (_16x16mv.as_mv.col < 0)
-    _16x16mv.as_mv.col -= 1;
-  else
-    _16x16mv.as_mv.col += 1;
-
-  _16x16mv.as_mv.row /= 2;
-  _16x16mv.as_mv.col /= 2;
+    clamp_mv_to_umv_border(&mv.as_mv, xd);
 
   uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
   vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);
 
@@ -1004,11 +985,11 @@ static void build_inter16x16_predictors_mbuv_w(MACROBLOCKD *xd,
   scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);
 
   vp9_build_inter_predictor_q4(
-      uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv,
+      uptr, pre_stride, dst_u, dst_uvstride, &mv,
       scale, 8, 8, which_mv ? weight : 0, &xd->subpix);
 
   vp9_build_inter_predictor_q4(
-      vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv,
+      vptr, pre_stride, dst_v, dst_uvstride, &mv,
       scale, 8, 8, which_mv ? weight : 0, &xd->subpix);
   }
 }
@@ -1046,30 +1027,14 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
   uint8_t *uptr, *vptr;
   int pre_stride = which_mv ? xd->second_pre.uv_stride
                             : xd->pre.uv_stride;
-  int_mv _o16x16mv;
-  int_mv _16x16mv;
+  int_mv mv;
 
   struct scale_factors *scale = &xd->scale_factor_uv[which_mv];
+  mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
 
-  _16x16mv.as_int = xd->mode_info_context->mbmi.mv[which_mv].as_int;
   if (clamp_mvs)
-    clamp_mv_to_umv_border(&_16x16mv.as_mv, xd);
-
-  _o16x16mv = _16x16mv;
-  /* calc uv motion vectors */
-  if (_16x16mv.as_mv.row < 0)
-    _16x16mv.as_mv.row -= 1;
-  else
-    _16x16mv.as_mv.row += 1;
-
-  if (_16x16mv.as_mv.col < 0)
-    _16x16mv.as_mv.col -= 1;
-  else
-    _16x16mv.as_mv.col += 1;
-
-  _16x16mv.as_mv.row /= 2;
-  _16x16mv.as_mv.col /= 2;
+    clamp_mv_to_umv_border(&mv.as_mv, xd);
 
   uptr = (which_mv ? xd->second_pre.u_buffer : xd->pre.u_buffer);
   vptr = (which_mv ? xd->second_pre.v_buffer : xd->pre.v_buffer);
 
@@ -1077,12 +1042,12 @@ void vp9_build_inter16x16_predictors_mbuv(MACROBLOCKD *xd,
   scale->set_scaled_offsets(scale, mb_row * 16, mb_col * 16);
 
   vp9_build_inter_predictor_q4(
-      uptr, pre_stride, dst_u, dst_uvstride, &_16x16mv, &_o16x16mv,
+      uptr, pre_stride, dst_u, dst_uvstride, &mv,
       scale, 8, 8,
       which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
 
   vp9_build_inter_predictor_q4(
-      vptr, pre_stride, dst_v, dst_uvstride, &_16x16mv, &_o16x16mv,
+      vptr, pre_stride, dst_v, dst_uvstride, &mv,
       scale, 8, 8,
       which_mv << (2 * CONFIG_IMPLICIT_COMPOUNDINTER_WEIGHT), &xd->subpix);
   }
diff --git a/vp9/common/vp9_reconinter.h b/vp9/common/vp9_reconinter.h
index 77fa9ab..38981e9 100644
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -67,8 +67,7 @@ void vp9_build_inter_predictor(const uint8_t *src, int src_stride,
 
 void vp9_build_inter_predictor_q4(const uint8_t *src, int src_stride,
                                   uint8_t *dst, int dst_stride,
-                                  const int_mv *fullpel_mv_q3,
-                                  const int_mv *frac_mv_q4,
+                                  const int_mv *mv_q4,
                                   const struct scale_factors *scale,
                                   int w, int h, int do_avg,
                                   const struct subpix_fn_table *subpix);
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c
index cf84fa1..6149518 100644
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -41,18 +41,14 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
                                             int mv_col,
                                             uint8_t *pred) {
   const int which_mv = 0;
-  int_mv subpel_mv;
-  int_mv fullpel_mv;
+  int_mv mv;
 
-  subpel_mv.as_mv.row = mv_row;
-  subpel_mv.as_mv.col = mv_col;
-  // TODO(jkoleszar): Make this rounding consistent with the rest of the code
-  fullpel_mv.as_mv.row = (mv_row >> 1) & ~7;
-  fullpel_mv.as_mv.col = (mv_col >> 1) & ~7;
+  mv.as_mv.row = mv_row;
+  mv.as_mv.col = mv_col;
 
   vp9_build_inter_predictor(y_mb_ptr, stride,
                             &pred[0], 16,
-                            &subpel_mv,
+                            &mv,
                             &xd->scale_factor[which_mv],
                             16, 16,
                             which_mv <<
@@ -63,7 +59,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
 
   vp9_build_inter_predictor_q4(u_mb_ptr, stride,
                                &pred[256], 8,
-                               &fullpel_mv, &subpel_mv,
+                               &mv,
                                &xd->scale_factor_uv[which_mv],
                                8, 8,
                                which_mv <<
@@ -72,7 +68,7 @@ static void temporal_filter_predictors_mb_c(MACROBLOCKD *xd,
 
   vp9_build_inter_predictor_q4(v_mb_ptr, stride,
                                &pred[320], 8,
-                               &fullpel_mv, &subpel_mv,
+                               &mv,
                                &xd->scale_factor_uv[which_mv],
                                8, 8,
                                which_mv <<
-- 
2.7.4