Apply fast motion search to golden reference frame
authorJingning Han <jingning@google.com>
Tue, 10 Mar 2015 01:55:38 +0000 (18:55 -0700)
committerJingning Han <jingning@google.com>
Wed, 11 Mar 2015 23:03:49 +0000 (16:03 -0700)
This commit enables the rtc coding mode to run integral-projection-based
motion search for the golden reference frame. It improves speed -6
compression performance by 1.1% on average: 3.46% for jimred_vga,
6.46% for tacomascmvvga, and 0.5% for the vidyo clips. Speed -6
encoding is about 6% slower.

Change-Id: I0fe402ad2edf0149d0349ad304ab9b2abdf0c804

vp9/encoder/vp9_pickmode.c
vp9/encoder/x86/vp9_avg_intrin_sse2.c

index 6c2576a..23a2569 100644 (file)
@@ -784,15 +784,43 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
         continue;
 
       if (this_mode == NEWMV) {
-        if (ref_frame > LAST_FRAME)
-          continue;
         if (cpi->sf.partition_search_type != VAR_BASED_PARTITION &&
             best_rdc.rdcost < (int64_t)(1 << num_pels_log2_lookup[bsize]))
           continue;
-        if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
-                                    &frame_mv[NEWMV][ref_frame],
-                                    &rate_mv, best_rdc.rdcost))
+
+        if (ref_frame > LAST_FRAME) {
+          int tmp_sad;
+          int dis, cost_list[5];
+
+          if (bsize < BLOCK_16X16)
+            continue;
+
+          tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize);
+          if (tmp_sad > x->pred_mv_sad[LAST_FRAME])
+            continue;
+
+          frame_mv[NEWMV][ref_frame].as_int = mbmi->mv[0].as_int;
+          rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
+                                    &mbmi->ref_mvs[ref_frame][0].as_mv,
+                                    x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+          frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
+          frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
+
+          cpi->find_fractional_mv_step(x, &frame_mv[NEWMV][ref_frame].as_mv,
+                                       &mbmi->ref_mvs[ref_frame][0].as_mv,
+                                       cpi->common.allow_high_precision_mv,
+                                       x->errorperbit,
+                                       &cpi->fn_ptr[bsize],
+                                       cpi->sf.mv.subpel_force_stop,
+                                       cpi->sf.mv.subpel_iters_per_step,
+                                       cond_cost_list(cpi, cost_list),
+                                       x->nmvjointcost, x->mvcost, &dis,
+                                       &x->pred_sse[ref_frame], NULL, 0, 0);
+        } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+                                           &frame_mv[NEWMV][ref_frame],
+                                           &rate_mv, best_rdc.rdcost)) {
           continue;
+        }
       }
 
       if (this_mode != NEARESTMV &&
@@ -817,7 +845,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x,
       }
 
       if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
-          pred_filter_search &&
+          pred_filter_search && (ref_frame == LAST_FRAME) &&
           ((mbmi->mv[0].as_mv.row & 0x07) != 0 ||
            (mbmi->mv[0].as_mv.col & 0x07) != 0)) {
         int pf_rate[3];
index f499499..618b5f7 100644 (file)
@@ -61,7 +61,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
                           const int ref_stride, const int height) {
   int idx;
   __m128i zero = _mm_setzero_si128();
-  __m128i src_line = _mm_load_si128((const __m128i *)ref);
+  __m128i src_line = _mm_loadu_si128((const __m128i *)ref);
   __m128i s0 = _mm_unpacklo_epi8(src_line, zero);
   __m128i s1 = _mm_unpackhi_epi8(src_line, zero);
   __m128i t0, t1;
@@ -69,14 +69,14 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
   ref += ref_stride;
 
   for (idx = 1; idx < height_1; idx += 2) {
-    src_line = _mm_load_si128((const __m128i *)ref);
+    src_line = _mm_loadu_si128((const __m128i *)ref);
     t0 = _mm_unpacklo_epi8(src_line, zero);
     t1 = _mm_unpackhi_epi8(src_line, zero);
     s0 = _mm_adds_epu16(s0, t0);
     s1 = _mm_adds_epu16(s1, t1);
     ref += ref_stride;
 
-    src_line = _mm_load_si128((const __m128i *)ref);
+    src_line = _mm_loadu_si128((const __m128i *)ref);
     t0 = _mm_unpacklo_epi8(src_line, zero);
     t1 = _mm_unpackhi_epi8(src_line, zero);
     s0 = _mm_adds_epu16(s0, t0);
@@ -84,7 +84,7 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
     ref += ref_stride;
   }
 
-  src_line = _mm_load_si128((const __m128i *)ref);
+  src_line = _mm_loadu_si128((const __m128i *)ref);
   t0 = _mm_unpacklo_epi8(src_line, zero);
   t1 = _mm_unpackhi_epi8(src_line, zero);
   s0 = _mm_adds_epu16(s0, t0);
@@ -101,9 +101,9 @@ void vp9_int_pro_row_sse2(int16_t *hbuf, uint8_t const*ref,
     s1 = _mm_srai_epi16(s1, 3);
   }
 
-  _mm_store_si128((__m128i *)hbuf, s0);
+  _mm_storeu_si128((__m128i *)hbuf, s0);
   hbuf += 8;
-  _mm_store_si128((__m128i *)hbuf, s1);
+  _mm_storeu_si128((__m128i *)hbuf, s1);
 }
 
 int16_t vp9_int_pro_col_sse2(uint8_t const *ref, const int width) {