Make vp9 subpixel match vp8
author: Johann <johannkoenig@google.com>
Tue, 26 May 2015 18:30:25 +0000 (11:30 -0700)
committer: Johann <johannkoenig@google.com>
Thu, 4 Jun 2015 05:10:51 +0000 (22:10 -0700)
The only difference between the two was that the vp9 function allowed
for every step in the bilinear filter (16 steps) while vp8 only allowed
for half of those. Since all the call sites in vp9 shift the input left
by one bit (<< 1), it only ever used the same steps as vp8.

This will allow moving the subpel variance to vpx_dsp with the rest of
the variance functions.

Change-Id: I6fa2509350a2dc610c46b3e15bde98a15a084b75

test/variance_test.cc
vp9/common/vp9_filter.h
vp9/encoder/arm/neon/vp9_variance_neon.c
vp9/encoder/vp9_mcomp.c
vp9/encoder/vp9_variance.c
vp9/encoder/x86/vp9_highbd_subpel_variance.asm
vp9/encoder/x86/vp9_subpel_variance.asm
vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c

index 02d0a33..08a247d 100644 (file)
@@ -21,6 +21,9 @@
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
+#if CONFIG_VP8_ENCODER
+# include "./vp8_rtcd.h"
+#endif  // CONFIG_VP8_ENCODER
 #if CONFIG_VP9_ENCODER
 # include "./vp9_rtcd.h"
 # include "vp9/encoder/vp9_variance.h"
@@ -32,10 +35,13 @@ namespace {
 typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
                                         const uint8_t *b, int b_stride,
                                         unsigned int *sse);
+typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
+                                         int xoffset, int yoffset,
+                                         const uint8_t *b, int b_stride,
+                                         unsigned int *sse);
 typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
                                       const uint8_t *b, int b_stride);
 
-
 using ::std::tr1::get;
 using ::std::tr1::make_tuple;
 using ::std::tr1::tuple;
@@ -102,6 +108,12 @@ static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref,
                                 (l2w + l2h)));
 }
 
+/* The subpel reference functions differ from the codec version in one aspect:
+ * they calculate the bilinear factors directly instead of using a lookup table
+ * and therefore upshift xoff and yoff by 1. Only every other calculated value
+ * is used so the codec version shrinks the table to save space and maintain
+ * compatibility with vp8.
+ */
 static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
                                     int l2w, int l2h, int xoff, int yoff,
                                     uint32_t *sse_ptr,
@@ -111,6 +123,10 @@ static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
   uint64_t sse = 0;
   const int w = 1 << l2w;
   const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
   for (int y = 0; y < h; y++) {
     for (int x = 0; x < w; x++) {
       // Bilinear interpolation at a 16th pel step.
@@ -480,6 +496,10 @@ static uint32_t subpel_avg_variance_ref(const uint8_t *ref,
   uint64_t sse = 0;
   const int w = 1 << l2w;
   const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
   for (int y = 0; y < h; y++) {
     for (int x = 0; x < w; x++) {
       // bilinear interpolation at a 16th pel step
@@ -598,8 +618,8 @@ class SubpelVarianceTest
 
 template<typename SubpelVarianceFunctionType>
 void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
-  for (int x = 0; x < 16; ++x) {
-    for (int y = 0; y < 16; ++y) {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
       if (!use_high_bit_depth_) {
         for (int j = 0; j < block_size_; j++) {
           src_[j] = rnd_.Rand8();
@@ -621,8 +641,9 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
       unsigned int var1;
       ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
                                                        src_, width_, &sse1));
-      const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
-                                                    log2height_, x, y, &sse2,
+      const unsigned int var2 = subpel_variance_ref(ref_, src_,
+                                                    log2width_, log2height_,
+                                                    x, y, &sse2,
                                                     use_high_bit_depth_,
                                                     bit_depth_);
       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
@@ -636,8 +657,8 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
   // Compare against reference.
   // Src: Set the first half of values to 0, the second half to the maximum.
   // Ref: Set the first half of values to the maximum, the second half to 0.
-  for (int x = 0; x < 16; ++x) {
-    for (int y = 0; y < 16; ++y) {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
       const int half = block_size_ / 2;
       if (!use_high_bit_depth_) {
         memset(src_, 0, half);
@@ -658,10 +679,10 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
       ASM_REGISTER_STATE_CHECK(
           var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1));
       const unsigned int var2 =
-          subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2,
-                              use_high_bit_depth_, bit_depth_);
-      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
-      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+          subpel_variance_ref(ref_, src_, log2width_, log2height_,
+                              x, y, &sse2, use_high_bit_depth_, bit_depth_);
+      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
     }
   }
 }
@@ -669,8 +690,8 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
 #if CONFIG_VP9_ENCODER
 template<>
 void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
-  for (int x = 0; x < 16; ++x) {
-    for (int y = 0; y < 16; ++y) {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
       if (!use_high_bit_depth_) {
         for (int j = 0; j < block_size_; j++) {
           src_[j] = rnd_.Rand8();
@@ -795,7 +816,6 @@ const VarianceMxNFunc highbd_8_mse16x16_c = vpx_highbd_8_mse16x16_c;
 const VarianceMxNFunc highbd_8_mse16x8_c = vpx_highbd_8_mse16x8_c;
 const VarianceMxNFunc highbd_8_mse8x16_c = vpx_highbd_8_mse8x16_c;
 const VarianceMxNFunc highbd_8_mse8x8_c = vpx_highbd_8_mse8x8_c;
-
 INSTANTIATE_TEST_CASE_P(
     C, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_c),
                                         make_tuple(4, 4, highbd_12_mse16x8_c),
@@ -811,7 +831,6 @@ INSTANTIATE_TEST_CASE_P(
                                         make_tuple(4, 4, highbd_8_mse8x8_c)));
 */
 
-
 const VarianceMxNFunc highbd_12_variance64x64_c = vpx_highbd_12_variance64x64_c;
 const VarianceMxNFunc highbd_12_variance64x32_c = vpx_highbd_12_variance64x32_c;
 const VarianceMxNFunc highbd_12_variance32x64_c = vpx_highbd_12_variance32x64_c;
@@ -976,7 +995,6 @@ const VarianceMxNFunc highbd_8_mse16x16_sse2 = vpx_highbd_8_mse16x16_sse2;
 const VarianceMxNFunc highbd_8_mse16x8_sse2 = vpx_highbd_8_mse16x8_sse2;
 const VarianceMxNFunc highbd_8_mse8x16_sse2 = vpx_highbd_8_mse8x16_sse2;
 const VarianceMxNFunc highbd_8_mse8x8_sse2 = vpx_highbd_8_mse8x8_sse2;
-
 INSTANTIATE_TEST_CASE_P(
     SSE2, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_sse2),
                                            make_tuple(4, 3, highbd_12_mse16x8_sse2),
@@ -1088,8 +1106,15 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2
 
+#if CONFIG_VP8
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP8SubpelVarianceTest;
+
+TEST_P(VP8SubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(VP8SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
-typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceTest;
 typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t> VP9SubpelAvgVarianceTest;
 
 TEST_P(VP9SubpelVarianceTest, Ref) { RefTest(); }
@@ -1097,7 +1122,7 @@ TEST_P(VP9SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
 TEST_P(VP9SubpelAvgVarianceTest, Ref) { RefTest(); }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceHighTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceHighTest;
 typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t>
     VP9SubpelAvgVarianceHighTest;
 
@@ -1106,32 +1131,19 @@ TEST_P(VP9SubpelVarianceHighTest, ExtremeRef) { ExtremeRefTest(); }
 TEST_P(VP9SubpelAvgVarianceHighTest, Ref) { RefTest(); }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-const vp9_subpixvariance_fn_t subpel_variance4x4_c =
-    vp9_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t subpel_variance4x8_c =
-    vp9_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t subpel_variance8x4_c =
-    vp9_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t subpel_variance8x8_c =
-    vp9_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t subpel_variance8x16_c =
-    vp9_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t subpel_variance16x8_c =
-    vp9_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t subpel_variance16x16_c =
-    vp9_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t subpel_variance16x32_c =
-    vp9_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t subpel_variance32x16_c =
-    vp9_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t subpel_variance32x32_c =
-    vp9_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t subpel_variance32x64_c =
-    vp9_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t subpel_variance64x32_c =
-    vp9_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t subpel_variance64x64_c =
-    vp9_sub_pixel_variance64x64_c;
+const SubpixVarMxNFunc subpel_variance4x4_c = vp9_sub_pixel_variance4x4_c;
+const SubpixVarMxNFunc subpel_variance4x8_c = vp9_sub_pixel_variance4x8_c;
+const SubpixVarMxNFunc subpel_variance8x4_c = vp9_sub_pixel_variance8x4_c;
+const SubpixVarMxNFunc subpel_variance8x8_c = vp9_sub_pixel_variance8x8_c;
+const SubpixVarMxNFunc subpel_variance8x16_c = vp9_sub_pixel_variance8x16_c;
+const SubpixVarMxNFunc subpel_variance16x8_c = vp9_sub_pixel_variance16x8_c;
+const SubpixVarMxNFunc subpel_variance16x16_c = vp9_sub_pixel_variance16x16_c;
+const SubpixVarMxNFunc subpel_variance16x32_c = vp9_sub_pixel_variance16x32_c;
+const SubpixVarMxNFunc subpel_variance32x16_c = vp9_sub_pixel_variance32x16_c;
+const SubpixVarMxNFunc subpel_variance32x32_c = vp9_sub_pixel_variance32x32_c;
+const SubpixVarMxNFunc subpel_variance32x64_c = vp9_sub_pixel_variance32x64_c;
+const SubpixVarMxNFunc subpel_variance64x32_c = vp9_sub_pixel_variance64x32_c;
+const SubpixVarMxNFunc subpel_variance64x64_c = vp9_sub_pixel_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9SubpelVarianceTest,
     ::testing::Values(make_tuple(2, 2, subpel_variance4x4_c, 0),
@@ -1147,6 +1159,23 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(5, 6, subpel_variance32x64_c, 0),
                       make_tuple(6, 5, subpel_variance64x32_c, 0),
                       make_tuple(6, 6, subpel_variance64x64_c, 0)));
+
+#if CONFIG_VP8
+const SubpixVarMxNFunc vp8_subpel_variance16x16_c =
+    vp8_sub_pixel_variance16x16_c;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_c = vp8_sub_pixel_variance16x8_c;
+const SubpixVarMxNFunc vp8_subpel_variance8x16_c = vp8_sub_pixel_variance8x16_c;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_c = vp8_sub_pixel_variance8x8_c;
+const SubpixVarMxNFunc vp8_subpel_variance4x4_c = vp8_sub_pixel_variance4x4_c;
+INSTANTIATE_TEST_CASE_P(
+    C, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_c, 0),
+                      make_tuple(3, 3, vp8_subpel_variance8x8_c, 0),
+                      make_tuple(3, 4, vp8_subpel_variance8x16_c, 0),
+                      make_tuple(4, 3, vp8_subpel_variance16x8_c, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_c, 0)));
+#endif  // CONFIG_VP8
+
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_c =
     vp9_sub_pixel_avg_variance4x4_c;
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_c =
@@ -1189,83 +1218,83 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(6, 5, subpel_avg_variance64x32_c, 0),
                       make_tuple(6, 6, subpel_avg_variance64x64_c, 0)));
 #if CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance4x4_c =
     vp9_highbd_10_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance4x8_c =
     vp9_highbd_10_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x4_c =
     vp9_highbd_10_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x8_c =
     vp9_highbd_10_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x16_c =
     vp9_highbd_10_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x8_c =
     vp9_highbd_10_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x16_c =
     vp9_highbd_10_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x32_c =
     vp9_highbd_10_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x16_c =
     vp9_highbd_10_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x32_c =
     vp9_highbd_10_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x64_c =
     vp9_highbd_10_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x32_c =
     vp9_highbd_10_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x64_c =
     vp9_highbd_10_sub_pixel_variance64x64_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance4x4_c =
     vp9_highbd_12_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance4x8_c =
     vp9_highbd_12_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x4_c =
     vp9_highbd_12_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x8_c =
     vp9_highbd_12_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x16_c =
     vp9_highbd_12_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x8_c =
     vp9_highbd_12_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x16_c =
     vp9_highbd_12_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x32_c =
     vp9_highbd_12_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x16_c =
     vp9_highbd_12_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x32_c =
     vp9_highbd_12_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x64_c =
     vp9_highbd_12_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x32_c =
     vp9_highbd_12_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x64_c =
     vp9_highbd_12_sub_pixel_variance64x64_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_subpel_variance4x4_c =
     vp9_highbd_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance4x8_c =
     vp9_highbd_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x4_c =
     vp9_highbd_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x8_c =
     vp9_highbd_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x16_c =
     vp9_highbd_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x8_c =
     vp9_highbd_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x16_c =
     vp9_highbd_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x32_c =
     vp9_highbd_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x16_c =
     vp9_highbd_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x32_c =
     vp9_highbd_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x64_c =
     vp9_highbd_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance64x32_c =
     vp9_highbd_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_subpel_variance64x64_c =
     vp9_highbd_sub_pixel_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9SubpelVarianceHighTest,
@@ -1431,34 +1460,48 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_VP9_ENCODER
 
+#if CONFIG_VP8
+#if HAVE_MMX
+const SubpixVarMxNFunc subpel_variance16x16_mmx =
+    vp8_sub_pixel_variance16x16_mmx;
+const SubpixVarMxNFunc subpel_variance16x8_mmx = vp8_sub_pixel_variance16x8_mmx;
+const SubpixVarMxNFunc subpel_variance8x16_mmx = vp8_sub_pixel_variance8x16_mmx;
+const SubpixVarMxNFunc subpel_variance8x8_mmx = vp8_sub_pixel_variance8x8_mmx;
+const SubpixVarMxNFunc subpel_variance4x4_mmx = vp8_sub_pixel_variance4x4_mmx;
+INSTANTIATE_TEST_CASE_P(
+    MMX, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(4, 4, subpel_variance16x16_mmx, 0),
+                      make_tuple(4, 3, subpel_variance16x8_mmx, 0),
+                      make_tuple(3, 4, subpel_variance8x16_mmx, 0),
+                      make_tuple(3, 3, subpel_variance8x8_mmx, 0),
+                      make_tuple(2, 2, subpel_variance4x4_mmx, 0)));
+#endif  // HAVE_MMX
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
 #if HAVE_SSE2
 #if CONFIG_USE_X86INC
-const vp9_subpixvariance_fn_t subpel_variance4x4_sse =
-    vp9_sub_pixel_variance4x4_sse;
-const vp9_subpixvariance_fn_t subpel_variance4x8_sse =
-    vp9_sub_pixel_variance4x8_sse;
-const vp9_subpixvariance_fn_t subpel_variance8x4_sse2 =
-    vp9_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t subpel_variance8x8_sse2 =
-    vp9_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc subpel_variance4x4_sse = vp9_sub_pixel_variance4x4_sse;
+const SubpixVarMxNFunc subpel_variance4x8_sse = vp9_sub_pixel_variance4x8_sse;
+const SubpixVarMxNFunc subpel_variance8x4_sse2 = vp9_sub_pixel_variance8x4_sse2;
+const SubpixVarMxNFunc subpel_variance8x8_sse2 = vp9_sub_pixel_variance8x8_sse2;
+const SubpixVarMxNFunc subpel_variance8x16_sse2 =
     vp9_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc subpel_variance16x8_sse2 =
     vp9_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc subpel_variance16x16_sse2 =
     vp9_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc subpel_variance16x32_sse2 =
     vp9_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc subpel_variance32x16_sse2 =
     vp9_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc subpel_variance32x32_sse2 =
     vp9_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc subpel_variance32x64_sse2 =
     vp9_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc subpel_variance64x32_sse2 =
     vp9_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc subpel_variance64x64_sse2 =
     vp9_sub_pixel_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9SubpelVarianceTest,
@@ -1517,71 +1560,71 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(6, 5, subpel_avg_variance64x32_sse2, 0),
                       make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0)));
 #if CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x4_sse2 =
     vp9_highbd_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x8_sse2 =
     vp9_highbd_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x16_sse2 =
     vp9_highbd_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x8_sse2 =
     vp9_highbd_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x16_sse2 =
     vp9_highbd_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x32_sse2 =
     vp9_highbd_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x16_sse2 =
     vp9_highbd_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x32_sse2 =
     vp9_highbd_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x64_sse2 =
     vp9_highbd_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance64x32_sse2 =
     vp9_highbd_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance64x64_sse2 =
     vp9_highbd_sub_pixel_variance64x64_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x4_sse2 =
     vp9_highbd_10_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x8_sse2 =
     vp9_highbd_10_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x16_sse2 =
     vp9_highbd_10_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x8_sse2 =
     vp9_highbd_10_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x16_sse2 =
     vp9_highbd_10_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x32_sse2 =
     vp9_highbd_10_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x16_sse2 =
     vp9_highbd_10_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x32_sse2 =
     vp9_highbd_10_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x64_sse2 =
     vp9_highbd_10_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x32_sse2 =
     vp9_highbd_10_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x64_sse2 =
     vp9_highbd_10_sub_pixel_variance64x64_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x4_sse2 =
     vp9_highbd_12_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x8_sse2 =
     vp9_highbd_12_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x16_sse2 =
     vp9_highbd_12_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x8_sse2 =
     vp9_highbd_12_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x16_sse2 =
     vp9_highbd_12_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x32_sse2 =
     vp9_highbd_12_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x16_sse2 =
     vp9_highbd_12_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x32_sse2 =
     vp9_highbd_12_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x64_sse2 =
     vp9_highbd_12_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x32_sse2 =
     vp9_highbd_12_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x64_sse2 =
     vp9_highbd_12_sub_pixel_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9SubpelVarianceHighTest,
@@ -1725,35 +1768,56 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_ENCODER
 
+#if CONFIG_VP8
+#if HAVE_SSE2
+const SubpixVarMxNFunc vp8_subpel_variance16x16_sse2 =
+    vp8_sub_pixel_variance16x16_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_sse2 =
+    vp8_sub_pixel_variance16x8_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance8x16_sse2 =
+    vp8_sub_pixel_variance8x16_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_sse2 =
+    vp8_sub_pixel_variance8x8_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance4x4_sse2 =
+    vp8_sub_pixel_variance4x4_wmt;
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_sse2, 0),
+                      make_tuple(3, 3, vp8_subpel_variance8x8_sse2, 0),
+                      make_tuple(3, 4, vp8_subpel_variance8x16_sse2, 0),
+                      make_tuple(4, 3, vp8_subpel_variance16x8_sse2, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_sse2, 0)));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
 #if HAVE_SSSE3
 #if CONFIG_USE_X86INC
-
-const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
+const SubpixVarMxNFunc subpel_variance4x4_ssse3 =
     vp9_sub_pixel_variance4x4_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance4x8_ssse3 =
     vp9_sub_pixel_variance4x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x4_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x4_ssse3 =
     vp9_sub_pixel_variance8x4_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x8_ssse3 =
     vp9_sub_pixel_variance8x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x16_ssse3 =
     vp9_sub_pixel_variance8x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x8_ssse3 =
     vp9_sub_pixel_variance16x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x16_ssse3 =
     vp9_sub_pixel_variance16x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x32_ssse3 =
     vp9_sub_pixel_variance16x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x16_ssse3 =
     vp9_sub_pixel_variance32x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x32_ssse3 =
     vp9_sub_pixel_variance32x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x64_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x64_ssse3 =
     vp9_sub_pixel_variance32x64_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance64x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance64x32_ssse3 =
     vp9_sub_pixel_variance64x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance64x64_ssse3 =
+const SubpixVarMxNFunc subpel_variance64x64_ssse3 =
     vp9_sub_pixel_variance64x64_ssse3;
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VP9SubpelVarianceTest,
@@ -1815,6 +1879,19 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // HAVE_SSSE3
 #endif  // CONFIG_VP9_ENCODER
 
+#if CONFIG_VP8
+#if HAVE_SSSE3
+const SubpixVarMxNFunc vp8_subpel_variance16x16_ssse3 =
+    vp8_sub_pixel_variance16x16_ssse3;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_ssse3 =
+    vp8_sub_pixel_variance16x8_ssse3;
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(4, 3, vp8_subpel_variance16x8_ssse3, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_ssse3, 0)));
+#endif  // HAVE_SSSE3
+#endif  // CONFIG_VP8
+
 #if HAVE_AVX2
 const VarianceMxNFunc mse16x16_avx2 = vpx_mse16x16_avx2;
 INSTANTIATE_TEST_CASE_P(AVX2, VpxMseTest,
@@ -1834,9 +1911,9 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(4, 4, variance16x16_avx2, 0)));
 
 #if CONFIG_VP9_ENCODER
-const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
+const SubpixVarMxNFunc subpel_variance32x32_avx2 =
     vp9_sub_pixel_variance32x32_avx2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
+const SubpixVarMxNFunc subpel_variance64x64_avx2 =
     vp9_sub_pixel_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
     AVX2, VP9SubpelVarianceTest,
@@ -1854,6 +1931,19 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // CONFIG_VP9_ENCODER
 #endif  // HAVE_AVX2
 
+#if CONFIG_VP8
+#if HAVE_MEDIA
+const SubpixVarMxNFunc subpel_variance16x16_media =
+    vp8_sub_pixel_variance16x16_armv6;
+const SubpixVarMxNFunc subpel_variance8x8_media =
+    vp8_sub_pixel_variance8x8_armv6;
+INSTANTIATE_TEST_CASE_P(
+    MEDIA, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(3, 3, subpel_variance8x8_media, 0),
+                      make_tuple(4, 4, subpel_variance16x16_media, 0)));
+#endif  // HAVE_MEDIA
+#endif  // CONFIG_VP8
+
 #if HAVE_NEON
 const Get4x4SseFunc get4x4sse_cs_neon = vpx_get4x4sse_cs_neon;
 INSTANTIATE_TEST_CASE_P(NEON, VpxSseTest,
@@ -1882,14 +1972,26 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(3, 4, variance8x16_neon, 0),
                       make_tuple(3, 3, variance8x8_neon, 0)));
 
+#if CONFIG_VP8
+#if HAVE_NEON_ASM
+const SubpixVarMxNFunc vp8_subpel_variance16x16_neon =
+    vp8_sub_pixel_variance16x16_neon;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_neon =
+    vp8_sub_pixel_variance8x8_neon;
+INSTANTIATE_TEST_CASE_P(
+    NEON, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(3, 3, vp8_subpel_variance8x8_neon, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_neon, 0)));
+#endif  // HAVE_NEON_ASM
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
-const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
-    vp9_sub_pixel_variance8x8_neon;
-const vp9_subpixvariance_fn_t subpel_variance16x16_neon =
+const SubpixVarMxNFunc subpel_variance8x8_neon = vp9_sub_pixel_variance8x8_neon;
+const SubpixVarMxNFunc subpel_variance16x16_neon =
     vp9_sub_pixel_variance16x16_neon;
-const vp9_subpixvariance_fn_t subpel_variance32x32_neon =
+const SubpixVarMxNFunc subpel_variance32x32_neon =
     vp9_sub_pixel_variance32x32_neon;
-const vp9_subpixvariance_fn_t subpel_variance64x64_neon =
+const SubpixVarMxNFunc subpel_variance64x64_neon =
     vp9_sub_pixel_variance64x64_neon;
 INSTANTIATE_TEST_CASE_P(
     NEON, VP9SubpelVarianceTest,
index d963ee2..808a270 100644 (file)
@@ -43,14 +43,6 @@ typedef int16_t InterpKernel[SUBPEL_TAPS];
 
 const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
 
-DECLARE_ALIGNED(256, extern const InterpKernel,
-                vp9_bilinear_filters[SUBPEL_SHIFTS]);
-
-// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
-// filter kernel as a 2 tap filter.
-#define BILINEAR_FILTERS_2TAP(x) \
-  (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
index 166156a..0ac194e 100644 (file)
 #include "vpx_ports/mem.h"
 #include "vpx/vpx_integer.h"
 
-#include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_filter.h"
 
-#include "vp9/encoder/vp9_variance.h"
+static uint8_t bilinear_filters[8][2] = {
+  { 128,   0, },
+  { 112,  16, },
+  {  96,  32, },
+  {  80,  48, },
+  {  64,  64, },
+  {  48,  80, },
+  {  32,  96, },
+  {  16, 112, },
+};
 
 static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
                                       uint8_t *output_ptr,
@@ -27,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
                                       int pixel_step,
                                       unsigned int output_height,
                                       unsigned int output_width,
-                                      const int16_t *vp9_filter) {
-  const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
-  const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+                                      const uint8_t *vp9_filter) {
+  const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+  const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
   unsigned int i;
   for (i = 0; i < output_height; ++i) {
     const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
@@ -50,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
                                        int pixel_step,
                                        unsigned int output_height,
                                        unsigned int output_width,
-                                       const int16_t *vp9_filter) {
-  const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
-  const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+                                       const uint8_t *vp9_filter) {
+  const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+  const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
   unsigned int i, j;
   for (i = 0; i < output_height; ++i) {
     for (j = 0; j < output_width; j += 16) {
@@ -84,9 +92,9 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src,
 
   var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
                             9, 8,
-                            BILINEAR_FILTERS_2TAP(xoffset));
+                            bilinear_filters[xoffset]);
   var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
-                            8, BILINEAR_FILTERS_2TAP(yoffset));
+                            8, bilinear_filters[yoffset]);
   return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
 }
 
@@ -102,9 +110,9 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src,
 
   var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
                              17, 16,
-                             BILINEAR_FILTERS_2TAP(xoffset));
+                             bilinear_filters[xoffset]);
   var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
-                             16, BILINEAR_FILTERS_2TAP(yoffset));
+                             16, bilinear_filters[yoffset]);
   return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
 }
 
@@ -120,9 +128,9 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
 
   var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
                              33, 32,
-                             BILINEAR_FILTERS_2TAP(xoffset));
+                             bilinear_filters[xoffset]);
   var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
-                             32, BILINEAR_FILTERS_2TAP(yoffset));
+                             32, bilinear_filters[yoffset]);
   return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
 }
 
@@ -138,8 +146,8 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
 
   var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
                              65, 64,
-                             BILINEAR_FILTERS_2TAP(xoffset));
+                             bilinear_filters[xoffset]);
   var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
-                             64, BILINEAR_FILTERS_2TAP(yoffset));
+                             64, bilinear_filters[yoffset]);
   return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
 }
index 15f9582..2342726 100644 (file)
@@ -162,9 +162,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
       error_per_bit + 4096) >> 13 : 0)
 
 
-// convert motion vector component to offset for svf calc
+// convert motion vector component to offset for sv[a]f calc
 static INLINE int sp(int x) {
-  return (x & 7) << 1;
+  return x & 7;
 }
 
 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
@@ -679,16 +679,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
       tc = bc + search_step[idx].col;
       if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
         const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
-        int row_offset = (tr & 0x07) << 1;
-        int col_offset = (tc & 0x07) << 1;
         MV this_mv;
         this_mv.row = tr;
         this_mv.col = tc;
         if (second_pred == NULL)
-          thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+          thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, &sse);
         else
-          thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+          thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                               src_address, src_stride, &sse, second_pred);
         cost_array[idx] = thismse +
             mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -709,14 +707,12 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
     tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
     if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
       const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
-      int row_offset = (tr & 0x07) << 1;
-      int col_offset = (tc & 0x07) << 1;
       MV this_mv = {tr, tc};
       if (second_pred == NULL)
-        thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+        thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                            src_address, src_stride, &sse);
       else
-        thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+        thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                             src_address, src_stride, &sse, second_pred);
       cost_array[4] = thismse +
           mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
index 1f6b083..c571b7c 100644 (file)
 
 #include "vp9/encoder/vp9_variance.h"
 
+static uint8_t bilinear_filters[8][2] = {
+  { 128,   0, },
+  { 112,  16, },
+  {  96,  32, },
+  {  80,  48, },
+  {  64,  64, },
+  {  48,  80, },
+  {  32,  96, },
+  {  16, 112, },
+};
+
 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
 // or vertical direction to produce the filtered output block. Used to implement
 // first-pass of 2-D separable filter.
@@ -33,7 +44,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
                                               int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
-                                              const int16_t *vp9_filter) {
+                                              const uint8_t *vp9_filter) {
   unsigned int i, j;
 
   for (i = 0; i < output_height; i++) {
@@ -65,7 +76,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
                                                unsigned int pixel_step,
                                                unsigned int output_height,
                                                unsigned int output_width,
-                                               const int16_t *vp9_filter) {
+                                               const uint8_t *vp9_filter) {
   unsigned int  i, j;
 
   for (i = 0; i < output_height; i++) {
@@ -91,9 +102,9 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
   uint8_t temp2[H * W]; \
 \
   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
-                                    BILINEAR_FILTERS_2TAP(xoffset)); \
+                                    bilinear_filters[xoffset]); \
   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                     BILINEAR_FILTERS_2TAP(yoffset)); \
+                                     bilinear_filters[yoffset]); \
 \
   return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
 }
@@ -110,9 +121,9 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
 \
   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
-                                    BILINEAR_FILTERS_2TAP(xoffset)); \
+                                    bilinear_filters[xoffset]); \
   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                     BILINEAR_FILTERS_2TAP(yoffset)); \
+                                     bilinear_filters[yoffset]); \
 \
   vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
 \
@@ -166,7 +177,7 @@ static void highbd_var_filter_block2d_bil_first_pass(
     int pixel_step,
     unsigned int output_height,
     unsigned int output_width,
-    const int16_t *vp9_filter) {
+    const uint8_t *vp9_filter) {
   unsigned int i, j;
   uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
   for (i = 0; i < output_height; i++) {
@@ -192,7 +203,7 @@ static void highbd_var_filter_block2d_bil_second_pass(
     unsigned int pixel_step,
     unsigned int output_height,
     unsigned int output_width,
-    const int16_t *vp9_filter) {
+    const uint8_t *vp9_filter) {
   unsigned int  i, j;
 
   for (i = 0; i < output_height; i++) {
@@ -219,9 +230,9 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
                                           dst_stride, sse); \
@@ -236,9 +247,9 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                              W, dst, dst_stride, sse); \
@@ -253,9 +264,9 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                              W, dst, dst_stride, sse); \
@@ -273,9 +284,9 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \
@@ -295,9 +306,9 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \
@@ -317,9 +328,9 @@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \
index 987729f..4594bb1 100644 (file)
@@ -14,35 +14,19 @@ SECTION_RODATA
 pw_8: times  8 dw  8
 bilin_filter_m_sse2: times  8 dw 16
                      times  8 dw  0
-                     times  8 dw 15
-                     times  8 dw  1
                      times  8 dw 14
                      times  8 dw  2
-                     times  8 dw 13
-                     times  8 dw  3
                      times  8 dw 12
                      times  8 dw  4
-                     times  8 dw 11
-                     times  8 dw  5
                      times  8 dw 10
                      times  8 dw  6
-                     times  8 dw  9
-                     times  8 dw  7
                      times 16 dw  8
-                     times  8 dw  7
-                     times  8 dw  9
                      times  8 dw  6
                      times  8 dw 10
-                     times  8 dw  5
-                     times  8 dw 11
                      times  8 dw  4
                      times  8 dw 12
-                     times  8 dw  3
-                     times  8 dw 13
                      times  8 dw  2
                      times  8 dw 14
-                     times  8 dw  1
-                     times  8 dw 15
 
 SECTION .text
 
index 06b8b03..292cf34 100644 (file)
@@ -14,52 +14,28 @@ SECTION_RODATA
 pw_8: times  8 dw  8
 bilin_filter_m_sse2: times  8 dw 16
                      times  8 dw  0
-                     times  8 dw 15
-                     times  8 dw  1
                      times  8 dw 14
                      times  8 dw  2
-                     times  8 dw 13
-                     times  8 dw  3
                      times  8 dw 12
                      times  8 dw  4
-                     times  8 dw 11
-                     times  8 dw  5
                      times  8 dw 10
                      times  8 dw  6
-                     times  8 dw  9
-                     times  8 dw  7
                      times 16 dw  8
-                     times  8 dw  7
-                     times  8 dw  9
                      times  8 dw  6
                      times  8 dw 10
-                     times  8 dw  5
-                     times  8 dw 11
                      times  8 dw  4
                      times  8 dw 12
-                     times  8 dw  3
-                     times  8 dw 13
                      times  8 dw  2
                      times  8 dw 14
-                     times  8 dw  1
-                     times  8 dw 15
 
 bilin_filter_m_ssse3: times  8 db 16,  0
-                      times  8 db 15,  1
                       times  8 db 14,  2
-                      times  8 db 13,  3
                       times  8 db 12,  4
-                      times  8 db 11,  5
                       times  8 db 10,  6
-                      times  8 db  9,  7
                       times 16 db  8
-                      times  8 db  7,  9
                       times  8 db  6, 10
-                      times  8 db  5, 11
                       times  8 db  4, 12
-                      times  8 db  3, 13
                       times  8 db  2, 14
-                      times  8 db  1, 15
 
 SECTION .text
 
index 19ac5c0..b1c7975 100644 (file)
 DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
   16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
   16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
-  15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
-  15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
   14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
   14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
-  13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
-  13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
   12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
   12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
-  11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
-  11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
   10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
   10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
-  9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
-  9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-  7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
-  7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
   6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
   6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
-  5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
-  5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
   4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
   4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
-  3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
-  3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
   2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
   2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
-  1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15,
-  1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15
 };
 
 #define FILTER_SRC(filter) \