Make vp9 subpixel match vp8
author: Johann <johannkoenig@google.com>
Tue, 26 May 2015 18:30:25 +0000 (11:30 -0700)
committer: Johann <johannkoenig@google.com>
Thu, 4 Jun 2015 05:10:51 +0000 (22:10 -0700)
The only difference between the two was that the vp9 function allowed
for every step in the bilinear filter (16 steps) while vp8 only allowed
for half of those. Since all the call sites in vp9 shift the input left
by one bit (<< 1), it only ever used the same steps as vp8.

This will allow moving the subpel variance to vpx_dsp with the rest of
the variance functions.

Change-Id: I6fa2509350a2dc610c46b3e15bde98a15a084b75

test/variance_test.cc
vp9/common/vp9_filter.h
vp9/encoder/arm/neon/vp9_variance_neon.c
vp9/encoder/vp9_mcomp.c
vp9/encoder/vp9_variance.c
vp9/encoder/x86/vp9_highbd_subpel_variance.asm
vp9/encoder/x86/vp9_subpel_variance.asm
vp9/encoder/x86/vp9_subpel_variance_impl_intrin_avx2.c

index 02d0a33..08a247d 100644 (file)
@@ -21,6 +21,9 @@
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
+#if CONFIG_VP8_ENCODER
+# include "./vp8_rtcd.h"
+#endif  // CONFIG_VP8_ENCODER
 #if CONFIG_VP9_ENCODER
 # include "./vp9_rtcd.h"
 # include "vp9/encoder/vp9_variance.h"
@@ -32,10 +35,13 @@ namespace {
 typedef unsigned int (*VarianceMxNFunc)(const uint8_t *a, int a_stride,
                                         const uint8_t *b, int b_stride,
                                         unsigned int *sse);
+typedef unsigned int (*SubpixVarMxNFunc)(const uint8_t *a, int a_stride,
+                                         int xoffset, int yoffset,
+                                         const uint8_t *b, int b_stride,
+                                         unsigned int *sse);
 typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride,
                                       const uint8_t *b, int b_stride);
 
-
 using ::std::tr1::get;
 using ::std::tr1::make_tuple;
 using ::std::tr1::tuple;
@@ -102,6 +108,12 @@ static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref,
                                 (l2w + l2h)));
 }
 
+/* The subpel reference functions differ from the codec version in one aspect:
+ * they calculate the bilinear factors directly instead of using a lookup table
+ * and therefore upshift xoff and yoff by 1. Only every other calculated value
+ * is used so the codec version shrinks the table to save space and maintain
+ * compatibility with vp8.
+ */
 static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
                                     int l2w, int l2h, int xoff, int yoff,
                                     uint32_t *sse_ptr,
@@ -111,6 +123,10 @@ static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
   uint64_t sse = 0;
   const int w = 1 << l2w;
   const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
   for (int y = 0; y < h; y++) {
     for (int x = 0; x < w; x++) {
       // Bilinear interpolation at a 16th pel step.
@@ -480,6 +496,10 @@ static uint32_t subpel_avg_variance_ref(const uint8_t *ref,
   uint64_t sse = 0;
   const int w = 1 << l2w;
   const int h = 1 << l2h;
+
+  xoff <<= 1;
+  yoff <<= 1;
+
   for (int y = 0; y < h; y++) {
     for (int x = 0; x < w; x++) {
       // bilinear interpolation at a 16th pel step
@@ -598,8 +618,8 @@ class SubpelVarianceTest
 
 template<typename SubpelVarianceFunctionType>
 void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
-  for (int x = 0; x < 16; ++x) {
-    for (int y = 0; y < 16; ++y) {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
       if (!use_high_bit_depth_) {
         for (int j = 0; j < block_size_; j++) {
           src_[j] = rnd_.Rand8();
@@ -621,8 +641,9 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
       unsigned int var1;
       ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
                                                        src_, width_, &sse1));
-      const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
-                                                    log2height_, x, y, &sse2,
+      const unsigned int var2 = subpel_variance_ref(ref_, src_,
+                                                    log2width_, log2height_,
+                                                    x, y, &sse2,
                                                     use_high_bit_depth_,
                                                     bit_depth_);
       EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
@@ -636,8 +657,8 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
   // Compare against reference.
   // Src: Set the first half of values to 0, the second half to the maximum.
   // Ref: Set the first half of values to the maximum, the second half to 0.
-  for (int x = 0; x < 16; ++x) {
-    for (int y = 0; y < 16; ++y) {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
       const int half = block_size_ / 2;
       if (!use_high_bit_depth_) {
         memset(src_, 0, half);
@@ -658,10 +679,10 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
       ASM_REGISTER_STATE_CHECK(
           var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1));
       const unsigned int var2 =
-          subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2,
-                              use_high_bit_depth_, bit_depth_);
-      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
-      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
+          subpel_variance_ref(ref_, src_, log2width_, log2height_,
+                              x, y, &sse2, use_high_bit_depth_, bit_depth_);
+      EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y;
+      EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y;
     }
   }
 }
@@ -669,8 +690,8 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
 #if CONFIG_VP9_ENCODER
 template<>
 void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
-  for (int x = 0; x < 16; ++x) {
-    for (int y = 0; y < 16; ++y) {
+  for (int x = 0; x < 8; ++x) {
+    for (int y = 0; y < 8; ++y) {
       if (!use_high_bit_depth_) {
         for (int j = 0; j < block_size_; j++) {
           src_[j] = rnd_.Rand8();
@@ -795,7 +816,6 @@ const VarianceMxNFunc highbd_8_mse16x16_c = vpx_highbd_8_mse16x16_c;
 const VarianceMxNFunc highbd_8_mse16x8_c = vpx_highbd_8_mse16x8_c;
 const VarianceMxNFunc highbd_8_mse8x16_c = vpx_highbd_8_mse8x16_c;
 const VarianceMxNFunc highbd_8_mse8x8_c = vpx_highbd_8_mse8x8_c;
-
 INSTANTIATE_TEST_CASE_P(
     C, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_c),
                                         make_tuple(4, 4, highbd_12_mse16x8_c),
@@ -811,7 +831,6 @@ INSTANTIATE_TEST_CASE_P(
                                         make_tuple(4, 4, highbd_8_mse8x8_c)));
 */
 
-
 const VarianceMxNFunc highbd_12_variance64x64_c = vpx_highbd_12_variance64x64_c;
 const VarianceMxNFunc highbd_12_variance64x32_c = vpx_highbd_12_variance64x32_c;
 const VarianceMxNFunc highbd_12_variance32x64_c = vpx_highbd_12_variance32x64_c;
@@ -976,7 +995,6 @@ const VarianceMxNFunc highbd_8_mse16x16_sse2 = vpx_highbd_8_mse16x16_sse2;
 const VarianceMxNFunc highbd_8_mse16x8_sse2 = vpx_highbd_8_mse16x8_sse2;
 const VarianceMxNFunc highbd_8_mse8x16_sse2 = vpx_highbd_8_mse8x16_sse2;
 const VarianceMxNFunc highbd_8_mse8x8_sse2 = vpx_highbd_8_mse8x8_sse2;
-
 INSTANTIATE_TEST_CASE_P(
     SSE2, VpxHBDMseTest, ::testing::Values(make_tuple(4, 4, highbd_12_mse16x16_sse2),
                                            make_tuple(4, 3, highbd_12_mse16x8_sse2),
@@ -1088,8 +1106,15 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2
 
+#if CONFIG_VP8
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP8SubpelVarianceTest;
+
+TEST_P(VP8SubpelVarianceTest, Ref) { RefTest(); }
+TEST_P(VP8SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
-typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceTest;
 typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t> VP9SubpelAvgVarianceTest;
 
 TEST_P(VP9SubpelVarianceTest, Ref) { RefTest(); }
@@ -1097,7 +1122,7 @@ TEST_P(VP9SubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); }
 TEST_P(VP9SubpelAvgVarianceTest, Ref) { RefTest(); }
 
 #if CONFIG_VP9_HIGHBITDEPTH
-typedef SubpelVarianceTest<vp9_subpixvariance_fn_t> VP9SubpelVarianceHighTest;
+typedef SubpelVarianceTest<SubpixVarMxNFunc> VP9SubpelVarianceHighTest;
 typedef SubpelVarianceTest<vp9_subp_avg_variance_fn_t>
     VP9SubpelAvgVarianceHighTest;
 
@@ -1106,32 +1131,19 @@ TEST_P(VP9SubpelVarianceHighTest, ExtremeRef) { ExtremeRefTest(); }
 TEST_P(VP9SubpelAvgVarianceHighTest, Ref) { RefTest(); }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 
-const vp9_subpixvariance_fn_t subpel_variance4x4_c =
-    vp9_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t subpel_variance4x8_c =
-    vp9_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t subpel_variance8x4_c =
-    vp9_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t subpel_variance8x8_c =
-    vp9_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t subpel_variance8x16_c =
-    vp9_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t subpel_variance16x8_c =
-    vp9_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t subpel_variance16x16_c =
-    vp9_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t subpel_variance16x32_c =
-    vp9_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t subpel_variance32x16_c =
-    vp9_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t subpel_variance32x32_c =
-    vp9_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t subpel_variance32x64_c =
-    vp9_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t subpel_variance64x32_c =
-    vp9_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t subpel_variance64x64_c =
-    vp9_sub_pixel_variance64x64_c;
+const SubpixVarMxNFunc subpel_variance4x4_c = vp9_sub_pixel_variance4x4_c;
+const SubpixVarMxNFunc subpel_variance4x8_c = vp9_sub_pixel_variance4x8_c;
+const SubpixVarMxNFunc subpel_variance8x4_c = vp9_sub_pixel_variance8x4_c;
+const SubpixVarMxNFunc subpel_variance8x8_c = vp9_sub_pixel_variance8x8_c;
+const SubpixVarMxNFunc subpel_variance8x16_c = vp9_sub_pixel_variance8x16_c;
+const SubpixVarMxNFunc subpel_variance16x8_c = vp9_sub_pixel_variance16x8_c;
+const SubpixVarMxNFunc subpel_variance16x16_c = vp9_sub_pixel_variance16x16_c;
+const SubpixVarMxNFunc subpel_variance16x32_c = vp9_sub_pixel_variance16x32_c;
+const SubpixVarMxNFunc subpel_variance32x16_c = vp9_sub_pixel_variance32x16_c;
+const SubpixVarMxNFunc subpel_variance32x32_c = vp9_sub_pixel_variance32x32_c;
+const SubpixVarMxNFunc subpel_variance32x64_c = vp9_sub_pixel_variance32x64_c;
+const SubpixVarMxNFunc subpel_variance64x32_c = vp9_sub_pixel_variance64x32_c;
+const SubpixVarMxNFunc subpel_variance64x64_c = vp9_sub_pixel_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9SubpelVarianceTest,
     ::testing::Values(make_tuple(2, 2, subpel_variance4x4_c, 0),
@@ -1147,6 +1159,23 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(5, 6, subpel_variance32x64_c, 0),
                       make_tuple(6, 5, subpel_variance64x32_c, 0),
                       make_tuple(6, 6, subpel_variance64x64_c, 0)));
+
+#if CONFIG_VP8
+const SubpixVarMxNFunc vp8_subpel_variance16x16_c =
+    vp8_sub_pixel_variance16x16_c;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_c = vp8_sub_pixel_variance16x8_c;
+const SubpixVarMxNFunc vp8_subpel_variance8x16_c = vp8_sub_pixel_variance8x16_c;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_c = vp8_sub_pixel_variance8x8_c;
+const SubpixVarMxNFunc vp8_subpel_variance4x4_c = vp8_sub_pixel_variance4x4_c;
+INSTANTIATE_TEST_CASE_P(
+    C, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_c, 0),
+                      make_tuple(3, 3, vp8_subpel_variance8x8_c, 0),
+                      make_tuple(3, 4, vp8_subpel_variance8x16_c, 0),
+                      make_tuple(4, 3, vp8_subpel_variance16x8_c, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_c, 0)));
+#endif  // CONFIG_VP8
+
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x4_c =
     vp9_sub_pixel_avg_variance4x4_c;
 const vp9_subp_avg_variance_fn_t subpel_avg_variance4x8_c =
@@ -1189,83 +1218,83 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(6, 5, subpel_avg_variance64x32_c, 0),
                       make_tuple(6, 6, subpel_avg_variance64x64_c, 0)));
 #if CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance4x4_c =
     vp9_highbd_10_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance4x8_c =
     vp9_highbd_10_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x4_c =
     vp9_highbd_10_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x8_c =
     vp9_highbd_10_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x16_c =
     vp9_highbd_10_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x8_c =
     vp9_highbd_10_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x16_c =
     vp9_highbd_10_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x32_c =
     vp9_highbd_10_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x16_c =
     vp9_highbd_10_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x32_c =
     vp9_highbd_10_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x64_c =
     vp9_highbd_10_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x32_c =
     vp9_highbd_10_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x64_c =
     vp9_highbd_10_sub_pixel_variance64x64_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance4x4_c =
     vp9_highbd_12_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance4x8_c =
     vp9_highbd_12_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x4_c =
     vp9_highbd_12_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x8_c =
     vp9_highbd_12_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x16_c =
     vp9_highbd_12_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x8_c =
     vp9_highbd_12_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x16_c =
     vp9_highbd_12_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x32_c =
     vp9_highbd_12_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x16_c =
     vp9_highbd_12_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x32_c =
     vp9_highbd_12_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x64_c =
     vp9_highbd_12_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x32_c =
     vp9_highbd_12_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x64_c =
     vp9_highbd_12_sub_pixel_variance64x64_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance4x4_c =
+const SubpixVarMxNFunc highbd_subpel_variance4x4_c =
     vp9_highbd_sub_pixel_variance4x4_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance4x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance4x8_c =
     vp9_highbd_sub_pixel_variance4x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x4_c =
     vp9_highbd_sub_pixel_variance8x4_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x8_c =
     vp9_highbd_sub_pixel_variance8x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance8x16_c =
     vp9_highbd_sub_pixel_variance8x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x8_c =
     vp9_highbd_sub_pixel_variance16x8_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x16_c =
     vp9_highbd_sub_pixel_variance16x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance16x32_c =
     vp9_highbd_sub_pixel_variance16x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x16_c =
     vp9_highbd_sub_pixel_variance32x16_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x32_c =
     vp9_highbd_sub_pixel_variance32x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_c =
+const SubpixVarMxNFunc highbd_subpel_variance32x64_c =
     vp9_highbd_sub_pixel_variance32x64_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_c =
+const SubpixVarMxNFunc highbd_subpel_variance64x32_c =
     vp9_highbd_sub_pixel_variance64x32_c;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_c =
+const SubpixVarMxNFunc highbd_subpel_variance64x64_c =
     vp9_highbd_sub_pixel_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
     C, VP9SubpelVarianceHighTest,
@@ -1431,34 +1460,48 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_VP9_ENCODER
 
+#if CONFIG_VP8
+#if HAVE_MMX
+const SubpixVarMxNFunc subpel_variance16x16_mmx =
+    vp8_sub_pixel_variance16x16_mmx;
+const SubpixVarMxNFunc subpel_variance16x8_mmx = vp8_sub_pixel_variance16x8_mmx;
+const SubpixVarMxNFunc subpel_variance8x16_mmx = vp8_sub_pixel_variance8x16_mmx;
+const SubpixVarMxNFunc subpel_variance8x8_mmx = vp8_sub_pixel_variance8x8_mmx;
+const SubpixVarMxNFunc subpel_variance4x4_mmx = vp8_sub_pixel_variance4x4_mmx;
+INSTANTIATE_TEST_CASE_P(
+    MMX, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(4, 4, subpel_variance16x16_mmx, 0),
+                      make_tuple(4, 3, subpel_variance16x8_mmx, 0),
+                      make_tuple(3, 4, subpel_variance8x16_mmx, 0),
+                      make_tuple(3, 3, subpel_variance8x8_mmx, 0),
+                      make_tuple(2, 2, subpel_variance4x4_mmx, 0)));
+#endif  // HAVE_MMX
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
 #if HAVE_SSE2
 #if CONFIG_USE_X86INC
-const vp9_subpixvariance_fn_t subpel_variance4x4_sse =
-    vp9_sub_pixel_variance4x4_sse;
-const vp9_subpixvariance_fn_t subpel_variance4x8_sse =
-    vp9_sub_pixel_variance4x8_sse;
-const vp9_subpixvariance_fn_t subpel_variance8x4_sse2 =
-    vp9_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t subpel_variance8x8_sse2 =
-    vp9_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc subpel_variance4x4_sse = vp9_sub_pixel_variance4x4_sse;
+const SubpixVarMxNFunc subpel_variance4x8_sse = vp9_sub_pixel_variance4x8_sse;
+const SubpixVarMxNFunc subpel_variance8x4_sse2 = vp9_sub_pixel_variance8x4_sse2;
+const SubpixVarMxNFunc subpel_variance8x8_sse2 = vp9_sub_pixel_variance8x8_sse2;
+const SubpixVarMxNFunc subpel_variance8x16_sse2 =
     vp9_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc subpel_variance16x8_sse2 =
     vp9_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc subpel_variance16x16_sse2 =
     vp9_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc subpel_variance16x32_sse2 =
     vp9_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc subpel_variance32x16_sse2 =
     vp9_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc subpel_variance32x32_sse2 =
     vp9_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc subpel_variance32x64_sse2 =
     vp9_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc subpel_variance64x32_sse2 =
     vp9_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc subpel_variance64x64_sse2 =
     vp9_sub_pixel_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9SubpelVarianceTest,
@@ -1517,71 +1560,71 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(6, 5, subpel_avg_variance64x32_sse2, 0),
                       make_tuple(6, 6, subpel_avg_variance64x64_sse2, 0)));
 #if CONFIG_VP9_HIGHBITDEPTH
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x4_sse2 =
     vp9_highbd_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x8_sse2 =
     vp9_highbd_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance8x16_sse2 =
     vp9_highbd_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x8_sse2 =
     vp9_highbd_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x16_sse2 =
     vp9_highbd_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance16x32_sse2 =
     vp9_highbd_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x16_sse2 =
     vp9_highbd_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x32_sse2 =
     vp9_highbd_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance32x64_sse2 =
     vp9_highbd_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance64x32_sse2 =
     vp9_highbd_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_subpel_variance64x64_sse2 =
     vp9_highbd_sub_pixel_variance64x64_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x4_sse2 =
     vp9_highbd_10_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x8_sse2 =
     vp9_highbd_10_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance8x16_sse2 =
     vp9_highbd_10_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x8_sse2 =
     vp9_highbd_10_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x16_sse2 =
     vp9_highbd_10_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance16x32_sse2 =
     vp9_highbd_10_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x16_sse2 =
     vp9_highbd_10_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x32_sse2 =
     vp9_highbd_10_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance32x64_sse2 =
     vp9_highbd_10_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x32_sse2 =
     vp9_highbd_10_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_10_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_10_subpel_variance64x64_sse2 =
     vp9_highbd_10_sub_pixel_variance64x64_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x4_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x4_sse2 =
     vp9_highbd_12_sub_pixel_variance8x4_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x8_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x8_sse2 =
     vp9_highbd_12_sub_pixel_variance8x8_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance8x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance8x16_sse2 =
     vp9_highbd_12_sub_pixel_variance8x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x8_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x8_sse2 =
     vp9_highbd_12_sub_pixel_variance16x8_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x16_sse2 =
     vp9_highbd_12_sub_pixel_variance16x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance16x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance16x32_sse2 =
     vp9_highbd_12_sub_pixel_variance16x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x16_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x16_sse2 =
     vp9_highbd_12_sub_pixel_variance32x16_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x32_sse2 =
     vp9_highbd_12_sub_pixel_variance32x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance32x64_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance32x64_sse2 =
     vp9_highbd_12_sub_pixel_variance32x64_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x32_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x32_sse2 =
     vp9_highbd_12_sub_pixel_variance64x32_sse2;
-const vp9_subpixvariance_fn_t highbd_12_subpel_variance64x64_sse2 =
+const SubpixVarMxNFunc highbd_12_subpel_variance64x64_sse2 =
     vp9_highbd_12_sub_pixel_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
     SSE2, VP9SubpelVarianceHighTest,
@@ -1725,35 +1768,56 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_ENCODER
 
+#if CONFIG_VP8
+#if HAVE_SSE2
+const SubpixVarMxNFunc vp8_subpel_variance16x16_sse2 =
+    vp8_sub_pixel_variance16x16_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_sse2 =
+    vp8_sub_pixel_variance16x8_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance8x16_sse2 =
+    vp8_sub_pixel_variance8x16_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_sse2 =
+    vp8_sub_pixel_variance8x8_wmt;
+const SubpixVarMxNFunc vp8_subpel_variance4x4_sse2 =
+    vp8_sub_pixel_variance4x4_wmt;
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(2, 2, vp8_subpel_variance4x4_sse2, 0),
+                      make_tuple(3, 3, vp8_subpel_variance8x8_sse2, 0),
+                      make_tuple(3, 4, vp8_subpel_variance8x16_sse2, 0),
+                      make_tuple(4, 3, vp8_subpel_variance16x8_sse2, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_sse2, 0)));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
 #if HAVE_SSSE3
 #if CONFIG_USE_X86INC
-
-const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
+const SubpixVarMxNFunc subpel_variance4x4_ssse3 =
     vp9_sub_pixel_variance4x4_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance4x8_ssse3 =
     vp9_sub_pixel_variance4x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x4_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x4_ssse3 =
     vp9_sub_pixel_variance8x4_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x8_ssse3 =
     vp9_sub_pixel_variance8x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance8x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance8x16_ssse3 =
     vp9_sub_pixel_variance8x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x8_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x8_ssse3 =
     vp9_sub_pixel_variance16x8_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x16_ssse3 =
     vp9_sub_pixel_variance16x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance16x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance16x32_ssse3 =
     vp9_sub_pixel_variance16x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x16_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x16_ssse3 =
     vp9_sub_pixel_variance32x16_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x32_ssse3 =
     vp9_sub_pixel_variance32x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance32x64_ssse3 =
+const SubpixVarMxNFunc subpel_variance32x64_ssse3 =
     vp9_sub_pixel_variance32x64_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance64x32_ssse3 =
+const SubpixVarMxNFunc subpel_variance64x32_ssse3 =
     vp9_sub_pixel_variance64x32_ssse3;
-const vp9_subpixvariance_fn_t subpel_variance64x64_ssse3 =
+const SubpixVarMxNFunc subpel_variance64x64_ssse3 =
     vp9_sub_pixel_variance64x64_ssse3;
 INSTANTIATE_TEST_CASE_P(
     SSSE3, VP9SubpelVarianceTest,
@@ -1815,6 +1879,19 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // HAVE_SSSE3
 #endif  // CONFIG_VP9_ENCODER
 
+#if CONFIG_VP8
+#if HAVE_SSSE3
+const SubpixVarMxNFunc vp8_subpel_variance16x16_ssse3 =
+    vp8_sub_pixel_variance16x16_ssse3;
+const SubpixVarMxNFunc vp8_subpel_variance16x8_ssse3 =
+    vp8_sub_pixel_variance16x8_ssse3;
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(4, 3, vp8_subpel_variance16x8_ssse3, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_ssse3, 0)));
+#endif  // HAVE_SSSE3
+#endif  // CONFIG_VP8
+
 #if HAVE_AVX2
 const VarianceMxNFunc mse16x16_avx2 = vpx_mse16x16_avx2;
 INSTANTIATE_TEST_CASE_P(AVX2, VpxMseTest,
@@ -1834,9 +1911,9 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(4, 4, variance16x16_avx2, 0)));
 
 #if CONFIG_VP9_ENCODER
-const vp9_subpixvariance_fn_t subpel_variance32x32_avx2 =
+const SubpixVarMxNFunc subpel_variance32x32_avx2 =
     vp9_sub_pixel_variance32x32_avx2;
-const vp9_subpixvariance_fn_t subpel_variance64x64_avx2 =
+const SubpixVarMxNFunc subpel_variance64x64_avx2 =
     vp9_sub_pixel_variance64x64_avx2;
 INSTANTIATE_TEST_CASE_P(
     AVX2, VP9SubpelVarianceTest,
@@ -1854,6 +1931,19 @@ INSTANTIATE_TEST_CASE_P(
 #endif  // CONFIG_VP9_ENCODER
 #endif  // HAVE_AVX2
 
+#if CONFIG_VP8
+#if HAVE_MEDIA
+const SubpixVarMxNFunc subpel_variance16x16_media =
+    vp8_sub_pixel_variance16x16_armv6;
+const SubpixVarMxNFunc subpel_variance8x8_media =
+    vp8_sub_pixel_variance8x8_armv6;
+INSTANTIATE_TEST_CASE_P(
+    MEDIA, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(3, 3, subpel_variance8x8_media, 0),
+                      make_tuple(4, 4, subpel_variance16x16_media, 0)));
+#endif  // HAVE_MEDIA
+#endif  // CONFIG_VP8
+
 #if HAVE_NEON
 const Get4x4SseFunc get4x4sse_cs_neon = vpx_get4x4sse_cs_neon;
 INSTANTIATE_TEST_CASE_P(NEON, VpxSseTest,
@@ -1882,14 +1972,26 @@ INSTANTIATE_TEST_CASE_P(
                       make_tuple(3, 4, variance8x16_neon, 0),
                       make_tuple(3, 3, variance8x8_neon, 0)));
 
+#if CONFIG_VP8
+#if HAVE_NEON_ASM
+const SubpixVarMxNFunc vp8_subpel_variance16x16_neon =
+    vp8_sub_pixel_variance16x16_neon;
+const SubpixVarMxNFunc vp8_subpel_variance8x8_neon =
+    vp8_sub_pixel_variance8x8_neon;
+INSTANTIATE_TEST_CASE_P(
+    NEON, VP8SubpelVarianceTest,
+    ::testing::Values(make_tuple(3, 3, vp8_subpel_variance8x8_neon, 0),
+                      make_tuple(4, 4, vp8_subpel_variance16x16_neon, 0)));
+#endif  // HAVE_NEON_ASM
+#endif  // CONFIG_VP8
+
 #if CONFIG_VP9_ENCODER
-const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
-    vp9_sub_pixel_variance8x8_neon;
-const vp9_subpixvariance_fn_t subpel_variance16x16_neon =
+const SubpixVarMxNFunc subpel_variance8x8_neon = vp9_sub_pixel_variance8x8_neon;
+const SubpixVarMxNFunc subpel_variance16x16_neon =
     vp9_sub_pixel_variance16x16_neon;
-const vp9_subpixvariance_fn_t subpel_variance32x32_neon =
+const SubpixVarMxNFunc subpel_variance32x32_neon =
     vp9_sub_pixel_variance32x32_neon;
-const vp9_subpixvariance_fn_t subpel_variance64x64_neon =
+const SubpixVarMxNFunc subpel_variance64x64_neon =
     vp9_sub_pixel_variance64x64_neon;
 INSTANTIATE_TEST_CASE_P(
     NEON, VP9SubpelVarianceTest,
index d963ee2..808a270 100644 (file)
@@ -43,14 +43,6 @@ typedef int16_t InterpKernel[SUBPEL_TAPS];
 
 const InterpKernel *vp9_get_interp_kernel(INTERP_FILTER filter);
 
-DECLARE_ALIGNED(256, extern const InterpKernel,
-                vp9_bilinear_filters[SUBPEL_SHIFTS]);
-
-// The VP9_BILINEAR_FILTERS_2TAP macro returns a pointer to the bilinear
-// filter kernel as a 2 tap filter.
-#define BILINEAR_FILTERS_2TAP(x) \
-  (vp9_bilinear_filters[(x)] + SUBPEL_TAPS/2 - 1)
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
index 166156a..0ac194e 100644 (file)
 #include "vpx_ports/mem.h"
 #include "vpx/vpx_integer.h"
 
-#include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_filter.h"
 
-#include "vp9/encoder/vp9_variance.h"
+static uint8_t bilinear_filters[8][2] = {
+  { 128,   0, },
+  { 112,  16, },
+  {  96,  32, },
+  {  80,  48, },
+  {  64,  64, },
+  {  48,  80, },
+  {  32,  96, },
+  {  16, 112, },
+};
 
 static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
                                       uint8_t *output_ptr,
@@ -27,9 +35,9 @@ static void var_filter_block2d_bil_w8(const uint8_t *src_ptr,
                                       int pixel_step,
                                       unsigned int output_height,
                                       unsigned int output_width,
-                                      const int16_t *vp9_filter) {
-  const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
-  const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+                                      const uint8_t *vp9_filter) {
+  const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+  const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
   unsigned int i;
   for (i = 0; i < output_height; ++i) {
     const uint8x8_t src_0 = vld1_u8(&src_ptr[0]);
@@ -50,9 +58,9 @@ static void var_filter_block2d_bil_w16(const uint8_t *src_ptr,
                                        int pixel_step,
                                        unsigned int output_height,
                                        unsigned int output_width,
-                                       const int16_t *vp9_filter) {
-  const uint8x8_t f0 = vmov_n_u8((uint8_t)vp9_filter[0]);
-  const uint8x8_t f1 = vmov_n_u8((uint8_t)vp9_filter[1]);
+                                       const uint8_t *vp9_filter) {
+  const uint8x8_t f0 = vmov_n_u8(vp9_filter[0]);
+  const uint8x8_t f1 = vmov_n_u8(vp9_filter[1]);
   unsigned int i, j;
   for (i = 0; i < output_height; ++i) {
     for (j = 0; j < output_width; j += 16) {
@@ -84,9 +92,9 @@ unsigned int vp9_sub_pixel_variance8x8_neon(const uint8_t *src,
 
   var_filter_block2d_bil_w8(src, fdata3, src_stride, 1,
                             9, 8,
-                            BILINEAR_FILTERS_2TAP(xoffset));
+                            bilinear_filters[xoffset]);
   var_filter_block2d_bil_w8(fdata3, temp2, 8, 8, 8,
-                            8, BILINEAR_FILTERS_2TAP(yoffset));
+                            8, bilinear_filters[yoffset]);
   return vpx_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
 }
 
@@ -102,9 +110,9 @@ unsigned int vp9_sub_pixel_variance16x16_neon(const uint8_t *src,
 
   var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
                              17, 16,
-                             BILINEAR_FILTERS_2TAP(xoffset));
+                             bilinear_filters[xoffset]);
   var_filter_block2d_bil_w16(fdata3, temp2, 16, 16, 16,
-                             16, BILINEAR_FILTERS_2TAP(yoffset));
+                             16, bilinear_filters[yoffset]);
   return vpx_variance16x16_neon(temp2, 16, dst, dst_stride, sse);
 }
 
@@ -120,9 +128,9 @@ unsigned int vp9_sub_pixel_variance32x32_neon(const uint8_t *src,
 
   var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
                              33, 32,
-                             BILINEAR_FILTERS_2TAP(xoffset));
+                             bilinear_filters[xoffset]);
   var_filter_block2d_bil_w16(fdata3, temp2, 32, 32, 32,
-                             32, BILINEAR_FILTERS_2TAP(yoffset));
+                             32, bilinear_filters[yoffset]);
   return vpx_variance32x32_neon(temp2, 32, dst, dst_stride, sse);
 }
 
@@ -138,8 +146,8 @@ unsigned int vp9_sub_pixel_variance64x64_neon(const uint8_t *src,
 
   var_filter_block2d_bil_w16(src, fdata3, src_stride, 1,
                              65, 64,
-                             BILINEAR_FILTERS_2TAP(xoffset));
+                             bilinear_filters[xoffset]);
   var_filter_block2d_bil_w16(fdata3, temp2, 64, 64, 64,
-                             64, BILINEAR_FILTERS_2TAP(yoffset));
+                             64, bilinear_filters[yoffset]);
   return vpx_variance64x64_neon(temp2, 64, dst, dst_stride, sse);
 }
index 15f9582..2342726 100644 (file)
@@ -162,9 +162,9 @@ void vp9_init3smotion_compensation(search_site_config *cfg, int stride) {
       error_per_bit + 4096) >> 13 : 0)
 
 
-// convert motion vector component to offset for svf calc
+// convert motion vector component to offset for sv[a]f calc
 static INLINE int sp(int x) {
-  return (x & 7) << 1;
+  return x & 7;
 }
 
 static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
@@ -679,16 +679,14 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
       tc = bc + search_step[idx].col;
       if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
         const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
-        int row_offset = (tr & 0x07) << 1;
-        int col_offset = (tc & 0x07) << 1;
         MV this_mv;
         this_mv.row = tr;
         this_mv.col = tc;
         if (second_pred == NULL)
-          thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+          thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                              src_address, src_stride, &sse);
         else
-          thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+          thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                               src_address, src_stride, &sse, second_pred);
         cost_array[idx] = thismse +
             mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
@@ -709,14 +707,12 @@ int vp9_find_best_sub_pixel_tree(const MACROBLOCK *x,
     tr = br + (cost_array[2] < cost_array[3] ? -hstep : hstep);
     if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
       const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
-      int row_offset = (tr & 0x07) << 1;
-      int col_offset = (tc & 0x07) << 1;
       MV this_mv = {tr, tc};
       if (second_pred == NULL)
-        thismse = vfp->svf(pre_address, y_stride, col_offset, row_offset,
+        thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
                            src_address, src_stride, &sse);
       else
-        thismse = vfp->svaf(pre_address, y_stride, col_offset, row_offset,
+        thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
                             src_address, src_stride, &sse, second_pred);
       cost_array[4] = thismse +
           mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit);
index 1f6b083..c571b7c 100644 (file)
 
 #include "vp9/encoder/vp9_variance.h"
 
+static uint8_t bilinear_filters[8][2] = {
+  { 128,   0, },
+  { 112,  16, },
+  {  96,  32, },
+  {  80,  48, },
+  {  64,  64, },
+  {  48,  80, },
+  {  32,  96, },
+  {  16, 112, },
+};
+
 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal
 // or vertical direction to produce the filtered output block. Used to implement
 // first-pass of 2-D separable filter.
@@ -33,7 +44,7 @@ static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr,
                                               int pixel_step,
                                               unsigned int output_height,
                                               unsigned int output_width,
-                                              const int16_t *vp9_filter) {
+                                              const uint8_t *vp9_filter) {
   unsigned int i, j;
 
   for (i = 0; i < output_height; i++) {
@@ -65,7 +76,7 @@ static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr,
                                                unsigned int pixel_step,
                                                unsigned int output_height,
                                                unsigned int output_width,
-                                               const int16_t *vp9_filter) {
+                                               const uint8_t *vp9_filter) {
   unsigned int  i, j;
 
   for (i = 0; i < output_height; i++) {
@@ -91,9 +102,9 @@ unsigned int vp9_sub_pixel_variance##W##x##H##_c( \
   uint8_t temp2[H * W]; \
 \
   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
-                                    BILINEAR_FILTERS_2TAP(xoffset)); \
+                                    bilinear_filters[xoffset]); \
   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                     BILINEAR_FILTERS_2TAP(yoffset)); \
+                                     bilinear_filters[yoffset]); \
 \
   return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \
 }
@@ -110,9 +121,9 @@ unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \
 \
   var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \
-                                    BILINEAR_FILTERS_2TAP(xoffset)); \
+                                    bilinear_filters[xoffset]); \
   var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                     BILINEAR_FILTERS_2TAP(yoffset)); \
+                                     bilinear_filters[yoffset]); \
 \
   vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \
 \
@@ -166,7 +177,7 @@ static void highbd_var_filter_block2d_bil_first_pass(
     int pixel_step,
     unsigned int output_height,
     unsigned int output_width,
-    const int16_t *vp9_filter) {
+    const uint8_t *vp9_filter) {
   unsigned int i, j;
   uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8);
   for (i = 0; i < output_height; i++) {
@@ -192,7 +203,7 @@ static void highbd_var_filter_block2d_bil_second_pass(
     unsigned int pixel_step,
     unsigned int output_height,
     unsigned int output_width,
-    const int16_t *vp9_filter) {
+    const uint8_t *vp9_filter) {
   unsigned int  i, j;
 
   for (i = 0; i < output_height; i++) {
@@ -219,9 +230,9 @@ unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \
                                           dst_stride, sse); \
@@ -236,9 +247,9 @@ unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                              W, dst, dst_stride, sse); \
@@ -253,9 +264,9 @@ unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \
   uint16_t temp2[H * W]; \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \
                                              W, dst, dst_stride, sse); \
@@ -273,9 +284,9 @@ unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \
@@ -295,9 +306,9 @@ unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \
@@ -317,9 +328,9 @@ unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \
   DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \
 \
   highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \
-                                           W, BILINEAR_FILTERS_2TAP(xoffset)); \
+                                           W, bilinear_filters[xoffset]); \
   highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \
-                                            BILINEAR_FILTERS_2TAP(yoffset)); \
+                                            bilinear_filters[yoffset]); \
 \
   vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \
                            CONVERT_TO_BYTEPTR(temp2), W); \
index 987729f..4594bb1 100644 (file)
@@ -14,35 +14,19 @@ SECTION_RODATA
 pw_8: times  8 dw  8
 bilin_filter_m_sse2: times  8 dw 16
                      times  8 dw  0
-                     times  8 dw 15
-                     times  8 dw  1
                      times  8 dw 14
                      times  8 dw  2
-                     times  8 dw 13
-                     times  8 dw  3
                      times  8 dw 12
                      times  8 dw  4
-                     times  8 dw 11
-                     times  8 dw  5
                      times  8 dw 10
                      times  8 dw  6
-                     times  8 dw  9
-                     times  8 dw  7
                      times 16 dw  8
-                     times  8 dw  7
-                     times  8 dw  9
                      times  8 dw  6
                      times  8 dw 10
-                     times  8 dw  5
-                     times  8 dw 11
                      times  8 dw  4
                      times  8 dw 12
-                     times  8 dw  3
-                     times  8 dw 13
                      times  8 dw  2
                      times  8 dw 14
-                     times  8 dw  1
-                     times  8 dw 15
 
 SECTION .text
 
index 06b8b03..292cf34 100644 (file)
@@ -14,52 +14,28 @@ SECTION_RODATA
 pw_8: times  8 dw  8
 bilin_filter_m_sse2: times  8 dw 16
                      times  8 dw  0
-                     times  8 dw 15
-                     times  8 dw  1
                      times  8 dw 14
                      times  8 dw  2
-                     times  8 dw 13
-                     times  8 dw  3
                      times  8 dw 12
                      times  8 dw  4
-                     times  8 dw 11
-                     times  8 dw  5
                      times  8 dw 10
                      times  8 dw  6
-                     times  8 dw  9
-                     times  8 dw  7
                      times 16 dw  8
-                     times  8 dw  7
-                     times  8 dw  9
                      times  8 dw  6
                      times  8 dw 10
-                     times  8 dw  5
-                     times  8 dw 11
                      times  8 dw  4
                      times  8 dw 12
-                     times  8 dw  3
-                     times  8 dw 13
                      times  8 dw  2
                      times  8 dw 14
-                     times  8 dw  1
-                     times  8 dw 15
 
 bilin_filter_m_ssse3: times  8 db 16,  0
-                      times  8 db 15,  1
                       times  8 db 14,  2
-                      times  8 db 13,  3
                       times  8 db 12,  4
-                      times  8 db 11,  5
                       times  8 db 10,  6
-                      times  8 db  9,  7
                       times 16 db  8
-                      times  8 db  7,  9
                       times  8 db  6, 10
-                      times  8 db  5, 11
                       times  8 db  4, 12
-                      times  8 db  3, 13
                       times  8 db  2, 14
-                      times  8 db  1, 15
 
 SECTION .text
 
index 19ac5c0..b1c7975 100644 (file)
 DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = {
   16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
   16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0,
-  15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
-  15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1,
   14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
   14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2,
-  13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
-  13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3,
   12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
   12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4,
-  11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
-  11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5,
   10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
   10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6,
-  9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
-  9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7,
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
   8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-  7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
-  7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9,
   6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
   6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10,
-  5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
-  5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11, 5, 11,
   4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
   4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12,
-  3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
-  3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13, 3, 13,
   2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
   2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14,
-  1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15,
-  1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15, 1, 15
 };
 
 #define FILTER_SRC(filter) \