Add Neon intrinsics for vp9_avg_8x8_neon

author Frank Galligan <fgalligan@google.com>

Thu, 15 Jan 2015 22:36:41 +0000 (14:36 -0800)

committer Frank Galligan <fgalligan@google.com>

Thu, 15 Jan 2015 23:32:40 +0000 (15:32 -0800)
author Frank Galligan <fgalligan@google.com>
Thu, 15 Jan 2015 22:36:41 +0000 (14:36 -0800)
committer Frank Galligan <fgalligan@google.com>
Thu, 15 Jan 2015 23:32:40 +0000 (15:32 -0800)
diff --git a/test/vp9_avg_test.cc b/test/vp9_avg_test.cc

index fa04528..252ed4e 100644 (file)
--- a/test/vp9_avg_test.cc
+++ b/test/vp9_avg_test.cc
@@ -165,4 +165,14 @@ INSTANTIATE_TEST_CASE_P(
  
  #endif
  
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, AverageTest,
+    ::testing::Values(
+        make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),
+        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
+        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon)));
+
+#endif
+
  }  // namespace
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl

index 11df21f..4e9ec0f 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -1098,7 +1098,7 @@ add_proto qw/unsigned int vp9_get_mb_ss/, "const int16_t *";
  specialize qw/vp9_get_mb_ss/, "$sse2_x86inc";
  
  add_proto qw/unsigned int vp9_avg_8x8/, "const uint8_t *, int p";
-specialize qw/vp9_avg_8x8 sse2/;
+specialize qw/vp9_avg_8x8 sse2 neon/;
  
  add_proto qw/unsigned int vp9_avg_4x4/, "const uint8_t *, int p";
  specialize qw/vp9_avg_4x4 sse2/;
diff --git a/vp9/encoder/arm/neon/vp9_avg_neon.c b/vp9/encoder/arm/neon/vp9_avg_neon.c

new file mode 100644 (file)

index 0000000..f505fcb
--- /dev/null
+++ b/vp9/encoder/arm/neon/vp9_avg_neon.c
@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+
+#include "vpx/vpx_integer.h"
+
+static INLINE unsigned int horizontal_add_u16x8(const uint16x8_t v_16x8) {
+  const uint32x4_t a = vpaddlq_u16(v_16x8);
+  const uint64x2_t b = vpaddlq_u32(a);
+  const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
+                                vreinterpret_u32_u64(vget_high_u64(b)));
+  return vget_lane_u32(c, 0);
+}
+
+unsigned int vp9_avg_8x8_neon(const uint8_t *s, int p) {
+  uint8x8_t v_s0 = vld1_u8(s);
+  const uint8x8_t v_s1 = vld1_u8(s + p);
+  uint16x8_t v_sum = vaddl_u8(v_s0, v_s1);
+
+  v_s0 = vld1_u8(s + 2 * p);
+  v_sum = vaddw_u8(v_sum, v_s0);
+
+  v_s0 = vld1_u8(s + 3 * p);
+  v_sum = vaddw_u8(v_sum, v_s0);
+
+  v_s0 = vld1_u8(s + 4 * p);
+  v_sum = vaddw_u8(v_sum, v_s0);
+
+  v_s0 = vld1_u8(s + 5 * p);
+  v_sum = vaddw_u8(v_sum, v_s0);
+
+  v_s0 = vld1_u8(s + 6 * p);
+  v_sum = vaddw_u8(v_sum, v_s0);
+
+  v_s0 = vld1_u8(s + 7 * p);
+  v_sum = vaddw_u8(v_sum, v_s0);
+
+  return (horizontal_add_u16x8(v_sum) + 32) >> 6;
+}
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk

index c75fd8a..33a1e67 100644 (file)
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -150,6 +150,7 @@ VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_dct_avx2.c
  VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_intrin_avx2.c
  VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_variance_avx2.c
  
+VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_avg_neon.c
  VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_sad_neon.c
  VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_dct_neon.c
  VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_variance_neon.c
author	Frank Galligan <fgalligan@google.com>
	Thu, 15 Jan 2015 22:36:41 +0000 (14:36 -0800)
committer	Frank Galligan <fgalligan@google.com>
	Thu, 15 Jan 2015 23:32:40 +0000 (15:32 -0800)
test/vp9_avg_test.cc		patch \| blob \| history
vp9/common/vp9_rtcd_defs.pl		patch \| blob \| history
vp9/encoder/arm/neon/vp9_avg_neon.c	[new file with mode: 0644]	patch \| blob
vp9/vp9cx.mk		patch \| blob \| history