block error avx2: use tran_low_t

author Johann <johannkoenig@google.com>

Thu, 16 Feb 2017 19:12:31 +0000 (11:12 -0800)

committer Johann <johannkoenig@google.com>

Thu, 16 Feb 2017 20:39:02 +0000 (12:39 -0800)
author Johann <johannkoenig@google.com>
Thu, 16 Feb 2017 19:12:31 +0000 (11:12 -0800)
committer Johann <johannkoenig@google.com>
Thu, 16 Feb 2017 20:39:02 +0000 (12:39 -0800)
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl

index 720e171..bf6de44 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -125,6 +125,7 @@ if (vpx_config("CONFIG_VP9_TEMPORAL_DENOISING") eq "yes") {
  
  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
    add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz";
+  specialize qw/vp9_block_error avx2/;
  
    add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
    specialize qw/vp9_highbd_block_error sse2/;
diff --git a/vp9/encoder/x86/vp9_error_intrin_avx2.c b/vp9/encoder/x86/vp9_error_intrin_avx2.c

index 453af2a..e39027f 100644 (file)
--- a/vp9/encoder/x86/vp9_error_intrin_avx2.c
+++ b/vp9/encoder/x86/vp9_error_intrin_avx2.c
@@ -12,8 +12,10 @@
  
  #include "./vp9_rtcd.h"
  #include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
  
-int64_t vp9_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
+int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                               intptr_t block_size, int64_t *ssz) {
    __m256i sse_reg, ssz_reg, coeff_reg, dqcoeff_reg;
    __m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
@@ -29,8 +31,8 @@ int64_t vp9_block_error_avx2(const int16_t *coeff, const int16_t *dqcoeff,
  
    for (i = 0; i < block_size; i += 16) {
      // load 32 bytes from coeff and dqcoeff
-    coeff_reg = _mm256_loadu_si256((const __m256i *)(coeff + i));
-    dqcoeff_reg = _mm256_loadu_si256((const __m256i *)(dqcoeff + i));
+    coeff_reg = load_tran_low(coeff + i);
+    dqcoeff_reg = load_tran_low(dqcoeff + i);
      // dqcoeff - coeff
      dqcoeff_reg = _mm256_sub_epi16(dqcoeff_reg, coeff_reg);
      // madd (dqcoeff - coeff)
diff --git a/vpx_dsp/vpx_dsp.mk b/vpx_dsp/vpx_dsp.mk

index 33c9e51..ae4f7d8 100644 (file)
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -13,6 +13,7 @@ DSP_SRCS-yes += vpx_dsp_common.h
  
  DSP_SRCS-$(HAVE_MSA)    += mips/macros_msa.h
  
+DSP_SRCS-$(HAVE_AVX2)   += x86/bitdepth_conversion_avx2.h
  DSP_SRCS-$(HAVE_SSE2)   += x86/bitdepth_conversion_sse2.h
  # This file is included in libs.mk. Including it here would cause it to be
  # compiled into an object. Even as an empty file, this would create an
diff --git a/vpx_dsp/x86/bitdepth_conversion_avx2.h b/vpx_dsp/x86/bitdepth_conversion_avx2.h

new file mode 100644 (file)

index 0000000..b9116f0
--- /dev/null
+++ b/vpx_dsp/x86/bitdepth_conversion_avx2.h
@@ -0,0 +1,30 @@
+/*
+ *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
+#define VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
+
+#include <immintrin.h>
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+
+// Load 16 values as int16. 32-bit sources are packed with signed saturation,
+// per 128-bit lane: result order is a[0..3], a[8..11], a[4..7], a[12..15].
+static INLINE __m256i load_tran_low(const tran_low_t *a) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const __m256i *const p = (const __m256i *)a;  // unaligned loads only
+  return _mm256_packs_epi32(_mm256_loadu_si256(p), _mm256_loadu_si256(p + 1));
+#else
+  return _mm256_loadu_si256((const __m256i *)a);
+#endif
+}
+
+#endif  // VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_
author	Johann <johannkoenig@google.com>
	Thu, 16 Feb 2017 19:12:31 +0000 (11:12 -0800)
committer	Johann <johannkoenig@google.com>
	Thu, 16 Feb 2017 20:39:02 +0000 (12:39 -0800)
vp9/common/vp9_rtcd_defs.pl		patch \| blob \| history
vp9/encoder/x86/vp9_error_intrin_avx2.c		patch \| blob \| history
vpx_dsp/vpx_dsp.mk		patch \| blob \| history
vpx_dsp/x86/bitdepth_conversion_avx2.h	[new file with mode: 0644]	patch \| blob