Use uint32_t for accumulator

author Johann <johannkoenig@google.com>

Fri, 28 Apr 2017 13:34:21 +0000 (06:34 -0700)

committer Johann <johannkoenig@google.com>

Fri, 28 Apr 2017 13:36:59 +0000 (06:36 -0700)
author Johann <johannkoenig@google.com>
Fri, 28 Apr 2017 13:34:21 +0000 (06:34 -0700)
committer Johann <johannkoenig@google.com>
Fri, 28 Apr 2017 13:36:59 +0000 (06:36 -0700)
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl

index 47983ae..da449e2 100644 (file)
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -197,7 +197,7 @@ $vp9_full_search_sad_sse4_1=vp9_full_search_sadx8;
  add_proto qw/int vp9_diamond_search_sad/, "const struct macroblock *x, const struct search_site_config *cfg,  struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv";
  specialize qw/vp9_diamond_search_sad avx/;
  
-add_proto qw/void vp9_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+add_proto qw/void vp9_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count";
  specialize qw/vp9_temporal_filter_apply sse4_1/;
  
  if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
@@ -217,7 +217,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
  
    add_proto qw/void vp9_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
  
-  add_proto qw/void vp9_highbd_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, unsigned int *accumulator, uint16_t *count";
+  add_proto qw/void vp9_highbd_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count";
  
  }
  # End vp9_high encoder functions
diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c

index 6ca5be0..cc6b36c 100644 (file)
--- a/vp9/encoder/vp9_temporal_filter.c
+++ b/vp9/encoder/vp9_temporal_filter.c
@@ -13,6 +13,7 @@
  #include <limits.h>
  
  #include "vp9/common/vp9_alloccommon.h"
+#include "vp9/common/vp9_common.h"
  #include "vp9/common/vp9_onyxc_int.h"
  #include "vp9/common/vp9_quant_common.h"
  #include "vp9/common/vp9_reconinter.h"
@@ -94,7 +95,7 @@ void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride,
                                   const uint8_t *frame2,
                                   unsigned int block_width,
                                   unsigned int block_height, int strength,
-                                 int filter_weight, unsigned int *accumulator,
+                                 int filter_weight, uint32_t *accumulator,
                                   uint16_t *count) {
    unsigned int i, j, k;
    int modifier;
@@ -162,7 +163,7 @@ void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride,
  void vp9_highbd_temporal_filter_apply_c(
      const uint8_t *frame1_8, unsigned int stride, const uint8_t *frame2_8,
      unsigned int block_width, unsigned int block_height, int strength,
-    int filter_weight, unsigned int *accumulator, uint16_t *count) {
+    int filter_weight, uint32_t *accumulator, uint16_t *count) {
    const uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
    const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
    unsigned int i, j, k;
@@ -292,7 +293,7 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
    unsigned int filter_weight;
    int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
    int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
-  DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 * 3]);
+  DECLARE_ALIGNED(16, uint32_t, accumulator[16 * 16 * 3]);
    DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
    MACROBLOCKD *mbd = &td->mb.e_mbd;
    YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
@@ -339,8 +340,8 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
      int stride;
      MV ref_mv;
  
-    memset(accumulator, 0, 16 * 16 * 3 * sizeof(accumulator[0]));
-    memset(count, 0, 16 * 16 * 3 * sizeof(count[0]));
+    vp9_zero_array(accumulator, 16 * 16 * 3);
+    vp9_zero_array(count, 16 * 16 * 3);
  
      td->mb.mv_limits.col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
      td->mb.mv_limits.col_max =
diff --git a/vp9/encoder/x86/temporal_filter_sse4.c b/vp9/encoder/x86/temporal_filter_sse4.c

index 0419aa1..be4cd86 100644 (file)
--- a/vp9/encoder/x86/temporal_filter_sse4.c
+++ b/vp9/encoder/x86/temporal_filter_sse4.c
@@ -165,7 +165,7 @@ static void average_16(__m128i *sum_0_u16, __m128i *sum_1_u16,
  
  // Add 'sum_u16' to 'count'. Multiply by 'pred' and add to 'accumulator.'
  static void accumulate_and_store_8(const __m128i sum_u16, const uint8_t *pred,
-                                   uint16_t *count, unsigned int *accumulator) {
+                                   uint16_t *count, uint32_t *accumulator) {
    const __m128i pred_u8 = _mm_loadl_epi64((const __m128i *)pred);
    const __m128i zero = _mm_setzero_si128();
    __m128i count_u16 = _mm_loadu_si128((const __m128i *)count);
@@ -194,7 +194,7 @@ static void accumulate_and_store_8(const __m128i sum_u16, const uint8_t *pred,
  static void accumulate_and_store_16(const __m128i sum_0_u16,
                                      const __m128i sum_1_u16,
                                      const uint8_t *pred, uint16_t *count,
-                                    unsigned int *accumulator) {
+                                    uint32_t *accumulator) {
    const __m128i pred_u8 = _mm_loadu_si128((const __m128i *)pred);
    const __m128i zero = _mm_setzero_si128();
    __m128i count_0_u16 = _mm_loadu_si128((const __m128i *)count),
@@ -237,7 +237,7 @@ static void accumulate_and_store_16(const __m128i sum_0_u16,
  void vp9_temporal_filter_apply_sse4_1(const uint8_t *a, unsigned int stride,
                                        const uint8_t *b, unsigned int width,
                                        unsigned int height, int strength,
-                                      int weight, unsigned int *accumulator,
+                                      int weight, uint32_t *accumulator,
                                        uint16_t *count) {
    unsigned int h;
    const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
@@ -250,9 +250,6 @@ void vp9_temporal_filter_apply_sse4_1(const uint8_t *a, unsigned int stride,
  
    assert(width == 8 || width == 16);
  
-  // TODO(johannkoenig) Use uint32_t for accumulator.
-  assert(sizeof(*accumulator) == sizeof(uint32_t));
-
    if (width == 8) {
      __m128i sum_row_a, sum_row_b, sum_row_c;
      __m128i mul_constants = _mm_setr_epi16(
author	Johann <johannkoenig@google.com>
	Fri, 28 Apr 2017 13:34:21 +0000 (06:34 -0700)
committer	Johann <johannkoenig@google.com>
	Fri, 28 Apr 2017 13:36:59 +0000 (06:36 -0700)
vp9/common/vp9_rtcd_defs.pl		patch | blob | history
vp9/encoder/vp9_temporal_filter.c		patch | blob | history
vp9/encoder/x86/temporal_filter_sse4.c		patch | blob | history