From 022c848b4d04a2c1f56a54748eee503767269023 Mon Sep 17 00:00:00 2001
From: Alex Converse <aconverse@google.com>
Date: Fri, 20 Nov 2015 13:48:51 -0800
Subject: [PATCH] Change highbd variance rounding to prevent negative variance.

Always round the sum of errors and the sum of squared errors toward
zero in variance calculations. This prevents the computed variance from
becoming negative. Avoiding rounding in the variance calculation
altogether might be better, but would be far more invasive.

Change-Id: Icf24e0e75ff94952fc026ba6a4d26adf8d373f1c
---
 test/variance_test.cc              |  4 ++--
 vpx_dsp/variance.c                 | 20 ++++++++------------
 vpx_dsp/variance.h                 |  2 ++
 vpx_dsp/x86/highbd_variance_sse2.c | 21 +++++++++++----------
 4 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/test/variance_test.cc b/test/variance_test.cc
index 6f50f78..1e10cde 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -54,11 +54,11 @@ static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) {
   switch (bit_depth) {
     case VPX_BITS_12:
       *sse = (*sse + 128) >> 8;
-      *se = (*se + 8) >> 4;
+      *se = *se / (1 << 4);
       break;
     case VPX_BITS_10:
       *sse = (*sse + 8) >> 4;
-      *se = (*se + 2) >> 2;
+      *se = *se / (1 << 2);
       break;
     case VPX_BITS_8:
     default:
diff --git a/vpx_dsp/variance.c b/vpx_dsp/variance.c
index e8bddb0..2d70c7b 100644
--- a/vpx_dsp/variance.c
+++ b/vpx_dsp/variance.c
@@ -275,7 +275,7 @@ void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred,
 #if CONFIG_VP9_HIGHBITDEPTH
 static void highbd_variance64(const uint8_t *a8, int  a_stride,
                               const uint8_t *b8, int  b_stride,
-                              int w, int h, uint64_t *sse, uint64_t *sum) {
+                              int w, int h, uint64_t *sse, int *sum) {
   int i, j;
 
   uint16_t *a = CONVERT_TO_SHORTPTR(a8);
@@ -298,30 +298,26 @@ static void highbd_8_variance(const uint8_t *a8, int  a_stride,
                               const uint8_t *b8, int  b_stride,
                               int w, int h, uint32_t *sse, int *sum) {
   uint64_t sse_long = 0;
-  uint64_t sum_long = 0;
-  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
+  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, sum);
   *sse = (uint32_t)sse_long;
-  *sum = (int)sum_long;
 }
 
 static void highbd_10_variance(const uint8_t *a8, int  a_stride,
                                const uint8_t *b8, int  b_stride,
                                int w, int h, uint32_t *sse, int *sum) {
   uint64_t sse_long = 0;
-  uint64_t sum_long = 0;
-  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
-  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2);
+  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, sum);
+  *sse = (uint32_t)ROUND_ZERO_POWER_OF_TWO(sse_long, 4);
+  *sum = ROUND_ZERO_POWER_OF_TWO(*sum, 2);
 }
 
 static void highbd_12_variance(const uint8_t *a8, int  a_stride,
                                const uint8_t *b8, int  b_stride,
                                int w, int h, uint32_t *sse, int *sum) {
   uint64_t sse_long = 0;
-  uint64_t sum_long = 0;
-  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long);
-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
-  *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4);
+  highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, sum);
+  *sse = (uint32_t)ROUND_ZERO_POWER_OF_TWO(sse_long, 8);
+  *sum = ROUND_ZERO_POWER_OF_TWO(*sum, 4);
 }
 
 #define HIGHBD_VAR(W, H) \
diff --git a/vpx_dsp/variance.h b/vpx_dsp/variance.h
index cd0fd98..fd7d30d 100644
--- a/vpx_dsp/variance.h
+++ b/vpx_dsp/variance.h
@@ -19,6 +19,8 @@
 extern "C" {
 #endif
 
+#define ROUND_ZERO_POWER_OF_TWO(value, n) ((value) / (1 << (n)))
+
 #define FILTER_BITS 7
 #define FILTER_WEIGHT 128
 
diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c
index b45331c..c45e7db 100644
--- a/vpx_dsp/x86/highbd_variance_sse2.c
+++ b/vpx_dsp/x86/highbd_variance_sse2.c
@@ -9,6 +9,7 @@
  */
 #include "./vpx_config.h"
 
+#include "vpx_dsp/variance.h"
 #include "vpx_ports/mem.h"
 
 typedef uint32_t (*high_variance_fn_t) (const uint16_t *src, int src_stride,
@@ -62,8 +63,8 @@ static void highbd_10_variance_sse2(const uint16_t *src, int src_stride,
       sum_long += sum0;
     }
   }
-  *sum = ROUND_POWER_OF_TWO(sum_long, 2);
-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4);
+  *sum = ROUND_ZERO_POWER_OF_TWO(sum_long, 2);
+  *sse = (uint32_t)ROUND_ZERO_POWER_OF_TWO(sse_long, 4);
 }
 
 static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
@@ -84,8 +85,8 @@ static void highbd_12_variance_sse2(const uint16_t *src, int src_stride,
       sum_long += sum0;
     }
   }
-  *sum = ROUND_POWER_OF_TWO(sum_long, 4);
-  *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8);
+  *sum = ROUND_ZERO_POWER_OF_TWO(sum_long, 4);
+  *sse = (uint32_t)ROUND_ZERO_POWER_OF_TWO(sse_long, 8);
 }
 
 
@@ -106,7 +107,7 @@ void vpx_highbd_10_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
   vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
                                      sse, sum); \
-  *sum = ROUND_POWER_OF_TWO(*sum, 2); \
+  *sum = ROUND_ZERO_POWER_OF_TWO(*sum, 2); \
   *sse = ROUND_POWER_OF_TWO(*sse, 4); \
 } \
 \
@@ -117,7 +118,7 @@ void vpx_highbd_12_get##S##x##S##var_sse2(const uint8_t *src8, int src_stride, \
   uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \
   vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, \
                                      sse, sum); \
-  *sum = ROUND_POWER_OF_TWO(*sum, 4); \
+  *sum = ROUND_ZERO_POWER_OF_TWO(*sum, 4); \
   *sse = ROUND_POWER_OF_TWO(*sse, 8); \
 }
 
@@ -338,7 +339,7 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
       sse += sse2; \
     } \
   } \
-  se = ROUND_POWER_OF_TWO(se, 2); \
+  se = ROUND_ZERO_POWER_OF_TWO(se, 2); \
   sse = ROUND_POWER_OF_TWO(sse, 4); \
   *sse_ptr = sse; \
   return sse - ((cast se * se) >> (wlog2 + hlog2)); \
@@ -385,7 +386,7 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
       }\
     } \
   } \
-  se = ROUND_POWER_OF_TWO(se, 4); \
+  se = ROUND_ZERO_POWER_OF_TWO(se, 4); \
   sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \
   *sse_ptr = sse; \
   return sse - ((cast se * se) >> (wlog2 + hlog2)); \
@@ -502,7 +503,7 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
       sse += sse2; \
     } \
   } \
-  se = ROUND_POWER_OF_TWO(se, 2); \
+  se = ROUND_ZERO_POWER_OF_TWO(se, 2); \
   sse = ROUND_POWER_OF_TWO(sse, 4); \
   *sse_ptr = sse; \
   return sse - ((cast se * se) >> (wlog2 + hlog2)); \
@@ -554,7 +555,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
       } \
     } \
   } \
-  se = ROUND_POWER_OF_TWO(se, 4); \
+  se = ROUND_ZERO_POWER_OF_TWO(se, 4); \
   sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \
   *sse_ptr = sse; \
   return sse - ((cast se * se) >> (wlog2 + hlog2)); \
-- 
2.7.4