From d8500344430112d794ae6858802c628c79ce20d8 Mon Sep 17 00:00:00 2001
From: Stefan Holmer <holmer@google.com>
Date: Wed, 30 May 2012 12:17:06 +0200
Subject: [PATCH] Added another denoising threshold for finding DC shifts.

Compares the sum of differences between the input block and the averaged
block. If they differ too much the block will not be filtered. Negligible
perfomance hit.

Change-Id: Ib1c31a265efd4d100b3abc4a1ea6675038c8ddde
---
 vp8/common/rtcd_defs.sh          |  2 +-
 vp8/encoder/denoising.c          | 56 +++++++++++++++++++++++++++-------------
 vp8/encoder/denoising.h          |  7 +++++
 vp8/encoder/x86/denoising_sse2.c | 34 ++++++++++++++++++++----
 4 files changed, 75 insertions(+), 24 deletions(-)

diff --git a/vp8/common/rtcd_defs.sh b/vp8/common/rtcd_defs.sh
index e315fcd..d6cbd4a 100644
--- a/vp8/common/rtcd_defs.sh
+++ b/vp8/common/rtcd_defs.sh
@@ -505,7 +505,7 @@ specialize vp8_yv12_copy_partial_frame neon
 # Denoiser filter
 #
 if [ "$CONFIG_TEMPORAL_DENOISING" = "yes" ]; then
-    prototype void vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
+    prototype int vp8_denoiser_filter "struct yv12_buffer_config* mc_running_avg, struct yv12_buffer_config* running_avg, struct macroblock* signal, unsigned int motion_magnitude2, int y_offset, int uv_offset"
     specialize vp8_denoiser_filter sse2
 fi
 
diff --git a/vp8/encoder/denoising.c b/vp8/encoder/denoising.c
index 194d422..f392396 100644
--- a/vp8/encoder/denoising.c
+++ b/vp8/encoder/denoising.c
@@ -15,14 +15,11 @@
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_rtcd.h"
 
-
-static const unsigned int NOISE_MOTION_THRESHOLD = 20 * 20;
+static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25;
 // SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming var(noise) ~= 100.
 static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20;
 static const unsigned int SSE_THRESHOLD = 16 * 16 * 40;
 
-
-
 // The filtering coefficients used for denoizing are adjusted for static
 // blocks, or blocks with very small motion vectors. This is done through
 // the motion magnitude parameter.
@@ -82,11 +79,15 @@ union coeff_pair *vp8_get_filter_coeff_LUT(unsigned int motion_magnitude)
 
 
 
-void vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
-                           YV12_BUFFER_CONFIG *running_avg, MACROBLOCK *signal,
-                           unsigned int motion_magnitude, int y_offset,
-                           int uv_offset)
+int vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
+                          YV12_BUFFER_CONFIG *running_avg,
+                          MACROBLOCK *signal,
+                          unsigned int motion_magnitude,
+                          int y_offset,
+                          int uv_offset)
 {
+    unsigned char filtered_buf[16*16];
+    unsigned char *filtered = filtered_buf;
     unsigned char *sig = signal->thismb;
     int sig_stride = 16;
     unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
@@ -95,6 +96,7 @@ void vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
     int avg_y_stride = running_avg->y_stride;
     const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude);
     int r, c;
+    int sum_diff = 0;
 
     for (r = 0; r < 16; ++r)
     {
@@ -133,22 +135,31 @@ void vp8_denoiser_filter_c(YV12_BUFFER_CONFIG *mc_running_avg,
         for (c = 0; c < 16; ++c)
         {
             const int diff = sig[c] - running_avg_y[c];
+            sum_diff += diff;
 
             if (diff * diff < NOISE_DIFF2_THRESHOLD)
             {
-                sig[c] = running_avg_y[c];
+                filtered[c] = running_avg_y[c];
             }
             else
             {
+                filtered[c] = sig[c];
                 running_avg_y[c] = sig[c];
             }
         }
 
         // Update pointers for next iteration.
         sig += sig_stride;
+        filtered += 16;
         mc_running_avg_y += mc_avg_y_stride;
         running_avg_y += avg_y_stride;
     }
+    if (abs(sum_diff) > SUM_DIFF_THRESHOLD)
+    {
+        return COPY_BLOCK;
+    }
+    vp8_copy_mem16x16(filtered_buf, 16, signal->thismb, sig_stride);
+    return FILTER_BLOCK;
 }
 
 
@@ -215,8 +226,10 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
     MV_REFERENCE_FRAME frame = x->best_reference_frame;
     MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
 
+    enum vp8_denoiser_decision decision = FILTER_BLOCK;
+
     // Motion compensate the running average.
-    if(zero_frame)
+    if (zero_frame)
     {
         YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame];
         YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg;
@@ -238,7 +251,7 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
         mv_col = x->best_sse_mv.as_mv.col;
         mv_row = x->best_sse_mv.as_mv.row;
 
-        if(frame == INTRA_FRAME ||
+        if (frame == INTRA_FRAME ||
             (mv_row *mv_row + mv_col *mv_col <= NOISE_MOTION_THRESHOLD &&
              sse_diff < SSE_DIFF_THRESHOLD))
         {
@@ -296,18 +309,25 @@ void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
     if (best_sse > SSE_THRESHOLD || motion_magnitude2
            > 8 * NOISE_MOTION_THRESHOLD)
     {
+        decision = COPY_BLOCK;
+    }
+
+    if (decision == FILTER_BLOCK)
+    {
+        // Filter.
+        decision = vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
+                                       &denoiser->yv12_running_avg[LAST_FRAME],
+                                       x,
+                                       motion_magnitude2,
+                                       recon_yoffset, recon_uvoffset);
+    }
+    if (decision == COPY_BLOCK)
+    {
         // No filtering of this block; it differs too much from the predictor,
         // or the motion vector magnitude is considered too big.
         vp8_copy_mem16x16(
                 x->thismb, 16,
                 denoiser->yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset,
                 denoiser->yv12_running_avg[LAST_FRAME].y_stride);
-        return;
     }
-
-    // Filter.
-    vp8_denoiser_filter(&denoiser->yv12_mc_running_avg,
-                        &denoiser->yv12_running_avg[LAST_FRAME], x,
-                        motion_magnitude2,
-                        recon_yoffset, recon_uvoffset);
 }
diff --git a/vp8/encoder/denoising.h b/vp8/encoder/denoising.h
index 441bccd..dc78e65 100644
--- a/vp8/encoder/denoising.h
+++ b/vp8/encoder/denoising.h
@@ -14,6 +14,13 @@
 #include "block.h"
 
 #define NOISE_DIFF2_THRESHOLD (75)
+#define SUM_DIFF_THRESHOLD (16 * 16 * 2)
+
+enum vp8_denoiser_decision
+{
+  COPY_BLOCK,
+  FILTER_BLOCK,
+};
 
 typedef struct vp8_denoiser
 {
diff --git a/vp8/encoder/x86/denoising_sse2.c b/vp8/encoder/x86/denoising_sse2.c
index 9bf9382..34ad916 100644
--- a/vp8/encoder/x86/denoising_sse2.c
+++ b/vp8/encoder/x86/denoising_sse2.c
@@ -17,11 +17,25 @@
 
 #include <emmintrin.h>
 
-void vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
-                              YV12_BUFFER_CONFIG *running_avg,
-                              MACROBLOCK *signal, unsigned int motion_magnitude,
-                              int y_offset, int uv_offset)
+union sum_union {
+    __m128i v;
+    short e[8];
+};
+
+inline int sum_vec_128i(__m128i vec)
+{
+    union sum_union s = { .v = vec };
+    return s.e[0] + s.e[1] + s.e[2] + s.e[3] +
+        s.e[4] + s.e[5] + s.e[6] + s.e[7];
+}
+
+int vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
+                             YV12_BUFFER_CONFIG *running_avg,
+                             MACROBLOCK *signal, unsigned int motion_magnitude,
+                             int y_offset, int uv_offset)
 {
+    unsigned char filtered_buf[16*16];
+    unsigned char *filtered = filtered_buf;
     unsigned char *sig = signal->thismb;
     int sig_stride = 16;
     unsigned char *mc_running_avg_y = mc_running_avg->y_buffer + y_offset;
@@ -30,6 +44,7 @@ void vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
     int avg_y_stride = running_avg->y_stride;
     const union coeff_pair *LUT = vp8_get_filter_coeff_LUT(motion_magnitude);
     int r, c;
+    __m128i sum_diff = { 0 };
 
     for (r = 0; r < 16; ++r)
     {
@@ -110,6 +125,8 @@ void vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
         // isn't classified as noise.
         diff0 = _mm_sub_epi16(v_sig0, res0);
         diff1 = _mm_sub_epi16(v_sig1, res2);
+        sum_diff = _mm_add_epi16(sum_diff, _mm_add_epi16(diff0, diff1));
+
         diff0sq = _mm_mullo_epi16(diff0, diff0);
         diff1sq = _mm_mullo_epi16(diff1, diff1);
         diff_sq = _mm_packus_epi16(diff0sq, diff1sq);
@@ -118,11 +135,18 @@ void vp8_denoiser_filter_sse2(YV12_BUFFER_CONFIG *mc_running_avg,
         p1 = _mm_andnot_si128(take_running, v_sig);
         p2 = _mm_or_si128(p0, p1);
         _mm_storeu_si128((__m128i *)(&running_avg_y[0]), p2);
-        _mm_storeu_si128((__m128i *)(&sig[0]), p2);
+        _mm_storeu_si128((__m128i *)(&filtered[0]), p2);
 
         // Update pointers for next iteration.
         sig += sig_stride;
+        filtered += 16;
         mc_running_avg_y += mc_avg_y_stride;
         running_avg_y += avg_y_stride;
     }
+    if (abs(sum_vec_128i(sum_diff)) > SUM_DIFF_THRESHOLD)
+    {
+        return COPY_BLOCK;
+    }
+    vp8_copy_mem16x16(filtered_buf, 16, signal->thismb, sig_stride);
+    return FILTER_BLOCK;
 }
-- 
2.7.4