From: sdeng <sdeng@google.com>
Date: Sat, 1 Jun 2019 00:25:09 +0000 (-0700)
Subject: Hierarchical rdmult scaling when tune=ssim
X-Git-Tag: v1.8.1~35^2
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b1072a793d4957e5a504d125ae5ded808b90060d;p=platform%2Fupstream%2Flibvpx.git

Hierarchical rdmult scaling when tune=ssim

Use a different Lagrangian multiplier scaling factor for each block
size. Blocks smaller than 16x16 share the multiplier of their parent
block.

The additional gains/losses on top of tune=ssim are:
Data Set   Overall PSNR   SSIM    MS-SSIM
Lowres         2.918     -3.691   -2.596
Midres         1.708     -2.656   -2.624
HDres          1.619     -2.496   -2.391
Midres_10bd    1.518     -3.263   -3.561

The overall gains/losses compared to tune=psnr are:
Data Set   Overall PSNR   SSIM    MS-SSIM
Lowres         5.583     -6.208   -4.978
Midres         4.024     -5.610   -6.411
HDres          4.102     -6.614   -7.457
Midres_10bd    4.647     -7.181   -8.614

Change-Id: I0e6c5008488734e979b2dacde9fc2a17f3aa620f
---

diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c
index 1009732..f64763e 100644
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -268,21 +268,38 @@ static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
 }
 
 static void set_ssim_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
-                            int mi_row, int mi_col, int *rdmult) {
+                            const BLOCK_SIZE bsize, const int mi_row,
+                            const int mi_col, int *const rdmult) {
   const VP9_COMMON *const cm = &cpi->common;
 
-  // SSIM rdmult scaling factors are currently 64x64 based.
-  const int num_8x8_w = 8;
-  const int num_8x8_h = 8;
+  const int bsize_base = BLOCK_16X16;
+  const int num_8x8_w = num_8x8_blocks_wide_lookup[bsize_base];
+  const int num_8x8_h = num_8x8_blocks_high_lookup[bsize_base];
   const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
-  const int row = mi_row / num_8x8_h;
-  const int col = mi_col / num_8x8_w;
-  const int index = row * num_cols + col;
+  const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
+  const int num_bcols =
+      (num_8x8_blocks_wide_lookup[bsize] + num_8x8_w - 1) / num_8x8_w;
+  const int num_brows =
+      (num_8x8_blocks_high_lookup[bsize] + num_8x8_h - 1) / num_8x8_h;
+  int row, col;
+  double num_of_mi = 0.0;
+  double geom_mean_of_scale = 0.0;
 
   assert(cpi->oxcf.tuning == VP8_TUNE_SSIM);
-  *rdmult =
-      (int)((double)(*rdmult) * cpi->mi_ssim_rdmult_scaling_factors[index]);
-  *rdmult = VPXMAX(*rdmult, 1);
+  // Factors are indexed per bsize_base unit: rows by height, cols by width.
+  for (row = mi_row / num_8x8_h;
+       row < num_rows && row < mi_row / num_8x8_h + num_brows; ++row) {
+    for (col = mi_col / num_8x8_w;
+         col < num_cols && col < mi_col / num_8x8_w + num_bcols; ++col) {
+      const int index = row * num_cols + col;
+      geom_mean_of_scale += log(cpi->mi_ssim_rdmult_scaling_factors[index]);
+      num_of_mi += 1.0;
+    }
+  }
+  geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi);
+  // exp(mean of logs) above is the geometric mean; scale rdmult and clamp.
+  *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale);
+  *rdmult = VPXMAX(*rdmult, 0);
   set_error_per_bit(x, *rdmult);
   vpx_clear_system_state();
 }
@@ -323,7 +340,7 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
   x->rddiv = cpi->rd.RDDIV;
   x->rdmult = cpi->rd.RDMULT;
   if (oxcf->tuning == VP8_TUNE_SSIM) {
-    set_ssim_rdmult(cpi, x, mi_row, mi_col, &x->rdmult);
+    set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
   }
 
   // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
@@ -1970,7 +1987,7 @@ static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
   }
 
   if (oxcf->tuning == VP8_TUNE_SSIM) {
-    set_ssim_rdmult(cpi, x, mi_row, mi_col, &x->rdmult);
+    set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
   }
 }
 
@@ -2221,7 +2238,7 @@ static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td,
     const VP9EncoderConfig *const oxcf = &cpi->oxcf;
     x->rdmult = x->cb_rdmult;
     if (oxcf->tuning == VP8_TUNE_SSIM) {
-      set_ssim_rdmult(cpi, x, mi_row, mi_col, &x->rdmult);
+      set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult);
     }
   }
 
@@ -3845,7 +3862,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
   const int rd_div = x->rddiv;
   int partition_mul = x->cb_rdmult;
   if (oxcf->tuning == VP8_TUNE_SSIM) {
-    set_ssim_rdmult(cpi, x, mi_row, mi_col, &partition_mul);
+    set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &partition_mul);
   }
   vp9_rd_cost_update(partition_mul, rd_div, &best_rdc);
 
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index 900d92f..04ae7d1 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2397,7 +2397,7 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
   vp9_set_speed_features_framesize_dependent(cpi, oxcf->speed);
 
   {
-    const int bsize = BLOCK_64X64;
+    const int bsize = BLOCK_16X16;
     const int w = num_8x8_blocks_wide_lookup[bsize];
     const int h = num_8x8_blocks_high_lookup[bsize];
     const int num_cols = (cm->mi_cols + w - 1) / w;
@@ -4759,15 +4759,9 @@ static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
 // Some sample values are:
 // (for midres test set)
 // SSIM_VAR_SCALE  avg_psnr   ssim   ms_ssim
-//     16.0          2.312   -3.062  -3.882
-//     32.0          0.852   -2.260  -2.821
-//     64.0          0.294   -1.606  -1.925
-// (for midres_10bd test set)
-// SSIM_VAR_SCALE  avg_psnr   ssim   ms_ssim
-//      8.0          6.782   -3.872  -5.464
-//     16.0          3.189   -4.083  -5.258
-//     32.0          1.113   -3.423  -4.309
-//     64.0          0.241   -2.515  -3.074
+//      8.0          8.980   -5.767  -7.069
+//     16.0          4.315   -5.610  -6.411
+//     32.0          1.559   -4.541  -4.980
 #define SSIM_VAR_SCALE 16.0
 static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
   VP9_COMMON *cm = &cpi->common;
@@ -4776,7 +4770,7 @@ static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
   MACROBLOCKD *xd = &x->e_mbd;
   uint8_t *y_buffer = cpi->Source->y_buffer;
   const int y_stride = cpi->Source->y_stride;
-  const int block_size = BLOCK_64X64;
+  const int block_size = BLOCK_16X16;
 
   const int num_8x8_w = num_8x8_blocks_wide_lookup[block_size];
   const int num_8x8_h = num_8x8_blocks_high_lookup[block_size];