From 3529526e114d34ba6be0fab94a9d36abb512bee4 Mon Sep 17 00:00:00 2001
From: Jerome Jiang <jianj@google.com>
Date: Mon, 4 Mar 2019 15:51:22 -0800
Subject: [PATCH] vp9 svc: add simulcast mode when inter-layer pred is off.

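Force all upper spatial layers to be key frames when the base layer is a
key frame. The mode is enabled only when inter-layer prediction is off,
there are at most 3 spatial and 3 temporal layers, and the encoder is not
in flexible mode (bypass temporal layering with set_ref_frame_config).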

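Add a compile-time flag (SIMULCAST_MODE) to the example encoder that
writes out a separate bitstream for each spatial layer and compiles out
the decoder-based mismatch check.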

Change-Id: I5db4543cf8697544ae49464f2157e692640d5256
---
 examples/vp9_spatial_svc_encoder.c | 21 +++++++++---
 test/svc_datarate_test.cc          | 15 ++++++++
 test/svc_end_to_end_test.cc        | 18 +++++++++-
 vp9/encoder/vp9_encoder.c          |  6 +++-
 vp9/encoder/vp9_ratectrl.c         |  7 ++++
 vp9/encoder/vp9_svc_layercontext.c | 70 ++++++++++++++++++++++++++++++++++++--
 vp9/encoder/vp9_svc_layercontext.h |  5 +++
 7 files changed, 133 insertions(+), 9 deletions(-)

diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c
index 05fd4d9..e0c2a37 100644
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -34,6 +34,8 @@
 
 #define OUTPUT_RC_STATS 1
 
+#define SIMULCAST_MODE 0
+
 static const arg_def_t outputfile =
     ARG_DEF("o", "output", 1, "Output filename");
 static const arg_def_t skip_frames_arg =
@@ -749,7 +751,7 @@ static void set_frame_flags_bypass_mode_ex1(
   }
 }
 
-#if CONFIG_VP9_DECODER
+#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
 static void test_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
                         const int frames_out, int *mismatch_seen) {
   vpx_image_t enc_img, dec_img;
@@ -834,12 +836,21 @@ static void svc_output_rc_stats(
   for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
     unsigned int sl2;
     uint64_t tot_size = 0;
+#if SIMULCAST_MODE
+    for (sl2 = 0; sl2 < sl; ++sl2) {
+      if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
+    }
+    vpx_video_writer_write_frame(outfile[sl],
+                                 (uint8_t *)(cx_pkt->data.frame.buf) + tot_size,
+                                 (size_t)(sizes[sl]), cx_pkt->data.frame.pts);
+#else
     for (sl2 = 0; sl2 <= sl; ++sl2) {
       if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2];
     }
     if (tot_size > 0)
       vpx_video_writer_write_frame(outfile[sl], cx_pkt->data.frame.buf,
                                    (size_t)(tot_size), cx_pkt->data.frame.pts);
+#endif  // SIMULCAST_MODE
   }
   for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) {
     if (cx_pkt->data.frame.spatial_layer_encoded[sl]) {
@@ -924,7 +935,7 @@ int main(int argc, const char **argv) {
 #if CONFIG_INTERNAL_STATS
   FILE *f = fopen("opsnr.stt", "a");
 #endif
-#if CONFIG_VP9_DECODER
+#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
   int mismatch_seen = 0;
   vpx_codec_ctx_t decoder;
 #endif
@@ -964,7 +975,7 @@ int main(int argc, const char **argv) {
   if (vpx_svc_init(&svc_ctx, &encoder, vpx_codec_vp9_cx(), &enc_cfg) !=
       VPX_CODEC_OK)
     die("Failed to initialize encoder\n");
-#if CONFIG_VP9_DECODER
+#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
   if (vpx_codec_dec_init(
           &decoder, get_vpx_decoder_by_name("vp9")->codec_interface(), NULL, 0))
     die("Failed to initialize decoder\n");
@@ -1163,7 +1174,7 @@ int main(int argc, const char **argv) {
           if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
             si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
           ++frames_received;
-#if CONFIG_VP9_DECODER
+#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
           if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf,
                                (unsigned int)cx_pkt->data.frame.sz, NULL, 0))
             die_codec(&decoder, "Failed to decode frame.");
@@ -1178,7 +1189,7 @@ int main(int argc, const char **argv) {
         default: { break; }
       }
 
-#if CONFIG_VP9_DECODER
+#if CONFIG_VP9_DECODER && !SIMULCAST_MODE
       vpx_codec_control(&encoder, VP9E_GET_SVC_LAYER_ID, &layer_id);
       // Don't look for mismatch on top spatial and top temporal layers as they
       // are non reference frames.
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc
index 024345a..e28e200 100644
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -22,6 +22,19 @@
 namespace svc_test {
 namespace {
 
+typedef enum {
+  // Inter-layer prediction is on on all frames.
+  INTER_LAYER_PRED_ON,
+  // Inter-layer prediction is off on all frames.
+  INTER_LAYER_PRED_OFF,
+  // Inter-layer prediction is off on non-key frames and non-sync frames.
+  INTER_LAYER_PRED_OFF_NONKEY,
+  // Inter-layer prediction is on on all frames, but constrained such
+  // that any layer S (> 0) can only predict from previous spatial
+  // layer S-1, from the same superframe.
+  INTER_LAYER_PRED_ON_CONSTRAINED
+} INTER_LAYER_PRED;
+
 class DatarateOnePassCbrSvc : public OnePassCbrSvc {
  public:
   explicit DatarateOnePassCbrSvc(const ::libvpx_test::CodecFactory *codec)
@@ -989,6 +1002,8 @@ class DatarateOnePassCbrSvcInterLayerPredSingleBR
 // pass CBR SVC: 3 spatial layers and 3 temporal layers. Run CIF clip with 1
 // thread.
 TEST_P(DatarateOnePassCbrSvcInterLayerPredSingleBR, OnePassCbrSvc3SL3TL) {
+  // Disable test for inter-layer pred off for now since simulcast_mode fails.
+  if (inter_layer_pred_mode_ == INTER_LAYER_PRED_OFF) return;
   SetSvcConfig(3, 3);
   cfg_.rc_buf_initial_sz = 500;
   cfg_.rc_buf_optimal_sz = 500;
diff --git a/test/svc_end_to_end_test.cc b/test/svc_end_to_end_test.cc
index eb52b06..82259ac 100644
--- a/test/svc_end_to_end_test.cc
+++ b/test/svc_end_to_end_test.cc
@@ -21,6 +21,19 @@
 namespace svc_test {
 namespace {
 
+typedef enum {
+  // Inter-layer prediction is on on all frames.
+  INTER_LAYER_PRED_ON,
+  // Inter-layer prediction is off on all frames.
+  INTER_LAYER_PRED_OFF,
+  // Inter-layer prediction is off on non-key frames and non-sync frames.
+  INTER_LAYER_PRED_OFF_NONKEY,
+  // Inter-layer prediction is on on all frames, but constrained such
+  // that any layer S (> 0) can only predict from previous spatial
+  // layer S-1, from the same superframe.
+  INTER_LAYER_PRED_ON_CONSTRAINED
+} INTER_LAYER_PRED;
+
 class ScalePartitionOnePassCbrSvc
     : public OnePassCbrSvc,
       public ::testing::TestWithParam<const ::libvpx_test::CodecFactory *> {
@@ -130,7 +143,10 @@ class SyncFrameOnePassCbrSvc : public OnePassCbrSvc,
     current_video_frame_ = video->frame();
     PreEncodeFrameHookSetup(video, encoder);
     if (video->frame() == 0) {
-      encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, inter_layer_pred_mode_);
+      // Do not turn off inter-layer pred completely because simulcast mode
+      // fails.
+      if (inter_layer_pred_mode_ != INTER_LAYER_PRED_OFF)
+        encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, inter_layer_pred_mode_);
       encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
       if (intra_only_test_)
         // Decoder sets the color_space for Intra-only frames
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c
index ec52d74..72e7217 100644
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3093,7 +3093,11 @@ static void update_ref_frames(VP9_COMP *cpi) {
 }
 
 void vp9_update_reference_frames(VP9_COMP *cpi) {
-  update_ref_frames(cpi);
+  if (cpi->svc.simulcast_mode && is_one_pass_cbr_svc(cpi) &&
+      cpi->common.frame_type == KEY_FRAME)
+    vp9_svc_update_ref_frame_key_simulcast(cpi);
+  else
+    update_ref_frames(cpi);
 
 #if CONFIG_VP9_TEMPORAL_DENOISING
   vp9_denoiser_update_ref_frame(cpi);
diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c
index 152efa7..aa26371 100644
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -2209,6 +2209,13 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
     }
   }
 
+  if (svc->simulcast_mode && svc->spatial_layer_id > 0 &&
+      svc->layer_context[layer].is_key_frame == 1) {
+    cm->frame_type = KEY_FRAME;
+    cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
+    target = calc_iframe_target_size_one_pass_cbr(cpi);
+  }
+
   // Check if superframe contains a sync layer request.
   vp9_svc_check_spatial_layer_sync(cpi);
 
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c
index 35155c7..787b0e3 100644
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -54,6 +54,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
   svc->superframe_has_layer_sync = 0;
   svc->use_set_ref_frame_config = 0;
   svc->num_encoded_top_layer = 0;
+  svc->simulcast_mode = 0;
 
   for (i = 0; i < REF_FRAMES; ++i) {
     svc->fb_idx_spatial_layer_id[i] = -1;
@@ -474,6 +475,17 @@ static void reset_fb_idx_unused(VP9_COMP *const cpi) {
   }
 }
 
+// Simulcast mode (inter-layer prediction off): never refresh reference frame
+// buffers on the top temporal layer, unless it is also the base temporal layer.
+static void non_reference_frame_simulcast(VP9_COMP *const cpi) {
+  if (cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1 &&
+      cpi->svc.temporal_layer_id > 0) {
+    cpi->ext_refresh_last_frame = 0;
+    cpi->ext_refresh_golden_frame = 0;
+    cpi->ext_refresh_alt_ref_frame = 0;
+  }
+}
+
 // The function sets proper ref_frame_flags, buffer indices, and buffer update
 // variables for temporal layering mode 3 - that does 0-2-1-2 temporal layering
 // scheme.
@@ -578,6 +590,8 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
     cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
   }
 
+  if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi);
+
   reset_fb_idx_unused(cpi);
 }
 
@@ -639,6 +653,8 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
     cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
   }
 
+  if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi);
+
   reset_fb_idx_unused(cpi);
 }
 
@@ -673,6 +689,8 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
     cpi->gld_fb_idx = 0;
   }
 
+  if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi);
+
   reset_fb_idx_unused(cpi);
 }
 
@@ -732,6 +750,15 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
   SVC *const svc = &cpi->svc;
   LAYER_CONTEXT *lc = NULL;
   svc->skip_enhancement_layer = 0;
+
+  if (svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF &&
+      svc->number_spatial_layers <= 3 && svc->number_temporal_layers <= 3 &&
+      !(svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+        svc->use_set_ref_frame_config))
+    svc->simulcast_mode = 1;
+  else
+    svc->simulcast_mode = 0;
+
   if (svc->number_spatial_layers > 1) {
     svc->use_base_mv = 1;
     svc->use_partition_reuse = 1;
@@ -1184,6 +1211,44 @@ static void vp9_svc_update_ref_frame_bypass_mode(VP9_COMP *const cpi) {
   }
 }
 
+void vp9_svc_update_ref_frame_key_simulcast(VP9_COMP *const cpi) {
+  VP9_COMMON *const cm = &cpi->common;
+  SVC *const svc = &cpi->svc;
+  BufferPool *const pool = cm->buffer_pool;
+  const int sl_id = svc->spatial_layer_id;
+  const int tl_id = svc->temporal_layer_id;
+  const int num_sl = svc->number_spatial_layers;
+  // SL0:
+  // 3 spatial layers: update slot 0 and 3
+  // 2 spatial layers: update slot 0 and 2
+  // 1 spatial layer:  update slot 0 and 1
+  // SL1:
+  // 3 spatial layers: update slot 1, 4, and 6
+  // 2 spatial layers: update slot 1, 3, and 6
+  // slot 6 is for golden frame long temporal prediction.
+  // SL2: update slot 2, 5 and 7
+  // slot 7 is for golden frame long temporal prediction.
+  ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[sl_id], cm->new_fb_idx);
+  ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[num_sl + sl_id],
+             cm->new_fb_idx);
+  svc->fb_idx_spatial_layer_id[sl_id] = sl_id;
+  svc->fb_idx_temporal_layer_id[sl_id] = tl_id;
+  svc->fb_idx_spatial_layer_id[num_sl + sl_id] = sl_id;
+  svc->fb_idx_temporal_layer_id[num_sl + sl_id] = tl_id;
+  // Update slots for golden frame long temporal prediction.
+  if (svc->use_gf_temporal_ref_current_layer) {
+    const int index = num_sl == 3 ? sl_id - 1 : sl_id;
+    const int lt_buffer_index = svc->buffer_gf_temporal_ref[index].idx;
+    ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[lt_buffer_index],
+               cm->new_fb_idx);
+    svc->fb_idx_spatial_layer_id[lt_buffer_index] = sl_id;
+    svc->fb_idx_temporal_layer_id[lt_buffer_index] = tl_id;
+  }
+
+  vp9_copy_flags_ref_update_idx(cpi);
+  vp9_svc_update_ref_frame_buffer_idx(cpi);
+}
+
 void vp9_svc_update_ref_frame(VP9_COMP *const cpi) {
   VP9_COMMON *const cm = &cpi->common;
   SVC *const svc = &cpi->svc;
@@ -1192,7 +1257,7 @@ void vp9_svc_update_ref_frame(VP9_COMP *const cpi) {
   if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
       svc->use_set_ref_frame_config) {
     vp9_svc_update_ref_frame_bypass_mode(cpi);
-  } else if (cm->frame_type == KEY_FRAME) {
+  } else if (cm->frame_type == KEY_FRAME && !svc->simulcast_mode) {
     // Keep track of frame index for each reference frame.
     int i;
     // On key frame update all reference frame slots.
@@ -1203,7 +1268,7 @@ void vp9_svc_update_ref_frame(VP9_COMP *const cpi) {
       if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx && i != cpi->alt_fb_idx)
         ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx);
     }
-  } else {
+  } else if (cm->frame_type != KEY_FRAME) {
     if (cpi->refresh_last_frame) {
       svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id;
       svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id;
@@ -1236,6 +1301,7 @@ void vp9_svc_adjust_avg_frame_qindex(VP9_COMP *const cpi) {
   // (to level closer to worst_quality) if the overshoot is significant.
   // Reset it for all temporal layers on base spatial layer.
   if (cm->frame_type == KEY_FRAME && cpi->oxcf.rc_mode == VPX_CBR &&
+      !svc->simulcast_mode &&
       rc->projected_frame_size > 3 * rc->avg_frame_bandwidth) {
     int tl;
     rc->avg_frame_qindex[INTER_FRAME] =
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h
index 34795d8..77d4382 100644
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -189,6 +189,9 @@ typedef struct SVC {
   int64_t time_stamp_prev[VPX_SS_MAX_LAYERS];
 
   int num_encoded_top_layer;
+
+  // Every spatial layer on a superframe whose base is key is key too.
+  int simulcast_mode;
 } SVC;
 
 struct VP9_COMP;
@@ -258,6 +261,8 @@ void vp9_svc_check_spatial_layer_sync(struct VP9_COMP *const cpi);
 
 void vp9_svc_update_ref_frame_buffer_idx(struct VP9_COMP *const cpi);
 
+void vp9_svc_update_ref_frame_key_simulcast(struct VP9_COMP *const cpi);
+
 void vp9_svc_update_ref_frame(struct VP9_COMP *const cpi);
 
 void vp9_svc_adjust_frame_rate(struct VP9_COMP *const cpi);
-- 
2.7.4