[spatial svc]Multiple frame context feature

author Minghai Shang <minghai@google.com>

Mon, 18 Aug 2014 21:51:04 +0000 (14:51 -0700)

committer Minghai Shang <minghai@google.com>

Mon, 25 Aug 2014 17:02:45 +0000 (10:02 -0700)
author Minghai Shang <minghai@google.com>
Mon, 18 Aug 2014 21:51:04 +0000 (14:51 -0700)
committer Minghai Shang <minghai@google.com>
Mon, 25 Aug 2014 17:02:45 +0000 (10:02 -0700)
diff --git a/test/svc_test.cc b/test/svc_test.cc

index 1cb01a4..fa95608 100644 (file)
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -112,7 +112,7 @@ class SvcTest : public ::testing::Test {
        video.Next();
      }
  
-    // Flush encoder and test EOS packet
+    // Flush encoder and test EOS packet.
      res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(),
                           video.duration(), VPX_DL_GOOD_QUALITY);
      stats_size = vpx_svc_get_rc_stats_buffer_size(&svc_);
@@ -135,7 +135,7 @@ class SvcTest : public ::testing::Test {
          EXPECT_EQ(1, vpx_svc_is_keyframe(&svc_));
        }
  
-      outputs[*frame_received].buf = malloc(frame_size);
+      outputs[*frame_received].buf = malloc(frame_size + 16);
        ASSERT_TRUE(outputs[*frame_received].buf != NULL);
        memcpy(outputs[*frame_received].buf, vpx_svc_get_buffer(&svc_),
               frame_size);
@@ -176,13 +176,13 @@ class SvcTest : public ::testing::Test {
        video.Next();
      }
  
-    // Flush Encoder
+    // Flush encoder.
      res = vpx_svc_encode(&svc_, &codec_, NULL, 0,
                           video.duration(), VPX_DL_GOOD_QUALITY);
      EXPECT_EQ(VPX_CODEC_OK, res);
      StoreFrames(n, outputs, &frame_received);
  
-    EXPECT_EQ(frame_received, (size_t)n);
+    EXPECT_EQ(frame_received, static_cast<size_t>(n));
  
      ReleaseEncoder();
    }
@@ -204,7 +204,7 @@ class SvcTest : public ::testing::Test {
        ++decoded_frames;
  
        DxDataIterator dec_iter = decoder_->GetDxData();
-      while (dec_iter.Next()) {
+      while (dec_iter.Next() != NULL) {
          ++received_frames;
        }
      }
@@ -214,7 +214,8 @@ class SvcTest : public ::testing::Test {
  
    void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,
                               const int num_super_frames,
-                             const int remained_layers) {
+                             const int remained_layers,
+                             const bool is_multiple_frame_context) {
      ASSERT_TRUE(inputs != NULL);
      ASSERT_GT(num_super_frames, 0);
      ASSERT_GT(remained_layers, 0);
@@ -236,7 +237,7 @@ class SvcTest : public ::testing::Test {
        uint8_t *frame_data = static_cast<uint8_t *>(inputs[i].buf);
        uint8_t *frame_start = frame_data;
        for (frame = 0; frame < frame_count; ++frame) {
-        // Looking for a visible frame
+        // Looking for a visible frame.
          if (frame_data[0] & 0x02) {
            ++frames_found;
            if (frames_found == remained_layers)
@@ -244,11 +245,17 @@ class SvcTest : public ::testing::Test {
          }
          frame_data += frame_sizes[frame];
        }
-      ASSERT_LT(frame, frame_count);
-      if (frame == frame_count - 1)
+      ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. "
+          << "remaining_layers: " << remained_layers
+          << "    super_frame: " << i
+          << "    is_multiple_frame_context: " << is_multiple_frame_context;
+      if (frame == frame_count - 1 && !is_multiple_frame_context)
          continue;
  
        frame_data += frame_sizes[frame];
+      // We need to add one more frame for multiple frame context.
+      if (is_multiple_frame_context)
+        ++frame;
        uint8_t marker =
            static_cast<const uint8_t *>(inputs[i].buf)[inputs[i].sz - 1];
        const uint32_t mag = ((marker >> 3) & 0x3) + 1;
@@ -256,11 +263,37 @@ class SvcTest : public ::testing::Test {
        const size_t new_index_sz = 2 + mag * (frame + 1);
        marker &= 0x0f8;
        marker |= frame;
+
+      // Copy existing frame sizes.
+      memmove(frame_data + (is_multiple_frame_context ? 2 : 1),
+              frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2);
+      if (is_multiple_frame_context) {
+        // Add a one byte frame with the show_existing_frame flag set.
+        *frame_data++ = 0x88 | (remained_layers - 1);
+      }
+      // New marker.
        frame_data[0] = marker;
-      memcpy(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1,
-             new_index_sz - 2);
-      frame_data[new_index_sz - 1] = marker;
-      inputs[i].sz = frame_data - frame_start + new_index_sz;
+      frame_data += (mag * (frame + 1) + 1);
+
+      if (is_multiple_frame_context) {
+        // Write the frame size for the one byte frame.
+        frame_data -= mag;
+        *frame_data++ = 1;
+        for (uint32_t j = 1; j < mag; ++j) {
+          *frame_data++ = 0;
+        }
+      }
+
+      *frame_data++ = marker;
+      inputs[i].sz = frame_data - frame_start;
+
+      if (is_multiple_frame_context) {
+        // Change the show frame flag to 0 for all frames.
+        for (int j = 0; j < frame; ++j) {
+          frame_start[0] &= ~2;
+          frame_start += frame_sizes[j];
+        }
+      }
      }
    }
  
@@ -507,7 +540,7 @@ TEST_F(SvcTest, TwoPassEncode2LayersDecodeBaseLayerOnly) {
    vpx_fixed_buf outputs[10];
    memset(&outputs[0], 0, sizeof(outputs));
    Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropEnhancementLayers(&outputs[0], 10, 1);
+  DropEnhancementLayers(&outputs[0], 10, 1, false);
    DecodeNFrames(&outputs[0], 10);
    FreeBitstreamBuffers(&outputs[0], 10);
  }
@@ -525,13 +558,13 @@ TEST_F(SvcTest, TwoPassEncode5LayersDecode54321Layers) {
    Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]);
  
    DecodeNFrames(&outputs[0], 10);
-  DropEnhancementLayers(&outputs[0], 10, 4);
+  DropEnhancementLayers(&outputs[0], 10, 4, false);
    DecodeNFrames(&outputs[0], 10);
-  DropEnhancementLayers(&outputs[0], 10, 3);
+  DropEnhancementLayers(&outputs[0], 10, 3, false);
    DecodeNFrames(&outputs[0], 10);
-  DropEnhancementLayers(&outputs[0], 10, 2);
+  DropEnhancementLayers(&outputs[0], 10, 2, false);
    DecodeNFrames(&outputs[0], 10);
-  DropEnhancementLayers(&outputs[0], 10, 1);
+  DropEnhancementLayers(&outputs[0], 10, 1, false);
    DecodeNFrames(&outputs[0], 10);
  
    FreeBitstreamBuffers(&outputs[0], 10);
@@ -568,12 +601,121 @@ TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) {
    memset(&outputs[0], 0, sizeof(outputs));
    Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]);
    DecodeNFrames(&outputs[0], 20);
-  DropEnhancementLayers(&outputs[0], 20, 2);
+  DropEnhancementLayers(&outputs[0], 20, 2, false);
    DecodeNFrames(&outputs[0], 20);
-  DropEnhancementLayers(&outputs[0], 20, 1);
+  DropEnhancementLayers(&outputs[0], 20, 1, false);
    DecodeNFrames(&outputs[0], 20);
  
    FreeBitstreamBuffers(&outputs[0], 20);
  }
  
+TEST_F(SvcTest, SetMultipleFrameContextOption) {
+  svc_.spatial_layers = 5;
+  vpx_codec_err_t res =
+      vpx_svc_set_options(&svc_, "multi-frame-contexts=1");
+  EXPECT_EQ(VPX_CODEC_OK, res);
+  res = vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
+  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, res);
+
+  svc_.spatial_layers = 2;
+  res = vpx_svc_set_options(&svc_, "multi-frame-contexts=1");
+  InitializeEncoder();
+}
+
+TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContext) {
+  // First pass encode
+  std::string stats_buf;
+  Pass1EncodeNFrames(10, 2, &stats_buf);
+
+  // Second pass encode
+  codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  codec_enc_.g_error_resilient = 0;
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1");
+  vpx_fixed_buf outputs[10];
+  memset(&outputs[0], 0, sizeof(outputs));
+  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
+  DropEnhancementLayers(&outputs[0], 10, 2, true);
+  DecodeNFrames(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs[0], 10);
+}
+
+TEST_F(SvcTest, TwoPassEncode2LayersWithMultipleFrameContextDecodeBaselayer) {
+  // First pass encode
+  std::string stats_buf;
+  Pass1EncodeNFrames(10, 2, &stats_buf);
+
+  // Second pass encode
+  codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  codec_enc_.g_error_resilient = 0;
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 multi-frame-contexts=1");
+  vpx_fixed_buf outputs[10];
+  memset(&outputs[0], 0, sizeof(outputs));
+  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
+  DropEnhancementLayers(&outputs[0], 10, 1, true);
+  DecodeNFrames(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs[0], 10);
+}
+
+TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContext) {
+  // First pass encode
+  std::string stats_buf;
+  vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1");
+  Pass1EncodeNFrames(10, 2, &stats_buf);
+
+  // Second pass encode
+  codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  codec_enc_.g_error_resilient = 0;
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1 scale-factors=1/1,1/1 "
+                      "multi-frame-contexts=1");
+  vpx_fixed_buf outputs[10];
+  memset(&outputs[0], 0, sizeof(outputs));
+  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
+  DropEnhancementLayers(&outputs[0], 10, 2, true);
+  DecodeNFrames(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs[0], 10);
+}
+
+TEST_F(SvcTest, TwoPassEncode3SNRLayersWithMultipleFrameContextDecode321Layer) {
+  // First pass encode
+  std::string stats_buf;
+  vpx_svc_set_options(&svc_, "scale-factors=1/1,1/1,1/1");
+  Pass1EncodeNFrames(10, 3, &stats_buf);
+
+  // Second pass encode
+  codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  codec_enc_.g_error_resilient = 0;
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1,1,1 scale-factors=1/1,1/1,1/1 "
+                      "multi-frame-contexts=1");
+  vpx_fixed_buf outputs[10];
+  memset(&outputs[0], 0, sizeof(outputs));
+  Pass2EncodeNFrames(&stats_buf, 10, 3, &outputs[0]);
+
+  vpx_fixed_buf outputs_new[10];
+  for (int i = 0; i < 10; ++i) {
+    outputs_new[i].buf = malloc(outputs[i].sz + 16);
+    ASSERT_TRUE(outputs_new[i].buf != NULL);
+    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
+    outputs_new[i].sz = outputs[i].sz;
+  }
+  DropEnhancementLayers(&outputs_new[0], 10, 3, true);
+  DecodeNFrames(&outputs_new[0], 10);
+
+  for (int i = 0; i < 10; ++i) {
+    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
+    outputs_new[i].sz = outputs[i].sz;
+  }
+  DropEnhancementLayers(&outputs_new[0], 10, 2, true);
+  DecodeNFrames(&outputs_new[0], 10);
+
+  for (int i = 0; i < 10; ++i) {
+    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
+    outputs_new[i].sz = outputs[i].sz;
+  }
+  DropEnhancementLayers(&outputs_new[0], 10, 1, true);
+  DecodeNFrames(&outputs_new[0], 10);
+
+  FreeBitstreamBuffers(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs_new[0], 10);
+}
+
  }  // namespace
diff --git a/vp9/encoder/vp9_bitstream.c b/vp9/encoder/vp9_bitstream.c

index b0ff0fa..c8bb49c 100644 (file)
--- a/vp9/encoder/vp9_bitstream.c
+++ b/vp9/encoder/vp9_bitstream.c
@@ -1081,7 +1081,16 @@ static void write_uncompressed_header(VP9_COMP *cpi,
      write_bitdepth_colorspace_sampling(cm, wb);
      write_frame_size(cm, wb);
    } else {
-    if (!cm->show_frame)
+    // In spatial svc if it's not error_resilient_mode then we need to code all
+    // visible frames as invisible. But we need to keep the show_frame flag so
+    // that the publisher could know whether it is supposed to be visible.
+    // So we will code the show_frame flag as it is. Then code the intra_only
+    // bit here. This will make the bitstream incompatible. In the player we
+    // will change the show_frame flag to 0, then add a one byte frame with
+    // show_existing_frame flag which tells the decoder which frame we want to
+    // show.
+    if (!cm->show_frame ||
+        (is_spatial_svc(cpi) && cm->error_resilient_mode == 0))
        vp9_wb_write_bit(wb, cm->intra_only);
  
      if (!cm->error_resilient_mode)
diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c

index 8d7b07b..798da9b 100644 (file)
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2102,6 +2102,19 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
        cm->reset_frame_context = 2;
      }
    }
+  if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0) {
+    cm->frame_context_idx = cpi->svc.spatial_layer_id;
+
+    // The probs will be updated based on the frame type of its previous
+    // frame if frame_parallel_decoding_mode is 0. The type may vary for
+    // the frame after a key frame in base layer since we may drop enhancement
+    // layers. So set frame_parallel_decoding_mode to 1 in this case.
+    if (cpi->svc.spatial_layer_id == 0 &&
+        cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)
+      cm->frame_parallel_decoding_mode = 1;
+    else
+      cm->frame_parallel_decoding_mode = 0;
+  }
  
    // Configure experimental use of segmentation for enhanced coding of
    // static regions if indicated.
@@ -2277,8 +2290,12 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
    cm->last_height = cm->height;
  
    // reset to normal state now that we are done.
-  if (!cm->show_existing_frame)
-    cm->last_show_frame = cm->show_frame;
+  if (!cm->show_existing_frame) {
+    if (is_spatial_svc(cpi) && cm->error_resilient_mode == 0)
+      cm->last_show_frame = 0;
+    else
+      cm->last_show_frame = cm->show_frame;
+  }
  
    if (cm->show_frame) {
      vp9_swap_mi_and_prev_mi(cm);
@@ -2289,6 +2306,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi,
      if (cpi->use_svc)
        vp9_inc_frame_in_layer(&cpi->svc);
    }
+
+  if (is_spatial_svc(cpi))
+    cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type =
+        cm->frame_type;
  }
  
  static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c

index bf949c4..91ac3d0 100644 (file)
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -36,6 +36,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
      int i;
      lc->current_video_frame_in_layer = 0;
      lc->layer_size = 0;
+    lc->last_frame_type = FRAME_TYPES;
      lrc->ni_av_qi = oxcf->worst_allowed_q;
      lrc->total_actual_bits = 0;
      lrc->total_target_vs_actual = 0;
diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h

index 801449b..bc306d3 100644 (file)
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -28,6 +28,7 @@ typedef struct {
    struct vpx_fixed_buf rc_twopass_stats_in;
    unsigned int current_video_frame_in_layer;
    int is_key_frame;
+  FRAME_TYPE last_frame_type;
    vpx_svc_parameters_t svc_params_received;
    struct lookahead_entry  *alt_ref_source;
    int alt_ref_idx;
diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c

index dc89b4e..2d7f8e4 100644 (file)
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -188,6 +188,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
      if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)
        ERROR("Not enough ref buffers for svc alt ref frames");
    }
+  if (cfg->ss_number_layers > 3 && cfg->g_error_resilient == 0)
+    ERROR("Multiple frame contexts are not supported for more than 3 layers");
  #endif
  
    RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS);
diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c

index 7828615..45b0dca 100644 (file)
--- a/vpx/src/svc_encodeframe.c
+++ b/vpx/src/svc_encodeframe.c
@@ -86,6 +86,7 @@ typedef struct SvcInternal {
    int layers;
    int layer;
    int is_keyframe;
+  int use_multiple_frame_contexts;
  
    FrameData *frame_list;
    FrameData *frame_temp;
@@ -366,6 +367,7 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
    char *option_name;
    char *option_value;
    char *input_ptr;
+  SvcInternal *const si = get_svc_internal(svc_ctx);
    vpx_codec_err_t res = VPX_CODEC_OK;
  
    if (options == NULL) return VPX_CODEC_OK;
@@ -393,6 +395,8 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
      } else if (strcmp("auto-alt-refs", option_name) == 0) {
        res = parse_auto_alt_ref(svc_ctx, option_value);
        if (res != VPX_CODEC_OK) break;
+    } else if (strcmp("multi-frame-contexts", option_name) == 0) {
+      si->use_multiple_frame_contexts = atoi(option_value);
      } else {
        svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name);
        res = VPX_CODEC_INVALID_PARAM;
@@ -401,6 +405,10 @@ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) {
      option_name = strtok_r(NULL, "=", &input_ptr);
    }
    free(input_string);
+
+  if (si->use_multiple_frame_contexts && svc_ctx->spatial_layers > 3)
+    res = VPX_CODEC_INVALID_PARAM;
+
    return res;
  }
  
@@ -534,7 +542,8 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx,
    enc_cfg->rc_buf_initial_sz = 500;
    enc_cfg->rc_buf_optimal_sz = 600;
    enc_cfg->rc_buf_sz = 1000;
-  enc_cfg->g_error_resilient = 1;
+  if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0)
+    enc_cfg->g_error_resilient = 1;
  
    // Initialize codec
    res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR);
author	Minghai Shang <minghai@google.com>
	Mon, 18 Aug 2014 21:51:04 +0000 (14:51 -0700)
committer	Minghai Shang <minghai@google.com>
	Mon, 25 Aug 2014 17:02:45 +0000 (10:02 -0700)
test/svc_test.cc		patch \| blob \| history
vp9/encoder/vp9_bitstream.c		patch \| blob \| history
vp9/encoder/vp9_encoder.c		patch \| blob \| history
vp9/encoder/vp9_svc_layercontext.c		patch \| blob \| history
vp9/encoder/vp9_svc_layercontext.h		patch \| blob \| history
vp9/vp9_cx_iface.c		patch \| blob \| history
vpx/src/svc_encodeframe.c		patch \| blob \| history