From 3529526e114d34ba6be0fab94a9d36abb512bee4 Mon Sep 17 00:00:00 2001 From: Jerome Jiang Date: Mon, 4 Mar 2019 15:51:22 -0800 Subject: [PATCH] vp9 svc: add simulcast mode when inter-layer pred is off. Force all upper spatial layers to be key frame if the base layer is key. Mode only works for inter-layer pred=off and non-flexible mode. Add flag to write out bitstream for each spatial layer in example encoder. Change-Id: I5db4543cf8697544ae49464f2157e692640d5256 --- examples/vp9_spatial_svc_encoder.c | 21 +++++++++--- test/svc_datarate_test.cc | 15 ++++++++ test/svc_end_to_end_test.cc | 18 +++++++++- vp9/encoder/vp9_encoder.c | 6 +++- vp9/encoder/vp9_ratectrl.c | 7 ++++ vp9/encoder/vp9_svc_layercontext.c | 70 ++++++++++++++++++++++++++++++++++++-- vp9/encoder/vp9_svc_layercontext.h | 5 +++ 7 files changed, 133 insertions(+), 9 deletions(-) diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c index 05fd4d9..e0c2a37 100644 --- a/examples/vp9_spatial_svc_encoder.c +++ b/examples/vp9_spatial_svc_encoder.c @@ -34,6 +34,8 @@ #define OUTPUT_RC_STATS 1 +#define SIMULCAST_MODE 0 + static const arg_def_t outputfile = ARG_DEF("o", "output", 1, "Output filename"); static const arg_def_t skip_frames_arg = @@ -749,7 +751,7 @@ static void set_frame_flags_bypass_mode_ex1( } } -#if CONFIG_VP9_DECODER +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE static void test_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder, const int frames_out, int *mismatch_seen) { vpx_image_t enc_img, dec_img; @@ -834,12 +836,21 @@ static void svc_output_rc_stats( for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { unsigned int sl2; uint64_t tot_size = 0; +#if SIMULCAST_MODE + for (sl2 = 0; sl2 < sl; ++sl2) { + if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2]; + } + vpx_video_writer_write_frame(outfile[sl], + (uint8_t *)(cx_pkt->data.frame.buf) + tot_size, + (size_t)(sizes[sl]), cx_pkt->data.frame.pts); +#else for (sl2 = 0; sl2 <= sl; ++sl2) { if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2]; } if (tot_size > 0) vpx_video_writer_write_frame(outfile[sl], cx_pkt->data.frame.buf, (size_t)(tot_size), cx_pkt->data.frame.pts); +#endif // SIMULCAST_MODE } for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { @@ -924,7 +935,7 @@ int main(int argc, const char **argv) { #if CONFIG_INTERNAL_STATS FILE *f = fopen("opsnr.stt", "a"); #endif -#if CONFIG_VP9_DECODER +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE int mismatch_seen = 0; vpx_codec_ctx_t decoder; #endif @@ -964,7 +975,7 @@ int main(int argc, const char **argv) { if (vpx_svc_init(&svc_ctx, &encoder, vpx_codec_vp9_cx(), &enc_cfg) != VPX_CODEC_OK) die("Failed to initialize encoder\n"); -#if CONFIG_VP9_DECODER +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE if (vpx_codec_dec_init( &decoder, get_vpx_decoder_by_name("vp9")->codec_interface(), NULL, 0)) die("Failed to initialize decoder\n"); @@ -1163,7 +1174,7 @@ int main(int argc, const char **argv) { if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1) si->bytes_sum[0] += (int)cx_pkt->data.frame.sz; ++frames_received; -#if CONFIG_VP9_DECODER +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf, (unsigned int)cx_pkt->data.frame.sz, NULL, 0)) die_codec(&decoder, "Failed to decode frame."); @@ -1178,7 +1189,7 @@ int main(int argc, const char **argv) { default: { break; } } -#if CONFIG_VP9_DECODER +#if CONFIG_VP9_DECODER && !SIMULCAST_MODE vpx_codec_control(&encoder, VP9E_GET_SVC_LAYER_ID, &layer_id); // Don't look for mismatch on top spatial and top temporal layers as they // are non reference frames. diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc index 024345a..e28e200 100644 --- a/test/svc_datarate_test.cc +++ b/test/svc_datarate_test.cc @@ -22,6 +22,19 @@ namespace svc_test { namespace { +typedef enum { + // Inter-layer prediction is on on all frames. + INTER_LAYER_PRED_ON, + // Inter-layer prediction is off on all frames. + INTER_LAYER_PRED_OFF, + // Inter-layer prediction is off on non-key frames and non-sync frames. + INTER_LAYER_PRED_OFF_NONKEY, + // Inter-layer prediction is on on all frames, but constrained such + // that any layer S (> 0) can only predict from previous spatial + // layer S-1, from the same superframe. + INTER_LAYER_PRED_ON_CONSTRAINED +} INTER_LAYER_PRED; + class DatarateOnePassCbrSvc : public OnePassCbrSvc { public: explicit DatarateOnePassCbrSvc(const ::libvpx_test::CodecFactory *codec) @@ -989,6 +1002,8 @@ class DatarateOnePassCbrSvcInterLayerPredSingleBR // pass CBR SVC: 3 spatial layers and 3 temporal layers. Run CIF clip with 1 // thread. TEST_P(DatarateOnePassCbrSvcInterLayerPredSingleBR, OnePassCbrSvc3SL3TL) { + // Disable test for inter-layer pred off for now since simulcast_mode fails. + if (inter_layer_pred_mode_ == INTER_LAYER_PRED_OFF) return; SetSvcConfig(3, 3); cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; diff --git a/test/svc_end_to_end_test.cc b/test/svc_end_to_end_test.cc index eb52b06..82259ac 100644 --- a/test/svc_end_to_end_test.cc +++ b/test/svc_end_to_end_test.cc @@ -21,6 +21,19 @@ namespace svc_test { namespace { +typedef enum { + // Inter-layer prediction is on on all frames. + INTER_LAYER_PRED_ON, + // Inter-layer prediction is off on all frames. + INTER_LAYER_PRED_OFF, + // Inter-layer prediction is off on non-key frames and non-sync frames. + INTER_LAYER_PRED_OFF_NONKEY, + // Inter-layer prediction is on on all frames, but constrained such + // that any layer S (> 0) can only predict from previous spatial + // layer S-1, from the same superframe. + INTER_LAYER_PRED_ON_CONSTRAINED +} INTER_LAYER_PRED; + class ScalePartitionOnePassCbrSvc : public OnePassCbrSvc, public ::testing::TestWithParam { @@ -130,7 +143,10 @@ class SyncFrameOnePassCbrSvc : public OnePassCbrSvc, current_video_frame_ = video->frame(); PreEncodeFrameHookSetup(video, encoder); if (video->frame() == 0) { - encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, inter_layer_pred_mode_); + // Do not turn off inter-layer pred completely because simulcast mode + // fails. + if (inter_layer_pred_mode_ != INTER_LAYER_PRED_OFF) + encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, inter_layer_pred_mode_); encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_); if (intra_only_test_) // Decoder sets the color_space for Intra-only frames diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index ec52d74..72e7217 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -3093,7 +3093,11 @@ static void update_ref_frames(VP9_COMP *cpi) { } void vp9_update_reference_frames(VP9_COMP *cpi) { - update_ref_frames(cpi); + if (cpi->svc.simulcast_mode && is_one_pass_cbr_svc(cpi) && + cpi->common.frame_type == KEY_FRAME) + vp9_svc_update_ref_frame_key_simulcast(cpi); + else + update_ref_frames(cpi); #if CONFIG_VP9_TEMPORAL_DENOISING vp9_denoiser_update_ref_frame(cpi); diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 152efa7..aa26371 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -2209,6 +2209,13 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { } } + if (svc->simulcast_mode && svc->spatial_layer_id > 0 && + svc->layer_context[layer].is_key_frame == 1) { + cm->frame_type = KEY_FRAME; + cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); + target = calc_iframe_target_size_one_pass_cbr(cpi); + } + // Check if superframe contains a sync layer request. vp9_svc_check_spatial_layer_sync(cpi); diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 35155c7..787b0e3 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -54,6 +54,7 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { svc->superframe_has_layer_sync = 0; svc->use_set_ref_frame_config = 0; svc->num_encoded_top_layer = 0; + svc->simulcast_mode = 0; for (i = 0; i < REF_FRAMES; ++i) { svc->fb_idx_spatial_layer_id[i] = -1; @@ -474,6 +475,17 @@ static void reset_fb_idx_unused(VP9_COMP *const cpi) { } } +// Never refresh any reference frame buffers on top temporal layers in +// simulcast mode, which has interlayer prediction disabled. +static void non_reference_frame_simulcast(VP9_COMP *const cpi) { + if (cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1 && + cpi->svc.temporal_layer_id > 0) { + cpi->ext_refresh_last_frame = 0; + cpi->ext_refresh_golden_frame = 0; + cpi->ext_refresh_alt_ref_frame = 0; + } +} + // The function sets proper ref_frame_flags, buffer indices, and buffer update // variables for temporal layering mode 3 - that does 0-2-1-2 temporal layering // scheme. @@ -578,6 +590,8 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } + if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi); + reset_fb_idx_unused(cpi); } @@ -639,6 +653,8 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) { cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } + if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi); + reset_fb_idx_unused(cpi); } @@ -673,6 +689,8 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering( cpi->gld_fb_idx = 0; } + if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi); + reset_fb_idx_unused(cpi); } @@ -732,6 +750,15 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; LAYER_CONTEXT *lc = NULL; svc->skip_enhancement_layer = 0; + + if (svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF && + svc->number_spatial_layers <= 3 && svc->number_temporal_layers <= 3 && + !(svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS && + svc->use_set_ref_frame_config)) + svc->simulcast_mode = 1; + else + svc->simulcast_mode = 0; + if (svc->number_spatial_layers > 1) { svc->use_base_mv = 1; svc->use_partition_reuse = 1; @@ -1184,6 +1211,44 @@ static void vp9_svc_update_ref_frame_bypass_mode(VP9_COMP *const cpi) { } } +void vp9_svc_update_ref_frame_key_simulcast(VP9_COMP *const cpi) { + VP9_COMMON *const cm = &cpi->common; + SVC *const svc = &cpi->svc; + BufferPool *const pool = cm->buffer_pool; + const int sl_id = svc->spatial_layer_id; + const int tl_id = svc->temporal_layer_id; + const int num_sl = svc->number_spatial_layers; + // SL0: + // 3 spatial layers: update slot 0 and 3 + // 2 spatial layers: update slot 0 and 2 + // 1 spatial layer: update slot 0 and 1 + // SL1: + // 3 spatial layers: update slot 1, 4, and 6 + // 2 spatial layers: update slot 1, 3, and 6 + // slot 6 is for golden frame long temporal prediction. + // SL2: update slot 2, 5 and 7 + // slot 7 is for golden frame long temporal prediction. + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[sl_id], cm->new_fb_idx); + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[num_sl + sl_id], + cm->new_fb_idx); + svc->fb_idx_spatial_layer_id[sl_id] = sl_id; + svc->fb_idx_temporal_layer_id[sl_id] = tl_id; + svc->fb_idx_spatial_layer_id[num_sl + sl_id] = sl_id; + svc->fb_idx_temporal_layer_id[num_sl + sl_id] = tl_id; + // Update slots for golden frame long temporal prediction. + if (svc->use_gf_temporal_ref_current_layer) { + const int index = num_sl == 3 ? sl_id - 1 : sl_id; + const int lt_buffer_index = svc->buffer_gf_temporal_ref[index].idx; + ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[lt_buffer_index], + cm->new_fb_idx); + svc->fb_idx_spatial_layer_id[lt_buffer_index] = sl_id; + svc->fb_idx_temporal_layer_id[lt_buffer_index] = tl_id; + } + + vp9_copy_flags_ref_update_idx(cpi); + vp9_svc_update_ref_frame_buffer_idx(cpi); +} + void vp9_svc_update_ref_frame(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; SVC *const svc = &cpi->svc; @@ -1192,7 +1257,7 @@ void vp9_svc_update_ref_frame(VP9_COMP *const cpi) { if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->use_set_ref_frame_config) { vp9_svc_update_ref_frame_bypass_mode(cpi); - } else if (cm->frame_type == KEY_FRAME) { + } else if (cm->frame_type == KEY_FRAME && !svc->simulcast_mode) { // Keep track of frame index for each reference frame. int i; // On key frame update all reference frame slots. @@ -1203,7 +1268,7 @@ void vp9_svc_update_ref_frame(VP9_COMP *const cpi) { if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx && i != cpi->alt_fb_idx) ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx); } - } else { + } else if (cm->frame_type != KEY_FRAME) { if (cpi->refresh_last_frame) { svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id; svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id; @@ -1236,6 +1301,7 @@ void vp9_svc_adjust_avg_frame_qindex(VP9_COMP *const cpi) { // (to level closer to worst_quality) if the overshoot is significant. // Reset it for all temporal layers on base spatial layer. if (cm->frame_type == KEY_FRAME && cpi->oxcf.rc_mode == VPX_CBR && + !svc->simulcast_mode && rc->projected_frame_size > 3 * rc->avg_frame_bandwidth) { int tl; rc->avg_frame_qindex[INTER_FRAME] = diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 34795d8..77d4382 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -189,6 +189,9 @@ typedef struct SVC { int64_t time_stamp_prev[VPX_SS_MAX_LAYERS]; int num_encoded_top_layer; + + // Every spatial layer on a superframe whose base is key is key too. + int simulcast_mode; } SVC; struct VP9_COMP; @@ -258,6 +261,8 @@ void vp9_svc_check_spatial_layer_sync(struct VP9_COMP *const cpi); void vp9_svc_update_ref_frame_buffer_idx(struct VP9_COMP *const cpi); +void vp9_svc_update_ref_frame_key_simulcast(struct VP9_COMP *const cpi); + void vp9_svc_update_ref_frame(struct VP9_COMP *const cpi); void vp9_svc_adjust_frame_rate(struct VP9_COMP *const cpi); -- 2.7.4