From d205335060940b527639a69a2ea15eaaf263a821 Mon Sep 17 00:00:00 2001 From: Minghai Shang Date: Wed, 19 Mar 2014 11:06:20 -0700 Subject: [PATCH] [svc] Finalize spatial svc first pass rate control 1. Save stats for each spatial layer 2. Add frame buffer management for svc first pass rc 3. Set default spatial layer to 1 4. Flush encoder at the end of stream in test app This only supports spatial svc. Change-Id: Ia89cfa87bb6394e6c0405b921d86c426d0a0c9ae --- examples/vp9_spatial_scalable_encoder.c | 20 +++++--- test/svc_test.cc | 35 ++++++++++++++ vp9/encoder/vp9_firstpass.c | 81 ++++++++++++++++++++++++++------- vp9/encoder/vp9_firstpass.h | 1 + vp9/encoder/vp9_onyx_if.c | 17 +++---- vp9/encoder/vp9_onyx_int.h | 4 ++ vp9/encoder/vp9_svc_layercontext.h | 8 ++-- vpx/src/svc_encodeframe.c | 68 +++++++++++++++------------ vpx/vpx_encoder.h | 2 +- 9 files changed, 172 insertions(+), 64 deletions(-) diff --git a/examples/vp9_spatial_scalable_encoder.c b/examples/vp9_spatial_scalable_encoder.c index 5333b11..5c80d34 100644 --- a/examples/vp9_spatial_scalable_encoder.c +++ b/examples/vp9_spatial_scalable_encoder.c @@ -264,6 +264,7 @@ int main(int argc, const char **argv) { int pts = 0; /* PTS starts at 0 */ int frame_duration = 1; /* 1 timebase tick per frame */ FILE *infile = NULL; + int end_of_stream = 0; memset(&svc_ctx, 0, sizeof(svc_ctx)); svc_ctx.log_print = 1; @@ -305,12 +306,15 @@ int main(int argc, const char **argv) { vpx_img_read(&raw, infile); // Encode frames - while (frame_cnt < app_input.frames_to_code) { - if (!vpx_img_read(&raw, infile)) - break; + while (!end_of_stream) { + if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) { + // We need one extra vpx_svc_encode call at end of stream to flush + // encoder and get remaining data + end_of_stream = 1; + } - res = vpx_svc_encode(&svc_ctx, &codec, &raw, pts, frame_duration, - VPX_DL_REALTIME); + res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw), + pts, frame_duration, VPX_DL_REALTIME); printf("%s", vpx_svc_get_message(&svc_ctx)); if (res != VPX_CODEC_OK) { die_codec(&codec, "Failed to encode frame"); @@ -328,8 +332,10 @@ int main(int argc, const char **argv) { vpx_svc_get_rc_stats_buffer(&svc_ctx), vpx_svc_get_rc_stats_buffer_size(&svc_ctx)); } - ++frame_cnt; - pts += frame_duration; + if (!end_of_stream) { + ++frame_cnt; + pts += frame_duration; + } } printf("Processed %d frames\n", frame_cnt); diff --git a/test/svc_test.cc b/test/svc_test.cc index 2e56534..dff2ec7 100644 --- a/test/svc_test.cc +++ b/test/svc_test.cc @@ -362,4 +362,39 @@ TEST_F(SvcTest, GetLayerResolution) { EXPECT_EQ(kHeight * 8 / 16, layer_height); } +TEST_F(SvcTest, FirstPassEncode) { + svc_.spatial_layers = 2; + codec_enc_.g_pass = VPX_RC_FIRST_PASS; + vpx_svc_set_scale_factors(&svc_, "4/16,16/16"); + vpx_svc_set_quantizers(&svc_, "40,30", 0); + + vpx_codec_err_t res = + vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_); + ASSERT_EQ(VPX_CODEC_OK, res); + codec_initialized_ = true; + + libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight, + codec_enc_.g_timebase.den, + codec_enc_.g_timebase.num, 0, 30); + // FRAME 0 + video.Begin(); + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U); + + // FRAME 1 + video.Next(); + res = vpx_svc_encode(&svc_, &codec_, video.img(), video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U); + + // Flush encoder and test EOS packet + res = vpx_svc_encode(&svc_, &codec_, NULL, video.pts(), + video.duration(), VPX_DL_GOOD_QUALITY); + ASSERT_EQ(VPX_CODEC_OK, res); + EXPECT_GT(vpx_svc_get_rc_stats_buffer_size(&svc_), 0U); +} + } // namespace diff --git a/vp9/encoder/vp9_firstpass.c b/vp9/encoder/vp9_firstpass.c index cd4e3d6..0535775 100644 --- a/vp9/encoder/vp9_firstpass.c +++ b/vp9/encoder/vp9_firstpass.c @@ -174,11 +174,13 @@ static void zero_stats(FIRSTPASS_STATS *section) { section->new_mv_count = 0.0; section->count = 0.0; section->duration = 1.0; + section->spatial_layer_id = 0; } static void accumulate_stats(FIRSTPASS_STATS *section, const FIRSTPASS_STATS *frame) { section->frame += frame->frame; + section->spatial_layer_id = frame->spatial_layer_id; section->intra_error += frame->intra_error; section->coded_error += frame->coded_error; section->sr_coded_error += frame->sr_coded_error; @@ -335,7 +337,15 @@ void vp9_init_first_pass(VP9_COMP *cpi) { } void vp9_end_first_pass(VP9_COMP *cpi) { - output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + int i; + for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { + output_stats(&cpi->svc.layer_context[i].twopass.total_stats, + cpi->output_pkt_list); + } + } else { + output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list); + } } static vp9_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { @@ -457,11 +467,11 @@ void vp9_first_pass(VP9_COMP *cpi) { int recon_yoffset, recon_uvoffset; YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - YV12_BUFFER_CONFIG *const gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); - const int recon_y_stride = lst_yv12->y_stride; - const int recon_uv_stride = lst_yv12->uv_stride; - const int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height); + int recon_y_stride = lst_yv12->y_stride; + int recon_uv_stride = lst_yv12->uv_stride; + int uv_mb_height = 16 >> (lst_yv12->y_height > lst_yv12->uv_height); int64_t intra_error = 0; int64_t coded_error = 0; int64_t sr_coded_error = 0; @@ -477,13 +487,43 @@ void vp9_first_pass(VP9_COMP *cpi) { int new_mv_count = 0; int sum_in_vectors = 0; uint32_t lastmv_as_int = 0; - struct twopass_rc *const twopass = &cpi->twopass; + struct twopass_rc *twopass = &cpi->twopass; const MV zero_mv = {0, 0}; + const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; vp9_clear_system_state(); + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + MV_REFERENCE_FRAME ref_frame = LAST_FRAME; + const YV12_BUFFER_CONFIG *scaled_ref_buf = NULL; + twopass = &cpi->svc.layer_context[cpi->svc.spatial_layer_id].twopass; + + vp9_scale_references(cpi); + + // Use either last frame or alt frame for motion search. + if (cpi->ref_frame_flags & VP9_LAST_FLAG) { + scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME); + ref_frame = LAST_FRAME; + } else if (cpi->ref_frame_flags & VP9_ALT_FLAG) { + scaled_ref_buf = vp9_get_scaled_ref_frame(cpi, ALTREF_FRAME); + ref_frame = ALTREF_FRAME; + } + + if (scaled_ref_buf != NULL) { + // Update the stride since we are using scaled reference buffer + first_ref_buf = scaled_ref_buf; + recon_y_stride = first_ref_buf->y_stride; + recon_uv_stride = first_ref_buf->uv_stride; + uv_mb_height = 16 >> (first_ref_buf->y_height > first_ref_buf->uv_height); + } + + // Disable golden frame for svc first pass for now. + gld_yv12 = NULL; + set_ref_ptrs(cm, xd, ref_frame, NONE); + } + vp9_setup_src_planes(x, cpi->Source, 0, 0); - vp9_setup_pre_planes(xd, 0, lst_yv12, 0, 0, NULL); + vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL); vp9_setup_dst_planes(xd, new_yv12, 0, 0); xd->mi_8x8 = cm->mi_grid_visible; @@ -576,7 +616,7 @@ void vp9_first_pass(VP9_COMP *cpi) { int tmp_err, motion_error; int_mv mv, tmp_mv; - xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; + xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; motion_error = zz_motion_search(x); // Assume 0,0 motion with no mv overhead. mv.as_int = tmp_mv.as_int = 0; @@ -608,7 +648,7 @@ void vp9_first_pass(VP9_COMP *cpi) { } // Search in an older reference frame. - if (cm->current_video_frame > 1) { + if (cm->current_video_frame > 1 && gld_yv12 != NULL) { // Assume 0,0 motion with no mv overhead. int gf_motion_error; @@ -626,9 +666,9 @@ void vp9_first_pass(VP9_COMP *cpi) { ++second_ref_count; // Reset to last frame as reference buffer. - xd->plane[0].pre[0].buf = lst_yv12->y_buffer + recon_yoffset; - xd->plane[1].pre[0].buf = lst_yv12->u_buffer + recon_uvoffset; - xd->plane[2].pre[0].buf = lst_yv12->v_buffer + recon_uvoffset; + xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; + xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset; + xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset; // In accumulating a score for the older reference frame take the // best of the motion predicted score and the intra coded error @@ -736,6 +776,7 @@ void vp9_first_pass(VP9_COMP *cpi) { FIRSTPASS_STATS fps; fps.frame = cm->current_video_frame; + fps.spatial_layer_id = cpi->svc.spatial_layer_id; fps.intra_error = (double)(intra_error >> 8); fps.coded_error = (double)(coded_error >> 8); fps.sr_coded_error = (double)(sr_coded_error >> 8); @@ -785,20 +826,28 @@ void vp9_first_pass(VP9_COMP *cpi) { (twopass->this_frame_stats.pcnt_inter > 0.20) && ((twopass->this_frame_stats.intra_error / DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) { - vp8_yv12_copy_frame(lst_yv12, gld_yv12); + if (gld_yv12 != NULL) { + vp8_yv12_copy_frame(lst_yv12, gld_yv12); + } twopass->sr_update_lag = 1; } else { ++twopass->sr_update_lag; } - // Swap frame pointers so last frame refers to the frame we just compressed. - swap_yv12(lst_yv12, new_yv12); + + if (cpi->use_svc && cpi->svc.number_temporal_layers == 1) { + vp9_update_reference_frames(cpi); + } else { + // Swap frame pointers so last frame refers to the frame we just compressed. + swap_yv12(lst_yv12, new_yv12); + } vp9_extend_frame_borders(lst_yv12); // Special case for the first frame. Copy into the GF buffer as a second // reference. - if (cm->current_video_frame == 0) + if (cm->current_video_frame == 0 && gld_yv12 != NULL) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); + } // Use this to see what the first pass reconstruction looks like. if (0) { diff --git a/vp9/encoder/vp9_firstpass.h b/vp9/encoder/vp9_firstpass.h index 278b22c..bf7b5a1 100644 --- a/vp9/encoder/vp9_firstpass.h +++ b/vp9/encoder/vp9_firstpass.h @@ -35,6 +35,7 @@ typedef struct { double new_mv_count; double duration; double count; + int spatial_layer_id; } FIRSTPASS_STATS; struct twopass_rc { diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index ca91a67..c040cfa 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -2460,7 +2460,7 @@ static int recode_loop_test(const VP9_COMP *cpi, return force_recode; } -static void update_reference_frames(VP9_COMP * const cpi) { +void vp9_update_reference_frames(VP9_COMP *cpi) { VP9_COMMON * const cm = &cpi->common; // At this point the new frame has been encoded. @@ -2544,7 +2544,7 @@ static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { vp9_extend_frame_inner_borders(cm->frame_to_show); } -static void scale_references(VP9_COMP *cpi) { +void vp9_scale_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MV_REFERENCE_FRAME ref_frame; @@ -2981,7 +2981,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } else { cpi->Source = cpi->un_scaled_source; } - scale_references(cpi); + vp9_scale_references(cpi); vp9_clear_system_state(); @@ -3159,7 +3159,7 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, update_reference_segmentation_map(cpi); release_scaled_references(cpi); - update_reference_frames(cpi); + vp9_update_reference_frames(cpi); for (t = TX_4X4; t <= TX_32X32; t++) full_to_model_counts(cm->counts.coef[t], cpi->coef_counts[t]); @@ -3585,12 +3585,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, vp9_vaq_init(); } - if (cpi->use_svc) { - SvcEncode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 1) { + if (cpi->pass == 1 && + (!cpi->use_svc || cpi->svc.number_temporal_layers == 1)) { Pass1Encode(cpi, size, dest, frame_flags); - } else if (cpi->pass == 2) { + } else if (cpi->pass == 2 && !cpi->use_svc) { Pass2Encode(cpi, size, dest, frame_flags); + } else if (cpi->use_svc) { + SvcEncode(cpi, size, dest, frame_flags); } else { // One pass encode Pass0Encode(cpi, size, dest, frame_flags); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index ad1dd9b..e55a2f3 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -938,6 +938,10 @@ int vp9_compute_qdelta(const VP9_COMP *cpi, double qstart, double qtarget); int vp9_compute_qdelta_by_rate(VP9_COMP *cpi, int base_q_index, double rate_target_ratio); +void vp9_scale_references(VP9_COMP *cpi); + +void vp9_update_reference_frames(VP9_COMP *cpi); + static int get_token_alloc(int mb_rows, int mb_cols) { return mb_rows * mb_cols * (48 * 16 + 4); } diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index c7a4c06..e81b0b7 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -27,6 +27,7 @@ typedef struct { int64_t maximum_buffer_size; double framerate; int avg_frame_size; + struct twopass_rc twopass; } LAYER_CONTEXT; typedef struct { @@ -34,9 +35,10 @@ typedef struct { int temporal_layer_id; int number_spatial_layers; int number_temporal_layers; - // Layer context used for rate control in CBR mode, only defined for - // temporal layers for now. - LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; + // Layer context used for rate control in temporal CBR mode or spatial + // two pass mode. Defined for temporal or spatial layers for now. + // Does not support temporal combined with spatial RC. + LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)]; } SVC; struct VP9_COMP; diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c index 4f3f7ec..d48a761 100644 --- a/vpx/src/svc_encodeframe.c +++ b/vpx/src/svc_encodeframe.c @@ -850,7 +850,7 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, struct LayerData *layer_data; struct Superframe superframe; SvcInternal *const si = get_svc_internal(svc_ctx); - if (svc_ctx == NULL || codec_ctx == NULL || rawimg == NULL || si == NULL) { + if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) { return VPX_CODEC_INVALID_PARAM; } @@ -866,9 +866,12 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, si->is_keyframe = (si->frame_within_gop == 0); si->frame_size = 0; - svc_log(svc_ctx, SVC_LOG_DEBUG, - "vpx_svc_encode layers: %d, frame_count: %d, frame_within_gop: %d\n", - si->layers, si->encode_frame_count, si->frame_within_gop); + if (rawimg != NULL) { + svc_log(svc_ctx, SVC_LOG_DEBUG, + "vpx_svc_encode layers: %d, frame_count: %d, " + "frame_within_gop: %d\n", si->layers, si->encode_frame_count, + si->frame_within_gop); + } // encode each layer for (si->layer = 0; si->layer < si->layers; ++si->layer) { @@ -877,9 +880,11 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, svc_log(svc_ctx, SVC_LOG_DEBUG, "Skip encoding layer %d\n", si->layer); continue; } - calculate_enc_frame_flags(svc_ctx); - set_svc_parameters(svc_ctx, codec_ctx); + if (rawimg != NULL) { + calculate_enc_frame_flags(svc_ctx); + set_svc_parameters(svc_ctx, codec_ctx); + } res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, si->enc_frame_flags, deadline); @@ -953,34 +958,39 @@ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, } } } + if (rawimg == NULL) { + break; + } } - // add superframe index to layer data list - sf_create_index(&superframe); - layer_data = ld_create(superframe.buffer, superframe.index_size); - ld_list_add(&cx_layer_list, layer_data); - - // get accumulated size of layer data - si->frame_size = ld_list_get_buffer_size(cx_layer_list); - if (si->frame_size == 0) return VPX_CODEC_ERROR; + if (codec_ctx->config.enc->g_pass != VPX_RC_FIRST_PASS) { + // add superframe index to layer data list + sf_create_index(&superframe); + layer_data = ld_create(superframe.buffer, superframe.index_size); + ld_list_add(&cx_layer_list, layer_data); + + // get accumulated size of layer data + si->frame_size = ld_list_get_buffer_size(cx_layer_list); + if (si->frame_size > 0) { + // all layers encoded, create single buffer with concatenated layers + if (si->frame_size > si->buffer_size) { + free(si->buffer); + si->buffer = malloc(si->frame_size); + if (si->buffer == NULL) { + ld_list_free(cx_layer_list); + return VPX_CODEC_MEM_ERROR; + } + si->buffer_size = si->frame_size; + } + // copy layer data into packet + ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer); - // all layers encoded, create single buffer with concatenated layers - if (si->frame_size > si->buffer_size) { - free(si->buffer); - si->buffer = malloc(si->frame_size); - if (si->buffer == NULL) { ld_list_free(cx_layer_list); - return VPX_CODEC_MEM_ERROR; + + svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, " + "pts: %d\n", si->encode_frame_count, si->is_keyframe, + (int)si->frame_size, (int)pts); } - si->buffer_size = si->frame_size; } - // copy layer data into packet - ld_list_copy_to_buffer(cx_layer_list, (uint8_t *)si->buffer); - - ld_list_free(cx_layer_list); - - svc_log(svc_ctx, SVC_LOG_DEBUG, "SVC frame: %d, kf: %d, size: %d, pts: %d\n", - si->encode_frame_count, si->is_keyframe, (int)si->frame_size, - (int)pts); ++si->frame_within_gop; ++si->encode_frame_count; diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index 851ff1a..2c882c1 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -49,7 +49,7 @@ extern "C" { #define VPX_SS_MAX_LAYERS 5 /*! Spatial Scalability: Default number of coding layers */ -#define VPX_SS_DEFAULT_LAYERS 3 +#define VPX_SS_DEFAULT_LAYERS 1 /*!\brief Current ABI version number * -- 2.7.4