From e479379abb071050d45273c614c37253522bf7b0 Mon Sep 17 00:00:00 2001 From: Adrian Grange Date: Fri, 13 Jan 2012 14:09:40 -0800 Subject: [PATCH] Fixed bugs in multi-layer code related to changing params When running multi-layer (ML) encodes and dynamically changing coding parameters on the fly (e.g. frame duration/rate, bandwidths allocated to each layer) the encoder would not produce sensible output. In certain cases the rate targeting would be hideously inaccurate. These fixes make it possible to change these coding parameters correctly and to maintain accurate control of the rate targeting. I also added the specification of the input timebase into the test program, vp8_scalable_patterns.c. (Patch 2: Moved declaration to appease MS compiler) Change-Id: Ic8bb5a16daa924bb64974e740696e040d07ae363 --- vp8/common/onyx.h | 6 ++- vp8/encoder/onyx_if.c | 137 ++++++++++++++++++++++++++++++++++++++---------- vp8/encoder/onyx_int.h | 4 ++ vp8/vp8_cx_iface.c | 4 ++ vp8_scalable_patterns.c | 43 +++++++++------ 5 files changed, 147 insertions(+), 47 deletions(-) diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h index d17a32b..eb7d545 100644 --- a/vp8/common/onyx.h +++ b/vp8/common/onyx.h @@ -147,10 +147,14 @@ extern "C" int over_shoot_pct; // buffering parameters - int64_t starting_buffer_level; // in seconds + int64_t starting_buffer_level; // in bytes int64_t optimal_buffer_level; int64_t maximum_buffer_size; + int64_t starting_buffer_level_in_ms; // in milli-seconds + int64_t optimal_buffer_level_in_ms; + int64_t maximum_buffer_size_in_ms; + // controlling quality int fixed_q; int worst_allowed_q; diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c index e3f9519..9223781 100644 --- a/vp8/encoder/onyx_if.c +++ b/vp8/encoder/onyx_if.c @@ -250,6 +250,9 @@ static void save_layer_context(VP8_COMP *cpi) lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; lc->maximum_buffer_size = 
cpi->oxcf.maximum_buffer_size; + lc->starting_buffer_level_in_ms = cpi->oxcf.starting_buffer_level_in_ms; + lc->optimal_buffer_level_in_ms = cpi->oxcf.optimal_buffer_level_in_ms; + lc->maximum_buffer_size_in_ms = cpi->oxcf.maximum_buffer_size_in_ms; lc->buffer_level = cpi->buffer_level; lc->bits_off_target = cpi->bits_off_target; lc->total_actual_bits = cpi->total_actual_bits; @@ -287,6 +290,9 @@ static void restore_layer_context(VP8_COMP *cpi, const int layer) cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; + cpi->oxcf.starting_buffer_level_in_ms = lc->starting_buffer_level_in_ms; + cpi->oxcf.optimal_buffer_level_in_ms = lc->optimal_buffer_level_in_ms; + cpi->oxcf.maximum_buffer_size_in_ms = lc->maximum_buffer_size_in_ms; cpi->buffer_level = lc->buffer_level; cpi->bits_off_target = lc->bits_off_target; cpi->total_actual_bits = lc->total_actual_bits; @@ -1254,6 +1260,8 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) if (cpi->frame_rate > 180) cpi->frame_rate = 30; + cpi->ref_frame_rate = cpi->frame_rate; + // change includes all joint functionality vp8_change_config(cpi, oxcf); @@ -1289,6 +1297,10 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) cpi->output_frame_rate / cpi->oxcf.rate_decimator[i]; lc->target_bandwidth = cpi->oxcf.target_bitrate[i] * 1000; + lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level; + lc->optimal_buffer_level_in_ms = oxcf->optimal_buffer_level; + lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size; + lc->starting_buffer_level = rescale(oxcf->starting_buffer_level, lc->target_bandwidth, 1000); @@ -1345,6 +1357,56 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) #endif } +void update_layer_contexts (VP8_COMP *cpi) +{ + VP8_CONFIG *oxcf = &cpi->oxcf; + + /* Update snapshots of the layer contexts to reflect new parameters */ + if (oxcf->number_of_layers > 1) + { + 
unsigned int i; + double prev_layer_frame_rate=0; + + for (i=0; i<oxcf->number_of_layers; i++) + { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; + + lc->frame_rate = + cpi->ref_frame_rate / oxcf->rate_decimator[i]; + lc->target_bandwidth = oxcf->target_bitrate[i] * 1000; + + lc->starting_buffer_level = rescale( + oxcf->starting_buffer_level_in_ms, + lc->target_bandwidth, 1000); + + if (oxcf->optimal_buffer_level == 0) + lc->optimal_buffer_level = lc->target_bandwidth / 8; + else + lc->optimal_buffer_level = rescale( + oxcf->optimal_buffer_level_in_ms, + lc->target_bandwidth, 1000); + + if (oxcf->maximum_buffer_size == 0) + lc->maximum_buffer_size = lc->target_bandwidth / 8; + else + lc->maximum_buffer_size = rescale( + oxcf->maximum_buffer_size_in_ms, + lc->target_bandwidth, 1000); + + // Work out the average size of a frame within this layer + if (i > 0) + lc->avg_frame_size_for_layer = (oxcf->target_bitrate[i] - + oxcf->target_bitrate[i-1]) * 1000 / + (lc->frame_rate - prev_layer_frame_rate); + + lc->active_worst_quality = oxcf->worst_allowed_q; + lc->active_best_quality = oxcf->best_allowed_q; + lc->avg_frame_qindex = oxcf->worst_allowed_q; + + prev_layer_frame_rate = lc->frame_rate; + } + } +} void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { @@ -1485,9 +1547,12 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) // local file playback mode == really big buffer if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) { - cpi->oxcf.starting_buffer_level = 60000; - cpi->oxcf.optimal_buffer_level = 60000; - cpi->oxcf.maximum_buffer_size = 240000; + cpi->oxcf.starting_buffer_level = 60000; + cpi->oxcf.optimal_buffer_level = 60000; + cpi->oxcf.maximum_buffer_size = 240000; + cpi->oxcf.starting_buffer_level_in_ms = 60000; + cpi->oxcf.optimal_buffer_level_in_ms = 60000; + cpi->oxcf.maximum_buffer_size_in_ms = 240000; } // Convert target bandwidth from Kbit/s to Bit/s @@ -4256,14 +4321,15 @@ static void encode_frame_to_data_rate vp8_clear_system_state(); //__asm 
emms; - if (cpi->twopass.total_left_stats->coded_error != 0.0) - fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" + if (cpi->twopass.total_left_stats.coded_error != 0.0) + fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f" "%10.3f %8d\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), (int)cpi->total_target_vs_actual, + cpi->buffer_level, (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, @@ -4274,18 +4340,19 @@ static void encode_frame_to_data_rate cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, (int)cpi->twopass.bits_left, - cpi->twopass.total_left_stats->coded_error, + cpi->twopass.total_left_stats.coded_error, (double)cpi->twopass.bits_left / - cpi->twopass.total_left_stats->coded_error, + cpi->twopass.total_left_stats.coded_error, cpi->tot_recode_hits); else - fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" + fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %10d %6d %6d" "%6d %6d %6d %5d %5d %5d %8d %8.2f %10d %10.3f" "%8d\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), (int)cpi->total_target_vs_actual, + cpi->buffer_level, (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, @@ -4296,7 +4363,7 @@ static void encode_frame_to_data_rate cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, (int)cpi->twopass.bits_left, - cpi->twopass.total_left_stats->coded_error, + cpi->twopass.total_left_stats.coded_error, cpi->tot_recode_hits); fclose(f); @@ -4669,13 +4736,6 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l 
return -1; } - // Restore layer specific context if necessary - if (cpi->oxcf.number_of_layers > 1) - { - restore_layer_context (cpi, - cpi->oxcf.layer_id[cm->current_video_frame % cpi->oxcf.periodicity]); - } - if (cpi->source->ts_start < cpi->first_time_stamp_ever) { cpi->first_time_stamp_ever = cpi->source->ts_start; @@ -4683,16 +4743,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l } // adjust frame rates based on timestamps given - if (cpi->oxcf.number_of_layers > 1 ) - { - vp8_new_frame_rate ( - cpi, cpi->layer_context[cpi->current_layer].frame_rate); - - cpi->last_time_stamp_seen = cpi->source->ts_start; - cpi->last_end_time_stamp_seen = cpi->source->ts_end; - - } - else if (!cm->refresh_alt_ref_frame) + if (!cm->refresh_alt_ref_frame || (cpi->oxcf.number_of_layers > 1)) { int64_t this_duration; int step = 0; @@ -4717,7 +4768,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if (this_duration) { if (step) - vp8_new_frame_rate(cpi, 10000000.0 / this_duration); + cpi->ref_frame_rate = 10000000.0 / this_duration; else { double avg_duration, interval; @@ -4730,18 +4781,46 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l if(interval > 10000000.0) interval = 10000000; - avg_duration = 10000000.0 / cpi->frame_rate; + avg_duration = 10000000.0 / cpi->ref_frame_rate; avg_duration *= (interval - avg_duration + this_duration); avg_duration /= interval; - vp8_new_frame_rate(cpi, 10000000.0 / avg_duration); + cpi->ref_frame_rate = 10000000.0 / avg_duration; + } + + if (cpi->oxcf.number_of_layers > 1) + { + int i; + + // Update frame rates for each layer + for (i=0; i<cpi->oxcf.number_of_layers; i++) + { + LAYER_CONTEXT *lc = &cpi->layer_context[i]; + lc->frame_rate = cpi->ref_frame_rate / + cpi->oxcf.rate_decimator[i]; + } } + else + vp8_new_frame_rate(cpi, cpi->ref_frame_rate); } cpi->last_time_stamp_seen = cpi->source->ts_start; cpi->last_end_time_stamp_seen = 
cpi->source->ts_end; } + if (cpi->oxcf.number_of_layers > 1) + { + int layer; + + update_layer_contexts (cpi); + + // Restore layer specific context & set frame rate + layer = cpi->oxcf.layer_id[ + cm->current_video_frame % cpi->oxcf.periodicity]; + restore_layer_context (cpi, layer); + vp8_new_frame_rate (cpi, cpi->layer_context[layer].frame_rate); + } + if (cpi->compressor_speed == 2) { if (cpi->oxcf.number_of_layers == 1) diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h index 46951e3..35efd3a 100644 --- a/vp8/encoder/onyx_int.h +++ b/vp8/encoder/onyx_int.h @@ -253,6 +253,9 @@ typedef struct int starting_buffer_level; int optimal_buffer_level; int maximum_buffer_size; + int starting_buffer_level_in_ms; + int optimal_buffer_level_in_ms; + int maximum_buffer_size_in_ms; int avg_frame_size_for_layer; @@ -421,6 +424,7 @@ typedef struct VP8_COMP int buffered_mode; double frame_rate; + double ref_frame_rate; int64_t buffer_level; int bits_off_target; diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c index abaf85b..f2f376a 100644 --- a/vp8/vp8_cx_iface.c +++ b/vp8/vp8_cx_iface.c @@ -335,6 +335,10 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, oxcf->under_shoot_pct = cfg.rc_undershoot_pct; oxcf->over_shoot_pct = cfg.rc_overshoot_pct; + oxcf->maximum_buffer_size_in_ms = cfg.rc_buf_sz; + oxcf->starting_buffer_level_in_ms = cfg.rc_buf_initial_sz; + oxcf->optimal_buffer_level_in_ms = cfg.rc_buf_optimal_sz; + oxcf->maximum_buffer_size = cfg.rc_buf_sz; oxcf->starting_buffer_level = cfg.rc_buf_initial_sz; oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz; diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c index 41ecaa7..65883ff 100644 --- a/vp8_scalable_patterns.c +++ b/vp8_scalable_patterns.c @@ -129,27 +129,29 @@ int main(int argc, char **argv) { int got_data; int flags = 0; int i; + int pts = 0; // PTS starts at 0 + int frame_duration = 1; // 1 timebase tick per frame int layering_mode = 0; int frames_in_layer[MAX_LAYERS] = {0}; int 
layer_flags[MAX_PERIODICITY] = {0}; // Check usage and arguments - if (argc < 7) - die("Usage: %s <infile> <outfile> <width> <height> <mode> " - "<Rate_0> ... <Rate_nlayers-1>\n", argv[0]); + if (argc < 9) + die("Usage: %s <infile> <outfile> <width> <height> <rate_num> " + " <rate_den> <mode> <Rate_0> ... <Rate_nlayers-1>\n", argv[0]); width = strtol (argv[3], NULL, 0); height = strtol (argv[4], NULL, 0); if (width < 16 || width%2 || height <16 || height%2) die ("Invalid resolution: %d x %d", width, height); - if (!sscanf(argv[5], "%d", &layering_mode)) - die ("Invalid mode %s", argv[5]); + if (!sscanf(argv[7], "%d", &layering_mode)) + die ("Invalid mode %s", argv[7]); if (layering_mode<0 || layering_mode>6) - die ("Invalid mode (0..6) %s", argv[5]); + die ("Invalid mode (0..6) %s", argv[7]); - if (argc != 6+mode_to_num_layers[layering_mode]) + if (argc != 8+mode_to_num_layers[layering_mode]) die ("Invalid number of arguments"); if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 1)) @@ -168,8 +170,14 @@ int main(int argc, char **argv) { cfg.g_w = width; cfg.g_h = height; - for (i=6; i<6+mode_to_num_layers[layering_mode]; i++) - if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-6])) + // Timebase format e.g. 30fps: numerator=1, denominator=30 + if (!sscanf (argv[5], "%d", &cfg.g_timebase.num )) + die ("Invalid timebase numerator %s", argv[5]); + if (!sscanf (argv[6], "%d", &cfg.g_timebase.den )) + die ("Invalid timebase denominator %s", argv[6]); + + for (i=8; i<8+mode_to_num_layers[layering_mode]; i++) + if (!sscanf(argv[i], "%d", &cfg.ts_target_bitrate[i-8])) die ("Invalid data rate %s", argv[i]); // Real time parameters @@ -193,7 +201,7 @@ int main(int argc, char **argv) { cfg.kf_min_dist = cfg.kf_max_dist = 1000; // Temporal scaling parameters: - // NOTE: The 3 prediction frames cannot be used interchangebly due to + // NOTE: The 3 prediction frames cannot be used interchangeably due to // differences in the way they are handled throughout the code. The // frames should be allocated to layers in the order LAST, GF, ARF. // Other combinations work, but may produce slightly inferior results. 
@@ -210,14 +218,15 @@ int main(int argc, char **argv) { cfg.ts_rate_decimator[1] = 1; memcpy(cfg.ts_layer_id, ids, sizeof(ids)); +#if 1 // 0=L, 1=GF, Intra-layer prediction enabled layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF; -#if 0 - // 0=L, 1=GF, Intra-layer 1 prediction disabled +#else + // 0=L, 1=GF, Intra-layer prediction disabled layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; @@ -275,7 +284,7 @@ int main(int argc, char **argv) { case 3: { // 3-layers, 4-frame period - int ids[6] = {0,2,1,2}; + int ids[4] = {0,2,1,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 4; cfg.ts_rate_decimator[0] = 4; @@ -295,13 +304,12 @@ int main(int argc, char **argv) { VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; break; - cfg.ts_rate_decimator[2] = 1; } case 4: { // 3-layers, 4-frame period - int ids[6] = {0,2,1,2}; + int ids[4] = {0,2,1,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 4; cfg.ts_rate_decimator[0] = 4; @@ -326,7 +334,7 @@ int main(int argc, char **argv) { case 5: { // 3-layers, 4-frame period - int ids[6] = {0,2,1,2}; + int ids[4] = {0,2,1,2}; cfg.ts_number_layers = 3; cfg.ts_periodicity = 4; cfg.ts_rate_decimator[0] = 4; @@ -417,7 +425,7 @@ int main(int argc, char **argv) { flags = layer_flags[frame_cnt % cfg.ts_periodicity]; frame_avail = read_frame(infile, &raw); - if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt, + if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags, VPX_DL_REALTIME)) die_codec(&codec, "Failed to encode frame"); @@ -446,6 +454,7 @@ int main(int argc, char **argv) { fflush (stdout); } frame_cnt++; + pts += frame_duration; } printf ("\n"); fclose (infile); -- 2.7.4