Improved 1-pass CBR rate control
author John Koleszar <jkoleszar@google.com>
Wed, 29 Jun 2011 15:41:50 +0000 (11:41 -0400)
committer John Koleszar <jkoleszar@google.com>
Mon, 18 Jul 2011 15:48:05 +0000 (11:48 -0400)
This patch attempts to improve the handling of CBR streams with
respect to the short term buffering requirements. The "buffer level"
is changed to be an average over the rc buffer, rather than a long
running average. Overshoot is also tracked over the same interval
and the golden frame targets suppressed accordingly to correct for
overly aggressive boosting.

Testing shows that this is fairly consistently positive in one
metric or another -- some clips that show significant decreases
in quality have better buffering characteristics, others show
improvements in both.

Change-Id: I924c89aa9bdb210271f2e03311e63de3f1f8f920

vp8/common/onyxd.h
vp8/encoder/firstpass.c
vp8/encoder/onyx_if.c
vp8/encoder/onyx_int.h
vp8/encoder/ratectrl.c

index 8a4703a..2dfdc66 100644 (file)
@@ -18,6 +18,7 @@
 extern "C"
 {
 #endif
+#include "vpx/vpx_codec.h"
 #include "type_aliases.h"
 #include "vpx_scale/yv12config.h"
 #include "ppflags.h"
index 881481b..795388d 100644 (file)
@@ -357,33 +357,58 @@ static int frame_max_bits(VP8_COMP *cpi)
     int max_bits;
 
     // For CBR we need to also consider buffer fullness.
-    // If we are running below the optimal level then we need to gradually tighten up on max_bits.
     if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
     {
-        double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level);
+        max_bits = 2 * cpi->av_per_frame_bandwidth;
+        max_bits -= cpi->buffered_av_per_frame_bandwidth;
+        max_bits *= ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0);
+    }
+    // VBR
+    else
+    {
+        // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user
+        max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
+    }
+
+    // Trap case where we are out of bits
+    if (max_bits < 0)
+        max_bits = 0;
 
-        // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user
-        max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
+    return max_bits;
+}
 
-        // If our buffer is below the optimum level
-        if (buffer_fullness_ratio < 1.0)
-        {
-            // The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4.
-            int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2;
 
-            max_bits = (int)(max_bits * buffer_fullness_ratio);
+static int gf_group_max_bits(VP8_COMP *cpi)
+{
+    // Max allocation for a golden frame group
+    int max_bits;
 
-            if (max_bits < min_max_bits)
-                max_bits = min_max_bits;       // Lowest value we will set ... which should allow the buffer to refil.
+    // For CBR we need to also consider buffer fullness.
+    if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+    {
+        max_bits = cpi->av_per_frame_bandwidth * cpi->baseline_gf_interval;
+        if (max_bits > cpi->oxcf.optimal_buffer_level)
+        {
+            max_bits -= cpi->oxcf.optimal_buffer_level;
+            max_bits += cpi->buffer_level;
         }
+        else
+        {
+            max_bits -= (cpi->buffered_av_per_frame_bandwidth
+                         - cpi->av_per_frame_bandwidth)
+                        * cpi->baseline_gf_interval;
+        }
+
+        max_bits *= ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0);
     }
-    // VBR
     else
     {
         // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user
         max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
+        max_bits *=  cpi->baseline_gf_interval;
     }
 
+
     // Trap case where we are out of bits
     if (max_bits < 0)
         max_bits = 0;
@@ -1601,7 +1626,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     double abs_mv_in_out_accumulator = 0.0;
     double mod_err_per_mb_accumulator = 0.0;
 
-    int max_bits = frame_max_bits(cpi);     // Max for a single frame
+    int max_group_bits;
 
     unsigned int allow_alt_ref =
                     cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames;
@@ -1963,8 +1988,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
     // Clip cpi->twopass.gf_group_bits based on user supplied data rate
     // variability limit (cpi->oxcf.two_pass_vbrmax_section)
-    if (cpi->twopass.gf_group_bits > max_bits * cpi->baseline_gf_interval)
-        cpi->twopass.gf_group_bits = max_bits * cpi->baseline_gf_interval;
+    max_group_bits = gf_group_max_bits(cpi);
+    if (cpi->twopass.gf_group_bits > max_group_bits)
+        cpi->twopass.gf_group_bits = max_group_bits;
 
     // Reset the file position
     reset_fpf_position(cpi, start_pos);
@@ -2064,13 +2090,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             }
         }
 
-        // Apply an additional limit for CBR
-        if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
-        {
-            if (cpi->twopass.gf_bits > (cpi->buffer_level >> 1))
-                cpi->twopass.gf_bits = cpi->buffer_level >> 1;
-        }
-
         // Dont allow a negative value for gf_bits
         if (gf_bits < 0)
             gf_bits = 0;
index d719f36..ba8793d 100644 (file)
@@ -1455,6 +1455,7 @@ static void init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     cpi->rolling_actual_bits          = cpi->av_per_frame_bandwidth;
     cpi->long_rolling_target_bits     = cpi->av_per_frame_bandwidth;
     cpi->long_rolling_actual_bits     = cpi->av_per_frame_bandwidth;
+    cpi->buffered_av_per_frame_bandwidth = cpi->av_per_frame_bandwidth;
 
     cpi->total_actual_bits            = 0;
     cpi->total_target_vs_actual       = 0;
@@ -1550,7 +1551,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
         break;
     }
 
-    if (cpi->pass == 0)
+    if (cpi->pass == 0 && cpi->oxcf.end_usage != USAGE_STREAM_FROM_SERVER)
         cpi->auto_worst_q = 1;
 
     cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q];
@@ -3187,6 +3188,116 @@ void loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm)
 
 }
 
+
+static void update_buffer_level(VP8_COMP *cpi)
+{
+    long long tmp;
+
+    /* Update the buffered average bitrate.
+     *
+     * The buffered average bitrate tracks the bitrate over the buffer
+     * window. Here we simulate taking a frame of average size out
+     * of the buffer, and putting in the new frame just encoded.
+     * It is calculated accordingly:
+     *
+     * A = Average Bits Per Frame In The Buffer
+     * P = New Frame Size
+     * N = Number of bits in the buffer
+     *
+     * We recalculate the average as so:
+     *      (N-A)*A + A*P    A * (N - A + P)
+     * A' = ------------- =  ---------------
+     *            N                 N
+     *
+     * This is modeled after the standard algorithm for a moving
+     * average with fixed weighting (eg A' = ((N-1)*A + 1*P) / N). This makes
+     * the step response nonlinear but consistent with expected behavior --
+     * when A is large, the model adapts more quickly, since there are
+     * fewer frames in the buffer and conversely when A is small there
+     * will be more frames in the buffer so the average will adapt
+     * slowly.
+     *
+     * TODO(jkoleszar): This may give poor step response in some situations,
+     * for example motion following a long static section. It might be
+     * worth experimenting more with weighting by av_per_frame_bandwidth
+     * rather than buffered_av_per_frame_bandwidth or using a more accurate
+     * algorithm to get faster response. Current testing showed worse results
+     * with that setting though.
+     *
+     */
+
+    /* Guard against buffered_av_per_frame_bandwidth falling to 0. Should
+     * never happen, but without this check, it would be irrecoverable.
+     */
+    if(cpi->buffered_av_per_frame_bandwidth == 0)
+        cpi->buffered_av_per_frame_bandwidth = 1;
+
+    tmp = cpi->oxcf.maximum_buffer_size
+                - cpi->buffered_av_per_frame_bandwidth
+                + cpi->projected_frame_size;
+    tmp *= cpi->buffered_av_per_frame_bandwidth;
+    cpi->buffered_av_per_frame_bandwidth = tmp
+                                           / cpi->oxcf.maximum_buffer_size;
+
+    if(cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
+    {
+        /* In CBR mode, buffer level is synthesized from the buffered
+         * average per-frame bandwidth to get the response characteristics
+         * of that model, rather than using the unbounded (wrt buffer size)
+         * bits_off_target. ie, the long term average bitrate doesn't
+         * matter in CBR mode. If the clip is consistently undershooting
+         * because it is very static, for example, you don't want to blow
+         * your short term bitrate budget trying to get the long term spend
+         * up to the target when you hit a motion section.
+         *
+         * Instead, the ratio of buffered_av_per_frame_bandwidth to the
+         * target av_per_frame_bandwidth is taken, scaled by
+         * maximum_buffer_size and centered around optimal_buffer_level,
+         * which presents the expected behavior of buffer_level for the other
+         * parts of the rate control code which handle the targeting.
+         *
+         * Note that this only happens after the starting_buffer_level
+         * has passed, to give the model a chance to stabilize.
+         */
+        if(cpi->total_actual_bits > cpi->oxcf.starting_buffer_level)
+        {
+            tmp = (long long)cpi->buffered_av_per_frame_bandwidth
+                  * cpi->oxcf.maximum_buffer_size
+                  / cpi->av_per_frame_bandwidth;
+            cpi->buffer_level = cpi->oxcf.maximum_buffer_size
+                                - tmp
+                                + cpi->oxcf.optimal_buffer_level;
+        }
+        else
+            cpi->buffer_level = cpi->oxcf.optimal_buffer_level;
+
+        /* Accumulate recent overshoot error.
+         *
+         * If this frame is larger than the target, then accumulate
+         * that error to apply as a damping factor later. Only care about
+         * recent overshoot, so this value decays by (N-P)/N
+         */
+        if(cpi->total_actual_bits > cpi->oxcf.starting_buffer_level)
+        {
+            long long decayed_overshoot;
+
+            decayed_overshoot = cpi->accumulated_overshoot;
+            decayed_overshoot *= (cpi->oxcf.maximum_buffer_size
+                                  - cpi->projected_frame_size);
+            decayed_overshoot /= cpi->oxcf.maximum_buffer_size;
+            cpi->accumulated_overshoot = decayed_overshoot;
+
+            cpi->accumulated_overshoot +=
+                (cpi->projected_frame_size > cpi->av_per_frame_bandwidth)
+                ? cpi->projected_frame_size - cpi->av_per_frame_bandwidth
+                : 0;
+        }
+    }
+    else
+        cpi->buffer_level = cpi->bits_off_target;
+}
+
+
 static void encode_frame_to_data_rate
 (
     VP8_COMP *cpi,
@@ -3432,7 +3543,8 @@ static void encode_frame_to_data_rate
     // For CBR if the buffer reaches its maximum level then we can no longer
     // save up bits for later frames so we might as well use them up
     // on the current frame.
-    if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
+    if (cpi->pass == 2
+        && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) &&
         (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode)
     {
         int Adjustment = cpi->active_worst_quality / 4;       // Max adjustment is 1/4
@@ -3523,6 +3635,10 @@ static void encode_frame_to_data_rate
         }
         else
         {
+            if(cpi->pass != 2)
+                Q = cpi->auto_worst_q?
+                    cpi->active_worst_quality:cpi->avg_frame_qindex;
+
             cpi->active_best_quality = inter_minq[Q];
 
             // For the constant/constrained quality mode we dont want
@@ -3824,15 +3940,17 @@ static void encode_frame_to_data_rate
             (cpi->active_worst_quality < cpi->worst_quality)      &&
             (cpi->projected_frame_size > frame_over_shoot_limit))
         {
-            int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit;
+            /* step down active_worst_quality such that the corresponding
+             * active_best_quality will be equal to the current
+             * active_worst_quality + 1. Once the limit on active_best_quality
+             * is reached, active_worst_quality will equal worst_quality.
+             */
+            int i;
 
-            // If so is there any scope for relaxing it
-            while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0))
-            {
-                cpi->active_worst_quality++;
-                top_index = cpi->active_worst_quality;
-                over_size_percent = (int)(over_size_percent * 0.96);        // Assume 1 qstep = about 4% on frame size.
-            }
+            for(i=cpi->active_worst_quality; i<cpi->worst_quality; i++)
+                if(inter_minq[i] >= cpi->active_worst_quality + 1)
+                    break;
+            cpi->active_worst_quality = i;
 
             // If we have updated the active max Q do not call vp8_update_rate_correction_factors() this loop.
             active_worst_qchanged = TRUE;
@@ -4220,10 +4338,9 @@ static void encode_frame_to_data_rate
 
     // Update the buffer level variable.
     // Non-viewable frames are a special case and are treated as pure overhead.
-    if ( !cm->show_frame )
-        cpi->bits_off_target -= cpi->projected_frame_size;
-    else
-        cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size;
+    if ( cm->show_frame )
+        cpi->bits_off_target += cpi->av_per_frame_bandwidth;
+    cpi->bits_off_target -= cpi->projected_frame_size;
 
     // Rolling monitors of whether we are over or underspending used to help regulate min and Max Q in two pass.
     cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4;
@@ -4237,7 +4354,7 @@ static void encode_frame_to_data_rate
     // Debug stats
     cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size);
 
-    cpi->buffer_level = cpi->bits_off_target;
+    update_buffer_level(cpi);
 
     // Update bits left to the kf and gf groups to account for overshoot or undershoot on these frames
     if (cm->frame_type == KEY_FRAME)
index be79cb0..341ece3 100644 (file)
@@ -348,6 +348,10 @@ typedef struct VP8_COMP
     int per_frame_bandwidth;          // Current section per frame bandwidth target
     int av_per_frame_bandwidth;        // Average frame size target for clip
     int min_frame_bandwidth;          // Minimum allocation that should be used for any frame
+    int buffered_av_per_frame_bandwidth; // Average bitrate over the last buffer
+    int buffered_av_per_frame_bandwidth_rem; // Average bitrate remainder
+    int accumulated_overshoot;           // Accumulated # of bits spent > target
+
     int inter_frame_target;
     double output_frame_rate;
     long long last_time_stamp_seen;
index 54c394d..70e133f 100644 (file)
@@ -608,7 +608,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
     int min_frame_target;
     int Adjustment;
 
-    min_frame_target = 0;
+    min_frame_target = 1;
 
     if (cpi->pass == 2)
     {
@@ -617,9 +617,11 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
         if (min_frame_target < (cpi->av_per_frame_bandwidth >> 5))
             min_frame_target = cpi->av_per_frame_bandwidth >> 5;
     }
-    else if (min_frame_target < cpi->per_frame_bandwidth / 4)
-        min_frame_target = cpi->per_frame_bandwidth / 4;
-
+    else
+    {
+        if (min_frame_target < cpi->per_frame_bandwidth / 4)
+            min_frame_target = cpi->per_frame_bandwidth / 4;
+    }
 
     // Special alt reference frame case
     if (cpi->common.refresh_alt_ref_frame)
@@ -1112,6 +1114,33 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
 
         }
     }
+
+    if (cpi->pass==0
+        && cpi->common.refresh_golden_frame
+        && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
+        long long adjust;
+
+        /*
+        frames_in_buffer = cpi->oxcf.maximum_buffer_size
+                           / cpi->av_per_frame_bandwidth;
+        gf_in_buffer = frames_in_buffer /
+                       cpi->frames_till_gf_update_due;
+        overshoot_per_gf = cpi->accumulated_overshoot / gf_in_buffer;
+
+        */
+
+        adjust = cpi->accumulated_overshoot;
+        adjust *= cpi->frames_till_gf_update_due + 1;
+        adjust *= cpi->av_per_frame_bandwidth;
+        adjust /= cpi->oxcf.maximum_buffer_size;
+
+        if (adjust > (cpi->this_frame_target - min_frame_target))
+            adjust = (cpi->this_frame_target - min_frame_target);
+        else if (adjust < 0)
+            adjust = 0;
+
+        cpi->this_frame_target -= adjust;
+    }
 }