New BRC algorithm to match HRD
authorRogozhkin, Dmitry V <dmitry.v.rogozhkin@intel.com>
Mon, 11 Jun 2012 01:45:41 +0000 (09:45 +0800)
committerXiang, Haihao <haihao.xiang@intel.com>
Mon, 11 Jun 2012 01:49:38 +0000 (09:49 +0800)
It reduces the number of recoding as well.

Signed-off-by: Rogozhkin, Dmitry V <dmitry.v.rogozhkin@intel.com>
src/Makefile.am
src/gen6_mfc.c
src/gen6_mfc.h
src/gen6_vme.c
src/gen7_mfc.c
src/i965_encoder.c
src/i965_encoder.h

index bb0b033..b8cc501 100755 (executable)
@@ -40,7 +40,7 @@ driver_ldflags = \
        $(NULL)
 
 driver_libs = \
-       -lpthread               \
+       -lpthread -lm           \
        $(DRM_LIBS) -ldrm_intel \
        $(LIBVA_DEPS_LIBS)      \
        $(LIBVA_X11_DEPS_LIBS)  \
index aad854b..95a85f3 100644 (file)
@@ -30,6 +30,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <assert.h>
+#include <math.h>
 
 #include "intel_batchbuffer.h"
 #include "i965_defines.h"
 
 #define CMD_LEN_IN_OWORD        4
 
+#define BRC_CLIP(x, min, max) \
+{ \
+    x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \
+}
+
+#define BRC_P_B_QP_DIFF 4
+#define BRC_I_P_QP_DIFF 2
+#define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
+
+#define BRC_PWEIGHT 0.6  /* weight if P slice with comparison to I slice */
+#define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */
+
+#define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
+#define BRC_CY 0.1 /* weight for */
+#define BRC_CX_UNDERFLOW 5.
+#define BRC_CX_OVERFLOW -4.
+
+#define BRC_PI_0_5 1.5707963267948966192313216916398
+
+typedef enum _gen6_brc_status
+{
+    BRC_NO_HRD_VIOLATION = 0,
+    BRC_UNDERFLOW = 1,
+    BRC_OVERFLOW = 2,
+    BRC_UNDERFLOW_WITH_MAX_QP = 3,
+    BRC_OVERFLOW_WITH_MIN_QP = 4,
+} gen6_brc_status;
+
 static const uint32_t gen6_mfc_batchbuffer_avc_intra[][4] = {
 #include "shaders/utils/mfc_batchbuffer_avc_intra.g6b"
 };
@@ -364,10 +393,11 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx,
     if (batch == NULL)
         batch = encoder_context->base.batch;
 
-    if (slice_type == SLICE_TYPE_I)
-        bit_rate_control_target = 0;
-    else
-        bit_rate_control_target = 1;
+    bit_rate_control_target = slice_type;
+    if (slice_type == SLICE_TYPE_SP)
+        bit_rate_control_target = SLICE_TYPE_P;
+    else if (slice_type == SLICE_TYPE_SI)
+        bit_rate_control_target = SLICE_TYPE_I;
 
     if (slice_type == SLICE_TYPE_P) {
         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
@@ -420,11 +450,11 @@ gen6_mfc_avc_slice_state(VADriverContextP ctx,
                   slice_param->macroblock_address );
     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
     OUT_BCS_BATCH(batch, 
-                  (rate_control_enable << 31) |                /*in CBR mode RateControlCounterEnable = enable*/
+                  (0/*rate_control_enable*/ << 31) |           /*in CBR mode RateControlCounterEnable = enable*/
                   (1 << 30) |          /*ResetRateControlCounter*/
                   (0 << 28) |          /*RC Triggle Mode = Always Rate Control*/
                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
-                  (rate_control_enable << 23) |     /*RC Panic Enable*/                 
+                  (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                 
                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/ 
                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/ 
@@ -734,20 +764,21 @@ gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
                                        struct gen6_mfc_context *mfc_context)
 {
     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
-    
     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
     int intra_mb_size = inter_mb_size * 5.0;
     int i;
-    
-    mfc_context->bit_rate_control_context[0].target_mb_size = intra_mb_size;
-    mfc_context->bit_rate_control_context[0].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
-    mfc_context->bit_rate_control_context[1].target_mb_size = inter_mb_size;
-    mfc_context->bit_rate_control_context[1].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
 
-    for(i = 0 ; i < 2; i++) {
+    mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
+
+    for(i = 0 ; i < 3; i++) {
         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
@@ -764,52 +795,222 @@ gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
     }
     
-    mfc_context->bit_rate_control_context[0].TargetSizeInWord = (intra_mb_size + 16)/ 16;
-    mfc_context->bit_rate_control_context[1].TargetSizeInWord = (inter_mb_size + 16)/ 16;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
 
-    mfc_context->bit_rate_control_context[0].MaxSizeInWord = mfc_context->bit_rate_control_context[0].TargetSizeInWord * 1.5;
-    mfc_context->bit_rate_control_context[1].MaxSizeInWord = mfc_context->bit_rate_control_context[1].TargetSizeInWord * 1.5;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
 }
 
-static int gen6_mfc_bit_rate_control_context_update(struct encode_state *encode_state, 
-                                                    struct gen6_mfc_context *mfc_context,
-                                                    int current_frame_size)
+static void
+gen6_mfc_brc_init(struct encode_state *encode_state,
+                  struct intel_encoder_context* encoder_context)
 {
-    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
-    int control_index = 1 - (pSliceParameter->slice_type == SLICE_TYPE_I);
-    int oldQp = mfc_context->bit_rate_control_context[control_index].QpPrimeY;
-
-    /*
-      printf("conrol_index = %d, start_qp = %d, result = %d, target = %d\n", control_index, 
-      mfc_context->bit_rate_control_context[control_index].QpPrimeY, current_frame_size,
-      mfc_context->bit_rate_control_context[control_index].target_frame_size );
-    */
-
-    if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 4.0 ) {
-        mfc_context->bit_rate_control_context[control_index].QpPrimeY += 4;
-    } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 2.0 ) {
-        mfc_context->bit_rate_control_context[control_index].QpPrimeY += 3;
-    } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.50 ) {
-        mfc_context->bit_rate_control_context[control_index].QpPrimeY += 2;
-    } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.20 ) {
-        mfc_context->bit_rate_control_context[control_index].QpPrimeY ++;
-    } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.30 )  {
-        mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 3;
-    } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.50 )  {
-        mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 2;
-    } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.80 )  {
-        mfc_context->bit_rate_control_context[control_index].QpPrimeY --;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
+    VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
+    VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterBuffer*)pMiscParamHRD->data;
+    double bitrate = pSequenceParameter->bits_per_second;
+    double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
+    int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
+    int intra_period = pSequenceParameter->intra_period;
+    int ip_period = pSequenceParameter->ip_period;
+    double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
+    double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
+    double bpf;
+
+    if (pSequenceParameter->ip_period) {
+        pnum = (intra_period + ip_period - 1)/ip_period - 1;
+        bnum = intra_period - inum - pnum;
+    }
+
+    mfc_context->brc.mode = encoder_context->rate_control_mode;
+
+    mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
+                                                             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
+    mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
+    mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
+
+    mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
+    mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
+    mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
+
+    bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
+
+    mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
+    mfc_context->hrd.current_buffer_fullness =
+        (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
+            pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
+    mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
+    mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
+    mfc_context->hrd.violation_noted = 0;
+
+    if ((bpf > qp51_size) && (bpf < qp1_size)) {
+        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
+    }
+    else if (bpf >= qp1_size)
+        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
+    else if (bpf <= qp51_size)
+        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
+
+    mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
+    mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
+
+    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
+    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
+    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
+}
+
+static int gen6_mfc_update_hrd(struct encode_state *encode_state,
+                               struct gen6_mfc_context *mfc_context,
+                               int frame_bits)
+{
+    double prev_bf = mfc_context->hrd.current_buffer_fullness;
+
+    mfc_context->hrd.current_buffer_fullness -= frame_bits;
+
+    if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
+        mfc_context->hrd.current_buffer_fullness = prev_bf;
+        return BRC_UNDERFLOW;
     }
     
-    if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY > 51)
-        mfc_context->bit_rate_control_context[control_index].QpPrimeY = 51;
-    if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY < 1)
-        mfc_context->bit_rate_control_context[control_index].QpPrimeY = 1;
-    if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY != oldQp)
-        return 0;
+    mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
+    if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
+        if (mfc_context->brc.mode == VA_RC_VBR)
+            mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
+        else {
+            mfc_context->hrd.current_buffer_fullness = prev_bf;
+            return BRC_OVERFLOW;
+        }
+    }
+    return BRC_NO_HRD_VIOLATION;
+}
 
-    return 1;
+static int gen6_mfc_brc_postpack(struct encode_state *encode_state,
+                                 struct gen6_mfc_context *mfc_context,
+                                 int frame_bits)
+{
+    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
+    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; 
+    int slicetype = pSliceParameter->slice_type;
+    int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
+    int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
+    int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
+    int qp; // quantizer of previously encoded slice of current type
+    int qpn; // predicted quantizer for next frame of current type in integer format
+    double qpf; // predicted quantizer for next frame of current type in float format
+    double delta_qp; // QP correction
+    int target_frame_size, frame_size_next;
+    /* Notes:
+     *  x - how far we are from HRD buffer borders
+     *  y - how far we are from target HRD buffer fullness
+     */
+    double x, y;
+    double frame_size_alpha;
+
+    if (slicetype == SLICE_TYPE_SP)
+        slicetype = SLICE_TYPE_P;
+    else if (slicetype == SLICE_TYPE_SI)
+        slicetype = SLICE_TYPE_I;
+
+    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
+
+    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
+    if (mfc_context->hrd.buffer_capacity < 5)
+        frame_size_alpha = 0;
+    else
+        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
+    if (frame_size_alpha > 30) frame_size_alpha = 30;
+    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
+                                          (double)(frame_size_alpha + 1.);
+
+    /* frame_size_next: avoiding negative number and too small value */
+    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
+        frame_size_next = (int)((double)target_frame_size * 0.25);
+
+    qpf = (double)qp * target_frame_size / frame_size_next;
+    qpn = (int)(qpf + 0.5);
+
+    if (qpn == qp) {
+        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
+        mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
+        if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
+            qpn++;
+            mfc_context->brc.qpf_rounding_accumulator = 0.;
+        } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
+            qpn--;
+            mfc_context->brc.qpf_rounding_accumulator = 0.;
+        }
+    }
+    /* making sure that QP is not changing too fast */
+    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
+    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
+    /* making sure that with QP predictions we did do not leave QPs range */
+    BRC_CLIP(qpn, 1, 51);
+
+    /* checking wthether HRD compliance is still met */
+    sts = gen6_mfc_update_hrd(encode_state, mfc_context, frame_bits);
+
+    /* calculating QP delta as some function*/
+    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
+    if (x > 0) {
+        x /= mfc_context->hrd.target_buffer_fullness;
+        y = mfc_context->hrd.current_buffer_fullness;
+    }
+    else {
+        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
+        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
+    }
+    if (y < 0.01) y = 0.01;
+    if (x > 1) x = 1;
+    else if (x < -1) x = -1;
+
+    delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
+    qpn = (int)(qpn + delta_qp + 0.5);
+
+    /* making sure that with QP predictions we did do not leave QPs range */
+    BRC_CLIP(qpn, 1, 51);
+
+    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
+        /* correcting QPs of slices of other types */
+        if (slicetype == SLICE_TYPE_P) {
+            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
+                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
+            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
+                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
+        } else if (slicetype == SLICE_TYPE_I) {
+            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
+                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
+            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
+                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
+        } else { // SLICE_TYPE_B
+            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
+                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
+            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
+                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
+        }
+        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
+        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
+        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
+    } else if (sts == BRC_UNDERFLOW) { // underflow
+        if (qpn <= qp) qpn = qp + 1;
+        if (qpn > 51) {
+            qpn = 51;
+            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
+        }
+    } else if (sts == BRC_OVERFLOW) {
+        if (qpn >= qp) qpn = qp - 1;
+        if (qpn < 1) { // < 0 (?) overflow with minQP
+            qpn = 1;
+            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
+        }
+    }
+
+    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
+
+    return sts;
 }
 
 static void 
@@ -983,14 +1184,6 @@ static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx,
     *flag = 0;
     dri_bo_unmap(bo);
 
-    /*Programing bit rate control */
-    if ( mfc_context->bit_rate_control_context[0].MaxSizeInWord == 0 )
-        gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context);
-
-    /*Programing HRD control */
-    if ( (rate_control_mode == VA_RC_CBR) && (mfc_context->vui_hrd.i_cpb_size_value == 0) )
-        gen6_mfc_hrd_context_init(encode_state, encoder_context);
-
     return vaStatus;
 }
 
@@ -1146,9 +1339,11 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     unsigned char *slice_header = NULL;
     int slice_header_length_in_bits = 0;
     unsigned int tail_data[] = { 0x0, 0x0 };
+    int slice_type = pSliceParameter->slice_type;
+
 
     if (rate_control_mode == VA_RC_CBR) {
-        qp = mfc_context->bit_rate_control_context[1 - is_intra].QpPrimeY;
+        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
         pSliceParameter->slice_qp_delta = qp - 26;
     }
 
@@ -1175,10 +1370,6 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                                5,  /* first 5 bytes are start code + nal unit type */
                                1, 0, 1, slice_batch);
 
-    if ( rate_control_mode == VA_RC_CBR) {
-        qp = mfc_context->bit_rate_control_context[1-is_intra].QpPrimeY;
-    }
-
     dri_bo_map(vme_context->vme_output.bo , 1);
     msg = (unsigned int *)vme_context->vme_output.bo->virtual;
 
@@ -1514,7 +1705,6 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer; 
-    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
@@ -1526,9 +1716,10 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     long head_offset;
     int old_used = intel_batchbuffer_used_size(slice_batch), used;
     unsigned short head_size, tail_size;
+    int slice_type = pSliceParameter->slice_type;
 
     if (rate_control_mode == VA_RC_CBR) {
-        qp = mfc_context->bit_rate_control_context[1 - is_intra].QpPrimeY;
+        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
         pSliceParameter->slice_qp_delta = qp - 26;
     }
 
@@ -1571,10 +1762,6 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     head_size = (used - old_used) / 16;
     old_used = used;
 
-    if (rate_control_mode == VA_RC_CBR) {
-        qp = mfc_context->bit_rate_control_context[1 - is_intra].QpPrimeY;
-    }
-
     /* tail */
     if (last_slice) {    
         mfc_context->insert_object(ctx,
@@ -1734,23 +1921,29 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
 {
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     unsigned int rate_control_mode = encoder_context->rate_control_mode;
-    int MAX_CBR_INTERATE = 4;
     int current_frame_bits_size;
-    int i;
+    int sts;
  
-    for(i = 0; i < MAX_CBR_INTERATE; i++) {
+    for (;;) {
         gen6_mfc_init(ctx, encoder_context);
         gen6_mfc_avc_prepare(ctx, encode_state, encoder_context);
         /*Programing bcs pipeline*/
         gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
         gen6_mfc_run(ctx, encode_state, encoder_context);
-        if ( rate_control_mode == VA_RC_CBR) {
+        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
             gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
-            //gen6_mfc_hrd_context_check(encode_state, mfc_context);
-            if ( gen6_mfc_bit_rate_control_context_update(encode_state, mfc_context, current_frame_bits_size)) {
+            sts = gen6_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
+            if (sts == BRC_NO_HRD_VIOLATION) {
                 gen6_mfc_hrd_context_update(encode_state, mfc_context);
                 break;
             }
+            else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
+                if (!mfc_context->hrd.violation_noted) {
+                    fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
+                    mfc_context->hrd.violation_noted = 1;
+                }
+                return VA_STATUS_SUCCESS;
+            }
         } else {
             break;
         }
@@ -1840,6 +2033,25 @@ gen6_mfc_context_destroy(void *context)
     free(mfc_context);
 }
 
+void gen6_mfc_brc_prepare(struct encode_state *encode_state,
+                          struct intel_encoder_context *encoder_context)
+{
+    unsigned int rate_control_mode = encoder_context->rate_control_mode;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+
+    if (rate_control_mode == VA_RC_CBR) {
+        /*Programing bit rate control */
+        if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) {
+            gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context);
+            gen6_mfc_brc_init(encode_state, encoder_context);
+        }
+
+        /*Programing HRD control */
+        if ( mfc_context->vui_hrd.i_cpb_size_value == 0 )
+            gen6_mfc_hrd_context_init(encode_state, encoder_context);    
+    }
+}
+
 Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
 {
     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
@@ -1874,6 +2086,7 @@ Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *e
     encoder_context->mfc_context = mfc_context;
     encoder_context->mfc_context_destroy = gen6_mfc_context_destroy;
     encoder_context->mfc_pipeline = gen6_mfc_pipeline;
+    encoder_context->mfc_brc_prepare = gen6_mfc_brc_prepare;
 
     return True;
 }
index 4cdb20d..43f7082 100644 (file)
@@ -144,7 +144,23 @@ struct gen6_mfc_context
 
         unsigned int target_mb_size;
         unsigned int target_frame_size;
-    } bit_rate_control_context[2];      //INTERNAL: 0 for intra frames, 1 for inter frames.
+    } bit_rate_control_context[3];      //INTERNAL: for I, P, B frames
+
+    struct {
+        int mode;
+        int gop_nums[3];
+        int target_frame_size[3]; // I,P,B
+        double bits_per_frame;
+        double qpf_rounding_accumulator;
+    } brc;
+
+    struct {
+        double current_buffer_fullness;
+        double target_buffer_fullness;
+        double buffer_capacity;
+        unsigned int buffer_size;
+        unsigned int violation_noted;
+    } hrd;
 
     //HRD control context
     struct {
@@ -200,5 +216,8 @@ VAStatus gen6_mfc_pipeline(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context);
 void gen6_mfc_context_destroy(void *context);
+void gen6_mfc_brc_prepare(struct encode_state *encode_state,
+                          struct intel_encoder_context *encoder_context);
+
 
 #endif /* _GEN6_MFC_BCS_H_ */
index 832e59c..e74b235 100644 (file)
@@ -38,6 +38,7 @@
 #include "i965_drv_video.h"
 #include "i965_encoder.h"
 #include "gen6_vme.h"
+#include "gen6_mfc.h"
 
 #define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
 #define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
@@ -387,17 +388,17 @@ static void gen6_vme_state_setup_fixup(VADriverContextP ctx,
                                        struct intel_encoder_context *encoder_context,
                                        unsigned int *vme_state_message)
 {
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
 
-    if (encoder_context->rate_control_mode != VA_RC_CQP)
-        return;
-
     if (slice_param->slice_type != SLICE_TYPE_I &&
         slice_param->slice_type != SLICE_TYPE_SI)
         return;
-
-    vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
+    if (encoder_context->rate_control_mode == VA_RC_CQP)
+        vme_state_message[16] = intra_mb_mode_cost_table[pic_param->pic_init_qp + slice_param->slice_qp_delta];
+    else
+        vme_state_message[16] = intra_mb_mode_cost_table[mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY];
 }
 
 static VAStatus gen6_vme_vme_state_setup(VADriverContextP ctx,
index f8378b9..98102c8 100644 (file)
@@ -366,6 +366,7 @@ gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encode
     encoder_context->mfc_context = mfc_context;
     encoder_context->mfc_context_destroy = gen6_mfc_context_destroy;
     encoder_context->mfc_pipeline = gen6_mfc_pipeline;
+    encoder_context->mfc_brc_prepare = gen6_mfc_brc_prepare;
 
     return True;
 }
index 7cfb966..4f0c245 100644 (file)
@@ -131,6 +131,8 @@ intel_encoder_end_picture(VADriverContextP ctx,
 
     intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context);
 
+    encoder_context->mfc_brc_prepare(encode_state, encoder_context);
+
     vaStatus = encoder_context->vme_pipeline(ctx, profile, encode_state, encoder_context);
 
     if (vaStatus == VA_STATUS_SUCCESS)
index b477a2b..180aa65 100644 (file)
@@ -54,6 +54,8 @@ struct intel_encoder_context
                              VAProfile profile,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context);
+    void (*mfc_brc_prepare)(struct encode_state *encode_state,
+                            struct intel_encoder_context *encoder_context);
 };
 
 #endif /* _I965_ENCODER_H_ */