2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
44 #include "intel_media.h"
/*
 * BRC_CLIP(x, min, max): clamp x in place to the range [min, max].
 * x is assigned to and is expanded several times without parentheses, so it
 * should be a plain lvalue expression without side effects.
 * NOTE(review): the last visible line of the body ends with a continuation
 * backslash, so part of the definition appears to be missing from this
 * listing -- confirm against the complete file.
 */
46 #define BRC_CLIP(x, min, max) \
48 x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \
51 #define BRC_P_B_QP_DIFF 4 /* QP offset between P and B slices used when cross-correcting per-type QPs */
52 #define BRC_I_P_QP_DIFF 2 /* QP offset between I and P slices used when cross-correcting per-type QPs */
53 #define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF) /* I-to-B offset: sum of the two offsets above */
55 #define BRC_PWEIGHT 0.6 /* P-slice target frame size as a fraction of the I-slice target frame size */
56 #define BRC_BWEIGHT 0.25 /* B-slice target frame size as a fraction of the I-slice target frame size */
58 #define BRC_QP_MAX_CHANGE 5 /* maximum QP change applied in one update */
59 #define BRC_CY 0.1 /* weight for ... (the original comment is incomplete; not referenced in the visible code) */
60 #define BRC_CX_UNDERFLOW 5. /* not referenced in the visible code */
61 #define BRC_CX_OVERFLOW -4. /* not referenced in the visible code */
63 #define BRC_PI_0_5 1.5707963267948966192313216916398 /* pi / 2 */
/*
 * intel_mfc_bit_rate_control_context_init: fill the per-slice-type entries
 * of mfc_context->bit_rate_control_context[] with their initial values.
 *
 * encode_state - supplies the H.264 sequence parameter buffer (seq_param_ext)
 * mfc_context  - surface_state.width/height are read and
 *                bit_rate_control_context[] is written
 *
 * NOTE(review): some lines of this function are absent from this listing
 * (the return type, the braces and the declaration of i are not visible);
 * the comments below cover only the statements that are visible.
 */
66 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
67 struct gen6_mfc_context *mfc_context)
69 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
/* Picture size in 16x16 macroblocks, rounded up. */
70 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
71 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* Frame rate is taken as time_scale / (2 * num_units_in_tick). */
72 float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
/*
 * Per-macroblock bit budget: bits_per_second divided by (fps + 4) and by
 * the macroblock count. The intra budget is five times the inter budget.
 */
73 int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
74 int intra_mb_size = inter_mb_size * 5.0;
/* I slices use the intra budget; P and B slices both use the inter budget. */
77 mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
78 mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
79 mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
80 mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
81 mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
82 mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
/* The same default values are written to entries 0, 1 and 2. */
84 for(i = 0 ; i < 3; i++) {
85 mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
86 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
87 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
88 mfc_context->bit_rate_control_context[i].GrowInit = 6;
89 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
90 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
91 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
93 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
94 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
95 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
96 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
97 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
98 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/* TargetSizeInWord is (per-macroblock budget + 16) / 16 in integer arithmetic. */
101 mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
102 mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
103 mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
/* MaxSizeInWord is 1.5 times TargetSizeInWord for each slice type. */
105 mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
106 mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
107 mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
/*
 * intel_mfc_brc_init: initialise the frame-level bit rate control state
 * (mfc_context->brc) and the HRD buffer model (mfc_context->hrd), and pick
 * a starting QpPrimeY for the I, P and B slice types.
 *
 * encode_state    - supplies the H.264 sequence parameters and the HRD
 *                   misc parameter buffer
 * encoder_context - supplies mfc_context and rate_control_mode
 *
 * NOTE(review): some lines of this function are absent from this listing
 * (for example the declaration of bpf and the braces); the comments below
 * cover only the statements that are visible.
 */
110 static void intel_mfc_brc_init(struct encode_state *encode_state,
111 struct intel_encoder_context* encoder_context)
113 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
114 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
/*
 * NOTE(review): misc_param[VAEncMiscParameterTypeHRD] is dereferenced with
 * no NULL check visible here -- confirm the caller guarantees it is set.
 */
115 VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
116 VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
117 double bitrate = pSequenceParameter->bits_per_second;
/* Frame rate is taken as time_scale / (2 * num_units_in_tick). */
118 double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
119 int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
120 int intra_period = pSequenceParameter->intra_period;
121 int ip_period = pSequenceParameter->ip_period;
/*
 * Frame-size bounds used below to choose the initial QP: a bits-per-frame
 * value at or above qp1_size selects QP 1, one at or below qp51_size
 * selects QP 51. Both scale with the picture area ((mbs << 4) per axis).
 */
122 double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
123 double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
/* Derive the P and B frame counts of one GOP; skipped when ip_period is 0. */
126 if (pSequenceParameter->ip_period) {
127 pnum = (intra_period + ip_period - 1)/ip_period - 1;
128 bnum = intra_period - inum - pnum;
131 mfc_context->brc.mode = encoder_context->rate_control_mode;
/*
 * I-frame target: the bits available for one GOP (bitrate * intra_period /
 * framerate) divided by the weighted frame count, where a P frame counts
 * as BRC_PWEIGHT and a B frame as BRC_BWEIGHT of an I frame.
 */
133 mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
134 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
135 mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
136 mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
138 mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
139 mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
140 mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
/* Average bits per frame at the requested bit rate. */
142 bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
144 mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
/*
 * Start from the requested initial fullness when it is below buffer_size,
 * otherwise from half the buffer size. Note that the (double) cast below
 * applies to the parenthesised comparison result, not to
 * initial_buffer_fullness; the selected branch is the same either way.
 */
145 mfc_context->hrd.current_buffer_fullness =
146 (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
147 pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
148 mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
149 mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
150 mfc_context->hrd.violation_noted = 0;
/*
 * Initial P-slice QP: linear interpolation from 51 (at qp51_size) down to
 * 1 (at qp1_size) on bits per frame, saturating outside that interval.
 */
152 if ((bpf > qp51_size) && (bpf < qp1_size)) {
153 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
155 else if (bpf >= qp1_size)
156 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
157 else if (bpf <= qp51_size)
158 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
/* I and B slices start from the same QP as P slices. */
160 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
161 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
/* Keep all three starting QPs inside [1, 51]. */
163 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
164 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
165 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
/*
 * intel_mfc_update_hrd: advance the HRD buffer model in mfc_context->hrd by
 * one frame and report whether the buffer limits were violated.
 *
 * On the visible paths the function returns BRC_UNDERFLOW when removing
 * frame_bits would empty the buffer, and BRC_NO_HRD_VIOLATION otherwise.
 *
 * NOTE(review): lines are missing from this listing -- the parameter that
 * declares frame_bits, the braces, and the tail of the overflow branch are
 * not visible.
 */
168 int intel_mfc_update_hrd(struct encode_state *encode_state,
169 struct gen6_mfc_context *mfc_context,
/* Remember the fullness so it can be restored when a violation is found. */
172 double prev_bf = mfc_context->hrd.current_buffer_fullness;
/* Remove the bits of the frame that was just coded. */
174 mfc_context->hrd.current_buffer_fullness -= frame_bits;
/* The limits are only checked when a buffer size is configured (> 0). */
176 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
177 mfc_context->hrd.current_buffer_fullness = prev_bf;
178 return BRC_UNDERFLOW;
/* Add the bits delivered during one frame interval. */
181 mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
182 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
/* In VBR mode the fullness is clamped to the buffer size. */
183 if (mfc_context->brc.mode == VA_RC_VBR)
184 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
/*
 * NOTE(review): presumably the non-VBR branch, restoring the previous
 * fullness before reporting an overflow -- the else and the return
 * statement are not visible in this listing; confirm.
 */
186 mfc_context->hrd.current_buffer_fullness = prev_bf;
190 return BRC_NO_HRD_VIOLATION;
/*
 * intel_mfc_brc_postpack: after a frame has been packed, predict the QP for
 * the next frame of the same slice type and check the HRD buffer model.
 *
 * Reads the slice type from the first slice parameter buffer, the per-type
 * QpPrimeY values and the brc/hrd state in mfc_context. Writes QpPrimeY
 * entries and brc.qpf_rounding_accumulator, and calls
 * intel_mfc_update_hrd().
 *
 * NOTE(review): lines are missing from this listing -- the parameter that
 * declares frame_bits, the declarations of x and y, several else branches,
 * the adjustments of qpn in some branches and the return statement are not
 * visible. The comments below describe only the visible statements.
 */
193 int intel_mfc_brc_postpack(struct encode_state *encode_state,
194 struct gen6_mfc_context *mfc_context,
197 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
198 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
199 int slicetype = pSliceParameter->slice_type;
/* Snapshot of the current per-type QPs, used for the cross-correction below. */
200 int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
201 int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
202 int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
203 int qp; // quantizer of previously encoded slice of current type
204 int qpn; // predicted quantizer for next frame of current type in integer format
205 double qpf; // predicted quantizer for next frame of current type in float format
206 double delta_qp; // QP correction
207 int target_frame_size, frame_size_next;
209 * x - how far we are from HRD buffer borders
210 * y - how far we are from target HRD buffer fullness
213 double frame_size_alpha;
/* SP slices are handled as P slices and SI slices as I slices. */
215 if (slicetype == SLICE_TYPE_SP)
216 slicetype = SLICE_TYPE_P;
217 else if (slicetype == SLICE_TYPE_SI)
218 slicetype = SLICE_TYPE_I;
220 qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
222 target_frame_size = mfc_context->brc.target_frame_size[slicetype];
/*
 * frame_size_alpha damps the size correction: 0 when the buffer capacity
 * is below 5, otherwise the number of frames of this type in the GOP
 * (presumably in an else branch that is not visible), capped at 30.
 */
223 if (mfc_context->hrd.buffer_capacity < 5)
224 frame_size_alpha = 0;
226 frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
227 if (frame_size_alpha > 30) frame_size_alpha = 30;
/* Next-frame budget: target plus the damped deviation of this frame from it. */
228 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
229 (double)(frame_size_alpha + 1.);
231 /* frame_size_next: avoid negative and too small values (floor at 25% of the target) */
232 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
233 frame_size_next = (int)((double)target_frame_size * 0.25);
/* Scale the QP by target / next budget and round to the nearest integer. */
235 qpf = (double)qp * target_frame_size / frame_size_next;
236 qpn = (int)(qpf + 0.5);
239 /* rounding qpf to qpn introduces an error: accumulate it so it can be compensated */
240 mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
/*
 * The accumulator is reset once its magnitude exceeds 1.0.
 * NOTE(review): the matching adjustment of qpn in each branch is not
 * visible in this listing.
 */
241 if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
243 mfc_context->brc.qpf_rounding_accumulator = 0.;
244 } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
246 mfc_context->brc.qpf_rounding_accumulator = 0.;
249 /* making sure that QP is not changing too fast */
250 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
251 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
252 /* making sure that the QP prediction does not leave the valid QP range */
253 BRC_CLIP(qpn, 1, 51);
255 /* checking whether HRD compliance is still met */
256 sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);
258 /* calculating the QP delta as a function of the HRD buffer state */
259 x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
/*
 * x is normalised by the distance from the target to the relevant buffer
 * border and y is the distance of the current fullness from that border.
 * NOTE(review): the condition selecting between the two pairs of
 * statements below is not visible in this listing.
 */
261 x /= mfc_context->hrd.target_buffer_fullness;
262 y = mfc_context->hrd.current_buffer_fullness;
265 x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
266 y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
/* Keep y away from zero because it is used as a divisor below; bound x below by -1. */
268 if (y < 0.01) y = 0.01;
270 else if (x < -1) x = -1;
/* Correction of at most BRC_QP_MAX_CHANGE: exp(-1/y) * sin(pi/2 * x). */
272 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
273 qpn = (int)(qpn + delta_qp + 0.5);
275 /* making sure that the QP prediction does not leave the valid QP range */
276 BRC_CLIP(qpn, 1, 51);
278 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
279 /* correcting QPs of slices of other types */
/*
 * Each other type is pulled towards qpn plus or minus the expected offset
 * when the gap exceeds a threshold; the step is the gap shifted right.
 * NOTE(review): the gap can be negative, and right-shifting a negative int
 * is implementation-defined in C -- confirm the intended rounding.
 */
280 if (slicetype == SLICE_TYPE_P) {
281 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
282 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
283 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
284 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
285 } else if (slicetype == SLICE_TYPE_I) {
286 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
287 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
/*
 * NOTE(review): this is the only pair that combines the "> 2" threshold
 * with a ">> 2" step; the others pair "> 2" with ">> 1" and "> 4" with
 * ">> 2". Confirm whether that is intended.
 */
288 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
289 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
290 } else { // SLICE_TYPE_B
291 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
292 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
293 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
294 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
296 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
297 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
298 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
299 } else if (sts == BRC_UNDERFLOW) { // underflow
/* On underflow the next QP must be higher than the one just used. */
300 if (qpn <= qp) qpn = qp + 1;
/* NOTE(review): the condition guarding the status change below is not visible in this listing. */
303 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
305 } else if (sts == BRC_OVERFLOW) {
/* On overflow the next QP must be lower than the one just used. */
306 if (qpn >= qp) qpn = qp - 1;
307 if (qpn < 1) { // < 0 (?) overflow with minQP
309 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
/* Store the predicted QP for the next frame of this slice type. */
313 mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
/*
 * intel_mfc_hrd_context_init: initialise the mfc_context->vui_hrd fields
 * from the sequence-level target bit rate. Nothing is written unless the
 * rate control mode is VA_RC_CBR.
 *
 * NOTE(review): the braces of this function are not visible in this listing.
 */
318 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
319 struct intel_encoder_context *encoder_context)
321 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
322 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
323 unsigned int rate_control_mode = encoder_context->rate_control_mode;
324 int target_bit_rate = pSequenceParameter->bits_per_second;
326 // currently only CBR mode is supported.
327 if (rate_control_mode == VA_RC_CBR) {
/* Bit rate and CPB size are stored divided by 1024; the CPB size is 8 times the bit rate. */
328 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
329 mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
/* Half of the CPB size, divided by the bit rate and multiplied by 90000. */
330 mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
331 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
332 mfc_context->vui_hrd.i_frame_number = 0;
/* All three delay fields use a length of 24. */
334 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
335 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
336 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/*
 * intel_mfc_hrd_context_update: advance the frame counter kept in
 * mfc_context->vui_hrd by one. encode_state is not used by the visible
 * statement.
 *
 * NOTE(review): the return type and braces are not visible in this listing.
 */
342 intel_mfc_hrd_context_update(struct encode_state *encode_state,
343 struct gen6_mfc_context *mfc_context)
345 mfc_context->vui_hrd.i_frame_number++;
/*
 * intel_mfc_interlace_check: add up num_macroblocks over all slice
 * parameter buffers and compare the total with the macroblock count of the
 * surface (width_in_mbs * height_in_mbs).
 *
 * NOTE(review): the declarations of i and mbCount and both return
 * statements are not visible in this listing, so the values returned for a
 * match and a mismatch cannot be stated from here.
 */
348 int intel_mfc_interlace_check(VADriverContextP ctx,
349 struct encode_state *encode_state,
350 struct intel_encoder_context *encoder_context)
352 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
353 VAEncSliceParameterBufferH264 *pSliceParameter;
/* Surface size in 16x16 macroblocks, rounded up. */
356 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
357 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* Only the first element of each slice parameter buffer is read. */
359 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
360 pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
361 mbCount += pSliceParameter->num_macroblocks;
364 if ( mbCount == ( width_in_mbs * height_in_mbs ) )
/*
 * intel_mfc_brc_prepare: in CBR mode, initialise the bit rate control and
 * HRD state the first time it is needed. A zero MaxSizeInWord for I slices
 * and a zero i_cpb_size_value are used as the "not yet initialised" markers.
 *
 * NOTE(review): the braces of this function are not visible in this listing.
 */
370 void intel_mfc_brc_prepare(struct encode_state *encode_state,
371 struct intel_encoder_context *encoder_context)
373 unsigned int rate_control_mode = encoder_context->rate_control_mode;
374 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
376 if (rate_control_mode == VA_RC_CBR) {
377 /* Programming bit rate control */
378 if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) {
379 intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
380 intel_mfc_brc_init(encode_state, encoder_context);
383 /* Programming HRD control */
384 if ( mfc_context->vui_hrd.i_cpb_size_value == 0 )
385 intel_mfc_hrd_context_init(encode_state, encoder_context);
/*
 * intel_mfc_avc_pipeline_header_programing: pass the packed SPS, PPS and
 * SEI headers held in encode_state to mfc_context->insert_object(). When no
 * packed SEI is present and the rate control mode is VA_RC_CBR, an SEI is
 * built with build_avc_sei_buffer_timing() from the vui_hrd fields and
 * inserted instead.
 *
 * NOTE(review): several arguments of the insert_object() and
 * build_avc_sei_buffer_timing() calls, the braces and any cleanup of
 * sei_data are not visible in this listing.
 */
389 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
390 struct encode_state *encode_state,
391 struct intel_encoder_context *encoder_context,
392 struct intel_batchbuffer *slice_batch)
394 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
395 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
396 unsigned int rate_control_mode = encoder_context->rate_control_mode;
/* SPS: inserted only when packed header data is present. */
398 if (encode_state->packed_header_data[idx]) {
399 VAEncPackedHeaderParameterBuffer *param = NULL;
400 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
401 unsigned int length_in_bits;
403 assert(encode_state->packed_header_param[idx]);
404 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
405 length_in_bits = param->bit_length;
/* The length is passed as a count of 32-bit units (rounded up) plus the bit count modulo 32. */
407 mfc_context->insert_object(ctx,
410 ALIGN(length_in_bits, 32) >> 5,
411 length_in_bits & 0x1f,
412 5, /* FIXME: check it */
415 !param->has_emulation_bytes,
/* PPS: same handling as the SPS. */
419 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
421 if (encode_state->packed_header_data[idx]) {
422 VAEncPackedHeaderParameterBuffer *param = NULL;
423 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
424 unsigned int length_in_bits;
426 assert(encode_state->packed_header_param[idx]);
427 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
428 length_in_bits = param->bit_length;
430 mfc_context->insert_object(ctx,
433 ALIGN(length_in_bits, 32) >> 5,
434 length_in_bits & 0x1f,
435 5, /* FIXME: check it */
438 !param->has_emulation_bytes,
/* SEI: a packed SEI is used when present; otherwise one is generated in CBR mode. */
442 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
444 if (encode_state->packed_header_data[idx]) {
445 VAEncPackedHeaderParameterBuffer *param = NULL;
446 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
447 unsigned int length_in_bits;
449 assert(encode_state->packed_header_param[idx]);
450 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
451 length_in_bits = param->bit_length;
453 mfc_context->insert_object(ctx,
456 ALIGN(length_in_bits, 32) >> 5,
457 length_in_bits & 0x1f,
458 5, /* FIXME: check it */
461 !param->has_emulation_bytes,
463 } else if (rate_control_mode == VA_RC_CBR) {
/* This declaration shadows the function-level mfc_context with the same value. */
465 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
467 unsigned char *sei_data = NULL;
/* The removal delay passed is i_cpb_removal_delay multiplied by the current frame number. */
469 int length_in_bits = build_avc_sei_buffer_timing(
470 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
471 mfc_context->vui_hrd.i_initial_cpb_removal_delay,
473 mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
474 mfc_context->vui_hrd.i_dpb_output_delay_length,
477 mfc_context->insert_object(ctx,
479 (unsigned int *)sei_data,
480 ALIGN(length_in_bits, 32) >> 5,
481 length_in_bits & 0x1f,
/*
 * intel_mfc_avc_prepare: bind the buffer objects needed to encode one AVC
 * picture into mfc_context -- the reconstructed surface (as pre- or
 * post-deblocking output), its direct-MV buffers, the reference surfaces
 * and their direct-MV buffers, the input YUV surface and the coded buffer.
 * A reference is taken (dri_bo_reference) on each buffer object stored.
 *
 * NOTE(review): many lines of this function are absent from this listing
 * (braces, else branches, some dri_bo_alloc() arguments, the declaration of
 * bo, any map/unmap of the coded buffer and the return statement); the
 * comments below cover only the statements that are visible.
 */
491 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
492 struct encode_state *encode_state,
493 struct intel_encoder_context *encoder_context)
495 struct i965_driver_data *i965 = i965_driver_data(ctx);
496 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
497 struct object_surface *obj_surface;
498 struct object_buffer *obj_buffer;
499 GenAvcSurface *gen6_avc_surface;
501 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
502 VAStatus vaStatus = VA_STATUS_SUCCESS;
503 int i, j, enable_avc_ildb = 0;
504 VAEncSliceParameterBufferH264 *slice_param;
505 struct i965_coded_buffer_segment *coded_buffer_segment;
506 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
507 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
508 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
/* NOTE(review): the body of this GEN6-only branch is not visible in this listing. */
510 if (IS_GEN6(i965->intel.device_id)) {
511 /* On the SNB it should be fixed to 128 for the DMV buffer */
/* Scan the slices until one is found that does not disable the deblocking filter. */
515 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
516 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
517 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
519 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
520 assert((slice_param->slice_type == SLICE_TYPE_I) ||
521 (slice_param->slice_type == SLICE_TYPE_SI) ||
522 (slice_param->slice_type == SLICE_TYPE_P) ||
523 (slice_param->slice_type == SLICE_TYPE_SP) ||
524 (slice_param->slice_type == SLICE_TYPE_B));
526 if (slice_param->disable_deblocking_filter_idc != 1) {
535 /* Set up all the input and output objects */
537 /* Set up the current frame and the current direct mv buffers */
538 obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
540 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* Allocate the per-surface direct-MV buffers the first time this surface is used. */
542 if ( obj_surface->private_data == NULL) {
/*
 * NOTE(review): calloc() is declared as calloc(nmemb, size), so the
 * arguments are swapped here (the allocated size is the same), and no
 * NULL check of the result is visible before it is dereferenced.
 */
543 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
544 gen6_avc_surface->dmv_top =
545 dri_bo_alloc(i965->intel.bufmgr,
547 68 * width_in_mbs * height_in_mbs,
549 gen6_avc_surface->dmv_bottom =
550 dri_bo_alloc(i965->intel.bufmgr,
552 68 * width_in_mbs * height_in_mbs,
554 assert(gen6_avc_surface->dmv_top);
555 assert(gen6_avc_surface->dmv_bottom);
556 obj_surface->private_data = (void *)gen6_avc_surface;
557 obj_surface->free_private_data = (void *)gen_free_avc_surface;
/* The current frame's DMV buffers occupy the last two direct_mv_buffers slots. */
559 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
560 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
561 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
562 dri_bo_reference(gen6_avc_surface->dmv_top);
563 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/*
 * With in-loop deblocking enabled the surface is the post-deblocking
 * output; the pre-deblocking assignment below is presumably the else
 * branch (the else itself is not visible in this listing).
 */
565 if (enable_avc_ildb) {
566 mfc_context->post_deblocking_output.bo = obj_surface->bo;
567 dri_bo_reference(mfc_context->post_deblocking_output.bo);
569 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
570 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
/* Record the surface geometry: original size and allocated pitch. */
573 mfc_context->surface_state.width = obj_surface->orig_width;
574 mfc_context->surface_state.height = obj_surface->orig_height;
575 mfc_context->surface_state.w_pitch = obj_surface->width;
576 mfc_context->surface_state.h_pitch = obj_surface->height;
578 /* Set up the reference frames and their direct mv buffers */
579 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
580 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) {
581 obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
583 if (obj_surface->bo != NULL) {
584 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
585 dri_bo_reference(obj_surface->bo);
587 /* Check DMV buffer */
588 if ( obj_surface->private_data == NULL) {
/* Same allocation as for the current frame; the same calloc() review notes apply. */
590 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
591 gen6_avc_surface->dmv_top =
592 dri_bo_alloc(i965->intel.bufmgr,
594 68 * width_in_mbs * height_in_mbs,
596 gen6_avc_surface->dmv_bottom =
597 dri_bo_alloc(i965->intel.bufmgr,
599 68 * width_in_mbs * height_in_mbs,
601 assert(gen6_avc_surface->dmv_top);
602 assert(gen6_avc_surface->dmv_bottom);
603 obj_surface->private_data = gen6_avc_surface;
604 obj_surface->free_private_data = gen_free_avc_surface;
607 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
608 /* Setup DMV buffer */
/* Reference i uses slots 2*i (top) and 2*i + 1 (bottom). */
609 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
610 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
611 dri_bo_reference(gen6_avc_surface->dmv_top);
612 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Input picture to be encoded. */
618 obj_surface = SURFACE(encoder_context->input_yuv_surface);
619 assert(obj_surface && obj_surface->bo);
620 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
621 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Coded (output) buffer: the bitstream starts after the coded buffer header. */
623 obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
624 bo = obj_buffer->buffer_store->bo;
626 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
627 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
628 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
629 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
/*
 * Reset the header at the start of the coded buffer.
 * NOTE(review): bo->virtual is read here; the call that maps the buffer
 * is not visible in this listing -- confirm it precedes this access.
 */
632 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
633 coded_buffer_segment->mapped = 0;
634 coded_buffer_segment->codec = CODED_H264;
640 * The LUT uses the pair of 4-bit units: (shift, base) structure.
642 * So it is necessary to convert one cost into the nearest LUT format.
644 * 2^K *x = 2^n * (1 + deltaX)
645 * k + log2(x) = n + log2(1 + deltaX)
646 * log2(x) = n - k + log2(1 + deltaX)
647 * As X is in the range of [1, 15]
648 * 4 > n - k + log2(1 + deltaX) >= 0
649 * => n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
650 * Then we can derive the corresponding K and get the nearest LUT format.
/*
 * intel_format_lutvalue: convert a cost value into the packed LUT byte
 * form (shift << 4) | base, choosing the shift whose decoded value
 * (base << shift) is closest to the input, and decode both the result and
 * max for a final comparison.
 *
 * value - cost to convert
 * max   - upper limit, given in the same packed (shift, base) form
 *
 * NOTE(review): lines are missing from this listing (the declaration of
 * ret, early returns, the branch structure around the search loop, the
 * initialisation of error, the final comparison of temp1 with temp2 and
 * the return statement); the comments below cover only what is visible.
 */
652 int intel_format_lutvalue(int value, int max)
655 int logvalue, temp1, temp2;
/* Integer part of log2(value). */
660 logvalue = (int)(log2f((float)value));
664 int error, temp_value, base, j, temp_err;
/* Try the four shifts logvalue - 3 .. logvalue. */
666 j = logvalue - 4 + 1;
668 for(; j <= logvalue; j++) {
/* Base for shift j, biased upwards by (1 << (j - 1)) - 1 before shifting. */
672 base = (value + (1 << (j - 1)) - 1) >> j;
/* Decode the candidate and keep it when it is closer to value than the best so far. */
677 temp_value = base << j;
678 temp_err = abs(value - temp_value);
679 if (temp_err < error) {
681 ret = (j << 4) | base;
/* Decode ret and max back into plain values (low nibble shifted by high nibble). */
687 temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
688 temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
/*
 * intel_lambda_qp: derive a lambda factor from a QP value. The visible
 * statements compute roundf(2 ^ (value / 6 - 2)).
 *
 * NOTE(review): the initial assignment of value from qp, any clamping and
 * the return statement are not visible in this listing.
 */
699 static float intel_lambda_qp(int qp)
701 float value, lambdaf;
703 value = value / 6 - 2;
706 lambdaf = roundf(powf(2, value));
/*
 * intel_vme_update_mbmv_cost: fill the mode and motion-vector cost entries
 * of vme_context->vme_state_message. The costs are multiples of a lambda
 * derived from the QP, each converted with intel_format_lutvalue().
 *
 * The QP is pic_init_qp + slice_qp_delta in CQP mode; the other visible
 * assignment takes the rate-control QpPrimeY of the slice type (presumably
 * the else branch, which is not visible).
 *
 * NOTE(review): lines are missing from this listing (braces, else
 * branches, early returns, several "m_cost = ..." assignments, the
 * mv_count updates and the condition guarding the block of fixed 0x4a
 * values); the comments below cover only the statements that are visible.
 */
711 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
712 struct encode_state *encode_state,
713 struct intel_encoder_context *encoder_context)
715 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
716 struct gen6_vme_context *vme_context = encoder_context->vme_context;
717 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
718 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
719 int qp, m_cost, j, mv_count;
720 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
721 float lambda, m_costf;
723 if (encoder_context->rate_control_mode == VA_RC_CQP)
724 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
/*
 * NOTE(review): slice_type is used directly as the array index here,
 * without the SP -> P / SI -> I remapping that intel_mfc_brc_postpack
 * applies -- confirm SP/SI values cannot reach this point.
 */
726 qp = mfc_context->bit_rate_control_context[slice_param->slice_type].QpPrimeY;
/* NOTE(review): presumably returns early when there is no state message; the statement is not visible. */
728 if (vme_state_message == NULL)
731 assert(qp <= QP_MAX);
732 lambda = intel_lambda_qp(qp);
/* I and SI slices: only the intra mode costs are written in the visible statements. */
733 if ((slice_param->slice_type == SLICE_TYPE_I) ||
734 (slice_param->slice_type == SLICE_TYPE_SI)) {
735 vme_state_message[MODE_INTRA_16X16] = 0;
737 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
738 m_cost = lambda * 16;
739 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
741 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* Motion-vector costs: entries following MODE_INTER_MV0, cost (log2(j + 1) + 1.718) * lambda. */
744 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
745 for (j = 1; j < 3; j++) {
746 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
747 m_cost = (int)m_costf;
748 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
/* Remaining entries use j = 4, 8, ..., 64 and are indexed by mv_count. */
751 for (j = 4; j <= 64; j *= 2) {
752 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
753 m_cost = (int)m_costf;
754 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Fixed mode costs; the condition that selects this block is not visible in this listing. */
759 vme_state_message[MODE_INTRA_16X16] = 0x4a;
760 vme_state_message[MODE_INTRA_8X8] = 0x4a;
761 vme_state_message[MODE_INTRA_4X4] = 0x4a;
762 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
763 vme_state_message[MODE_INTER_16X16] = 0x4a;
764 vme_state_message[MODE_INTER_16X8] = 0x4a;
765 vme_state_message[MODE_INTER_8X8] = 0x4a;
766 vme_state_message[MODE_INTER_8X4] = 0x4a;
767 vme_state_message[MODE_INTER_4X4] = 0x4a;
768 vme_state_message[MODE_INTER_BWD] = 0x2a;
/*
 * NOTE(review): m_costf is assigned here but the next statement passes
 * m_cost, and no assignment to m_cost is visible between the two, so the
 * value from the preceding loop would be used -- looks unintended; confirm.
 */
771 m_costf = lambda * 10;
772 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
773 m_cost = lambda * 14;
774 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
775 m_cost = lambda * 24;
776 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
777 m_costf = lambda * 3.5;
779 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* Inter mode costs for P and SP slices. */
780 if ((slice_param->slice_type == SLICE_TYPE_P) ||
781 (slice_param->slice_type == SLICE_TYPE_SP)) {
782 m_costf = lambda * 2.5;
784 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
785 m_costf = lambda * 4;
787 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
788 m_costf = lambda * 1.5;
790 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
791 m_costf = lambda * 3;
793 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
794 m_costf = lambda * 5;
796 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
797 /* BWD is not used in P-frame */
798 vme_state_message[MODE_INTER_BWD] = 0;
/* Inter mode costs for the remaining slice types (presumably the else branch, i.e. B slices). */
800 m_costf = lambda * 2.5;
802 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
803 m_costf = lambda * 5.5;
805 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
806 m_costf = lambda * 3.5;
808 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
809 m_costf = lambda * 5.0;
811 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
812 m_costf = lambda * 6.5;
814 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
815 m_costf = lambda * 1.5;
817 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);