2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
44 #include "intel_media.h"
/*
 * Clamp the lvalue `x` in place to the inclusive range [min, max].
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves as a
 * single statement (e.g. in an unbraced if/else), and every argument is
 * parenthesized so that expressions such as `*(p + 1)` expand correctly.
 * Note that `x`, `min` and `max` may be evaluated more than once, so they
 * must not have side effects.
 */
#define BRC_CLIP(x, min, max)                                               \
    do {                                                                    \
        (x) = (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)));      \
    } while (0)
/* QP offsets between slice types; used when intel_mfc_brc_postpack corrects the QPs of the other slice types. */
51 #define BRC_P_B_QP_DIFF 4
52 #define BRC_I_P_QP_DIFF 2
53 #define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
/* Relative frame-size weights; intel_mfc_brc_init scales the I-frame target size by these for P and B frames. */
55 #define BRC_PWEIGHT 0.6 /* weight if P slice with comparison to I slice */
56 #define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */
/* Largest QP step applied per update in intel_mfc_brc_postpack (also the amplitude of its HRD-based correction). */
58 #define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
/* NOTE(review): BRC_CY, BRC_CX_UNDERFLOW and BRC_CX_OVERFLOW are not referenced in the visible part of this file; their meaning cannot be confirmed from here. */
59 #define BRC_CY 0.1 /* weight for */
60 #define BRC_CX_UNDERFLOW 5.
61 #define BRC_CX_OVERFLOW -4.
/* pi / 2; argument scale for sin() in intel_mfc_brc_postpack. */
63 #define BRC_PI_0_5 1.5707963267948966192313216916398
/* Fallback log2f built from logf() and M_LN2. NOTE(review): presumably wrapped in a preprocessor guard whose lines are not present in this excerpt - confirm. */
66 #define log2f(x) (logf(x)/(float)M_LN2)
69 int intel_avc_enc_slice_type_fixup(int slice_type)
71 if (slice_type == SLICE_TYPE_SP ||
72 slice_type == SLICE_TYPE_P)
73 slice_type = SLICE_TYPE_P;
74 else if (slice_type == SLICE_TYPE_SI ||
75 slice_type == SLICE_TYPE_I)
76 slice_type = SLICE_TYPE_I;
78 if (slice_type != SLICE_TYPE_B)
79 WARN_ONCE("Invalid slice type for H.264 encoding!\n");
81 slice_type = SLICE_TYPE_B;
/*
 * Initialize the three per-slice-type entries of
 * mfc_context->bit_rate_control_context (indexed by SLICE_TYPE_I/P/B) from
 * the H.264 sequence parameters.
 *
 * Per-macroblock target sizes are derived from bits_per_second, the frame
 * rate and the picture size in macroblocks; the remaining fields are loaded
 * with fixed defaults.
 *
 * NOTE(review): the return-type line and the declaration of the loop index
 * `i` are not present in this excerpt.
 */
88 intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
89 struct gen6_mfc_context *mfc_context)
91 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
92 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
93 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
/* Frame rate is taken as time_scale / (2 * num_units_in_tick). */
94 float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
/* Bits per macroblock for inter frames: the per-second budget is spread over (fps + 4) frames. */
95 int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
/* Intra macroblocks are budgeted five times the inter size. */
96 int intra_mb_size = inter_mb_size * 5.0;
99 mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
100 mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
101 mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
102 mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
103 mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
104 mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
/* Same fixed defaults for all three slice types. */
106 for(i = 0 ; i < 3; i++) {
107 mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
108 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
109 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
110 mfc_context->bit_rate_control_context[i].GrowInit = 6;
111 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
112 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
113 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
115 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
116 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
117 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
118 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
119 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
120 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/* Target size expressed in units of 16 (the per-MB size plus 16, divided by 16). */
123 mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
124 mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
125 mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
/* The maximum size is 1.5 times the target size. */
127 mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
128 mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
129 mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
/*
 * Initialize the bit-rate-control state (mfc_context->brc), the HRD buffer
 * model (mfc_context->hrd) and the starting QpPrimeY of all three slice types
 * from the sequence parameters and the HRD misc parameter buffer.
 *
 * NOTE(review): the declaration of `bpf` is not present in this excerpt.
 * NOTE(review): encode_state->misc_param[VAEncMiscParameterTypeHRD] is
 * dereferenced without a NULL check - confirm the caller guarantees it.
 */
132 static void intel_mfc_brc_init(struct encode_state *encode_state,
133 struct intel_encoder_context* encoder_context)
135 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
136 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
137 VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
138 VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
139 double bitrate = pSequenceParameter->bits_per_second;
140 double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
141 int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
142 int intra_period = pSequenceParameter->intra_period;
143 int ip_period = pSequenceParameter->ip_period;
/* Frame sizes used as interpolation end points for QP 1 and QP 51 below: 0.1 and 0.001 times 8 * 3 * width * height / 2, with width and height in pixels (MBs << 4). */
144 double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
145 double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
/* With a non-zero ip_period, count the P frames in one intra period; the rest (minus the I frame) are B frames. */
148 if (pSequenceParameter->ip_period) {
149 pnum = (intra_period + ip_period - 1)/ip_period - 1;
150 bnum = intra_period - inum - pnum;
153 mfc_context->brc.mode = encoder_context->rate_control_mode;
/* I-frame target: the bits available for one intra period divided by the weighted frame count; P and B targets are fractions of it. */
155 mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
156 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
157 mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
158 mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
160 mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
161 mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
162 mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
/* Average bits per frame. */
164 bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
166 mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
/* Start from the requested initial fullness when it is below the buffer size, otherwise from half the buffer. */
/* NOTE(review): the (double) cast below applies to the parenthesized comparison result, not to initial_buffer_fullness; the selected value is the same either way. */
167 mfc_context->hrd.current_buffer_fullness =
168 (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
169 pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
170 mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
/* Buffer capacity expressed in qp1_size units. */
171 mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
172 mfc_context->hrd.violation_noted = 0;
/* Initial P QP: linear interpolation from 51 (bpf == qp51_size) down to 1 (bpf == qp1_size), saturating outside that range. */
174 if ((bpf > qp51_size) && (bpf < qp1_size)) {
175 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
177 else if (bpf >= qp1_size)
178 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
179 else if (bpf <= qp51_size)
180 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
/* I and B start from the same QP as P. */
182 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
183 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
185 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
186 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
187 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
/*
 * Update the HRD buffer model after a frame of `frame_bits` bits.
 *
 * The frame size is subtracted from hrd.current_buffer_fullness. If that
 * drives the fullness to zero or below (and buffer_size > 0), the previous
 * fullness is restored and BRC_UNDERFLOW is returned. Otherwise
 * brc.bits_per_frame is added back; when the result exceeds buffer_size the
 * fullness is clamped to buffer_size in VA_RC_VBR mode. When no violation is
 * detected BRC_NO_HRD_VIOLATION is returned.
 *
 * NOTE(review): the declaration of the third parameter (frame_bits) and the
 * lines around the non-VBR overflow branch are not present in this excerpt;
 * that branch restores the previous fullness and presumably returns
 * BRC_OVERFLOW - confirm against the full file.
 */
190 int intel_mfc_update_hrd(struct encode_state *encode_state,
191 struct gen6_mfc_context *mfc_context,
/* Remember the fullness so it can be restored on a violation. */
194 double prev_bf = mfc_context->hrd.current_buffer_fullness;
196 mfc_context->hrd.current_buffer_fullness -= frame_bits;
198 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
199 mfc_context->hrd.current_buffer_fullness = prev_bf;
200 return BRC_UNDERFLOW;
203 mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
204 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
205 if (mfc_context->brc.mode == VA_RC_VBR)
206 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
208 mfc_context->hrd.current_buffer_fullness = prev_bf;
212 return BRC_NO_HRD_VIOLATION;
/*
 * Rate-control step run with the size of the frame that was just encoded:
 * predicts the QP for the next frame of the same slice type and stores it in
 * bit_rate_control_context[slicetype].QpPrimeY.
 *
 * Steps visible in this excerpt:
 *  - the slice type is taken from the first slice parameter buffer;
 *  - the next frame-size target is the nominal target corrected by the
 *    difference between the target and frame_bits, damped by
 *    (frame_size_alpha + 1), and never below 25% of the nominal target;
 *  - the QP is scaled by target_frame_size / frame_size_next, limited to a
 *    change of +/- BRC_QP_MAX_CHANGE and clipped to [1, 51];
 *  - intel_mfc_update_hrd() updates the HRD model, and a correction
 *    BRC_QP_MAX_CHANGE * exp(-1/y) * sin(pi/2 * x) derived from the buffer
 *    fullness is added;
 *  - without an HRD violation the QPs of the other two slice types are pulled
 *    towards the fixed BRC_*_QP_DIFF offsets; on underflow/overflow the QP of
 *    the current type is forced up/down by at least one.
 *
 * NOTE(review): several lines (third parameter declaration, the declarations
 * of x and y, some if/else headers and the final return) are not present in
 * this excerpt; the return value is presumably `sts` - confirm.
 */
215 int intel_mfc_brc_postpack(struct encode_state *encode_state,
216 struct gen6_mfc_context *mfc_context,
219 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
220 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
221 int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
222 int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
223 int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
224 int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
225 int qp; // quantizer of previously encoded slice of current type
226 int qpn; // predicted quantizer for next frame of current type in integer format
227 double qpf; // predicted quantizer for next frame of current type in float format
228 double delta_qp; // QP correction
229 int target_frame_size, frame_size_next;
231 * x - how far we are from HRD buffer borders
232 * y - how far we are from target HRD buffer fullness
235 double frame_size_alpha;
237 qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
239 target_frame_size = mfc_context->brc.target_frame_size[slicetype];
/* Damping factor: 0 for a small HRD buffer; the assignment from gop_nums (capped at 30) is presumably the else branch, whose header is not visible here. */
240 if (mfc_context->hrd.buffer_capacity < 5)
241 frame_size_alpha = 0;
243 frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
244 if (frame_size_alpha > 30) frame_size_alpha = 30;
245 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
246 (double)(frame_size_alpha + 1.);
248 /* frame_size_next: avoiding negative number and too small value */
249 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
250 frame_size_next = (int)((double)target_frame_size * 0.25);
/* Scale the QP by the ratio of the nominal target to the corrected target, then round to nearest. */
252 qpf = (double)qp * target_frame_size / frame_size_next;
253 qpn = (int)(qpf + 0.5);
256 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
257 mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
258 if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
260 mfc_context->brc.qpf_rounding_accumulator = 0.;
261 } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
263 mfc_context->brc.qpf_rounding_accumulator = 0.;
266 /* making sure that QP is not changing too fast */
267 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
268 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
269 /* making sure that with QP predictions we do not leave the QP range */
270 BRC_CLIP(qpn, 1, 51);
272 /* checking whether HRD compliance is still met */
273 sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);
275 /* calculating QP delta as some function*/
276 x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
/* x is normalized by the distance from the target to the relevant buffer border and y is the distance to that border; the selecting condition is not visible in this excerpt. */
278 x /= mfc_context->hrd.target_buffer_fullness;
279 y = mfc_context->hrd.current_buffer_fullness;
282 x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
283 y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
/* Keep y away from zero so that exp(-1/y) below stays well defined. */
285 if (y < 0.01) y = 0.01;
287 else if (x < -1) x = -1;
289 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
290 qpn = (int)(qpn + delta_qp + 0.5);
292 /* making sure that with QP predictions we do not leave the QP range */
293 BRC_CLIP(qpn, 1, 51);
295 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
296 /* correcting QPs of slices of other types */
297 if (slicetype == SLICE_TYPE_P) {
298 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
299 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
300 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
301 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
302 } else if (slicetype == SLICE_TYPE_I) {
303 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
304 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
305 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
306 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
307 } else { // SLICE_TYPE_B
308 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
309 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
310 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
311 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
313 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
314 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
315 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
316 } else if (sts == BRC_UNDERFLOW) { // underflow
/* The frame was too large: make sure the QP goes up by at least one. */
317 if (qpn <= qp) qpn = qp + 1;
320 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
322 } else if (sts == BRC_OVERFLOW) {
/* The buffer overflowed: make sure the QP goes down by at least one. */
323 if (qpn >= qp) qpn = qp - 1;
324 if (qpn < 1) { // < 0 (?) overflow with minQP
326 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
/* Publish the new QP for the next frame of this slice type. */
330 mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
335 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
336 struct intel_encoder_context *encoder_context)
338 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
339 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
340 unsigned int rate_control_mode = encoder_context->rate_control_mode;
341 int target_bit_rate = pSequenceParameter->bits_per_second;
343 // current we only support CBR mode.
344 if (rate_control_mode == VA_RC_CBR) {
345 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
346 mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
347 mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
348 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
349 mfc_context->vui_hrd.i_frame_number = 0;
351 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
352 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
353 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/*
 * Advance the VUI HRD frame counter by one. The counter is multiplied with
 * i_cpb_removal_delay when the timing SEI is built in
 * intel_mfc_avc_pipeline_header_programing.
 *
 * encode_state is not used in the visible body.
 * NOTE(review): the return-type line is not present in this excerpt.
 */
359 intel_mfc_hrd_context_update(struct encode_state *encode_state,
360 struct gen6_mfc_context *mfc_context)
362 mfc_context->vui_hrd.i_frame_number++;
/*
 * Compare the total number of macroblocks covered by all slice parameter
 * buffers with the macroblock count of the whole picture (derived from
 * mfc_context->surface_state width and height).
 *
 * NOTE(review): the declarations of `i` and `mbCount` and both return
 * statements are not present in this excerpt, so the meaning of the returned
 * values cannot be confirmed from here.
 */
365 int intel_mfc_interlace_check(VADriverContextP ctx,
366 struct encode_state *encode_state,
367 struct intel_encoder_context *encoder_context)
369 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
370 VAEncSliceParameterBufferH264 *pSliceParameter;
/* Picture size in macroblocks, rounding up to whole 16x16 blocks. */
373 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
374 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* Sum the macroblocks of every slice. */
376 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
377 pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
378 mbCount += pSliceParameter->num_macroblocks;
/* Do the slices cover exactly one full picture? */
381 if ( mbCount == ( width_in_mbs * height_in_mbs ) )
388 * Check whether the parameters related with CBR are updated and decide whether
389 * it needs to reinitialize the configuration related with CBR.
390 * Currently it will check the following parameters:
393 * gop_configuration(intra_period, ip_period, intra_idr_period)
395 static bool intel_mfc_brc_updated_check(struct encode_state *encode_state,
396 struct intel_encoder_context *encoder_context)
398 unsigned int rate_control_mode = encoder_context->rate_control_mode;
399 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
400 double cur_fps, cur_bitrate;
401 VAEncSequenceParameterBufferH264 *pSequenceParameter;
404 if (rate_control_mode != VA_RC_CBR) {
408 pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
410 cur_bitrate = pSequenceParameter->bits_per_second;
411 cur_fps = (double)pSequenceParameter->time_scale /
412 (2 * (double)pSequenceParameter->num_units_in_tick);
414 if ((cur_bitrate == mfc_context->brc.saved_bps) &&
415 (cur_fps == mfc_context->brc.saved_fps) &&
416 (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
417 (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
418 (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) {
419 /* the parameters related with CBR are not updaetd */
423 mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
424 mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
425 mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
426 mfc_context->brc.saved_fps = cur_fps;
427 mfc_context->brc.saved_bps = cur_bitrate;
/*
 * Prepare rate control before encoding a frame. Only acts in VA_RC_CBR mode:
 * asserts that the codec is not MPEG-2, asks intel_mfc_brc_updated_check()
 * whether the CBR parameters changed, and (re)initializes the bit-rate
 * control context, the BRC state and the HRD context when they are still
 * unset or the parameters changed.
 *
 * NOTE(review): the declaration of `brc_updated` and the second operand of
 * the `||` in the first condition are not present in this excerpt
 * (presumably `brc_updated`, as in the HRD condition further down).
 */
431 void intel_mfc_brc_prepare(struct encode_state *encode_state,
432 struct intel_encoder_context *encoder_context)
434 unsigned int rate_control_mode = encoder_context->rate_control_mode;
435 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
437 if (rate_control_mode == VA_RC_CBR) {
439 assert(encoder_context->codec != CODEC_MPEG2);
441 brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context);
443 /* Programming bit rate control */
444 if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
446 intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
447 intel_mfc_brc_init(encode_state, encoder_context);
450 /* Programming HRD control */
451 if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
452 intel_mfc_hrd_context_init(encode_state, encoder_context);
/*
 * Inspect a packed header of `bits_length` bits in `buf` and work out how
 * many leading bytes (leading zeros, start code and NAL unit header) are to
 * be skipped by the emulation-prevention logic.
 *
 * The buffer is searched for a 00 00 01 or 00 00 00 01 start code; a warning
 * is logged when none is found. The skip count is the number of leading zero
 * bytes plus `zero_byte` plus 3, extended for NAL unit types 14, 20 and 21,
 * and a warning is logged when it exceeds HW_MAX_SKIP_LENGTH.
 *
 * NOTE(review): many lines of this function (declarations, the loop exit,
 * the zero_byte handling, the increments of skip_cnt and the return) are not
 * present in this excerpt; the return value is presumably skip_cnt - confirm.
 */
456 static int intel_avc_find_skipemulcnt(unsigned char *buf, int bits_length)
459 int leading_zero_cnt, byte_length, zero_byte;
463 #define NAL_UNIT_TYPE_MASK 0x1f
464 #define HW_MAX_SKIP_LENGTH 15
/* Length in bytes after ALIGN(bits_length, 32); ALIGN presumably rounds up to a multiple of 32 bits - confirm. */
466 byte_length = ALIGN(bits_length, 32) >> 3;
469 leading_zero_cnt = 0;
/* Look for the first 3-byte or 4-byte start code. */
471 for(i = 0; i < byte_length - 4; i++) {
472 if (((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)) ||
473 ((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 0) && (buf[i + 3] == 1))) {
480 /* warning message is complained. But anyway it will be inserted. */
481 WARN_ONCE("Invalid packed header data. "
482 "Can't find the 000001 start_prefix code\n");
485 i = leading_zero_cnt;
/* Distinguishes the 3-byte start code from the 4-byte one at position i. */
488 if (!((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)))
491 skip_cnt = leading_zero_cnt + zero_byte + 3;
493 /* the unit header byte is accounted */
494 nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK;
497 if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
498 /* more unit header bytes are accounted for MVC/SVC */
501 if (skip_cnt > HW_MAX_SKIP_LENGTH) {
502 WARN_ONCE("Too many leading zeros are padded for packed data. "
503 "It is beyond the HW range.!!!\n");
/*
 * Insert the packed H.264 headers into the slice batch buffer through
 * mfc_context->insert_object().
 *
 * For each of SPS, PPS and SEI: if the application supplied packed header
 * data, its bit length is read from the matching packed_header_param buffer,
 * the emulation skip count is computed with intel_avc_find_skipemulcnt() and
 * the header is inserted. When no packed SEI was supplied and the rate
 * control mode is VA_RC_CBR, an SEI is generated with
 * build_avc_sei_buffer_timing() from the vui_hrd fields and inserted instead.
 *
 * NOTE(review): most arguments of the insert_object() and
 * build_avc_sei_buffer_timing() calls are not present in this excerpt.
 */
508 void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
509 struct encode_state *encode_state,
510 struct intel_encoder_context *encoder_context,
511 struct intel_batchbuffer *slice_batch)
513 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
514 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
515 unsigned int rate_control_mode = encoder_context->rate_control_mode;
516 unsigned int skip_emul_byte_cnt;
/* Packed SPS supplied by the application. */
518 if (encode_state->packed_header_data[idx]) {
519 VAEncPackedHeaderParameterBuffer *param = NULL;
520 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
521 unsigned int length_in_bits;
523 assert(encode_state->packed_header_param[idx]);
524 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
525 length_in_bits = param->bit_length;
527 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
528 mfc_context->insert_object(ctx,
/* Length in 32-bit units after ALIGN, and the bit count modulo 32. */
531 ALIGN(length_in_bits, 32) >> 5,
532 length_in_bits & 0x1f,
536 !param->has_emulation_bytes,
540 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
/* Packed PPS supplied by the application. */
542 if (encode_state->packed_header_data[idx]) {
543 VAEncPackedHeaderParameterBuffer *param = NULL;
544 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
545 unsigned int length_in_bits;
547 assert(encode_state->packed_header_param[idx]);
548 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
549 length_in_bits = param->bit_length;
551 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
553 mfc_context->insert_object(ctx,
556 ALIGN(length_in_bits, 32) >> 5,
557 length_in_bits & 0x1f,
561 !param->has_emulation_bytes,
565 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
/* Packed SEI supplied by the application. */
567 if (encode_state->packed_header_data[idx]) {
568 VAEncPackedHeaderParameterBuffer *param = NULL;
569 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
570 unsigned int length_in_bits;
572 assert(encode_state->packed_header_param[idx]);
573 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
574 length_in_bits = param->bit_length;
576 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
577 mfc_context->insert_object(ctx,
580 ALIGN(length_in_bits, 32) >> 5,
581 length_in_bits & 0x1f,
585 !param->has_emulation_bytes,
587 } else if (rate_control_mode == VA_RC_CBR) {
/* No packed SEI in CBR mode: generate one from the HRD state. This local mfc_context shadows the outer variable of the same name and value. */
589 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
591 unsigned char *sei_data = NULL;
593 int length_in_bits = build_avc_sei_buffer_timing(
594 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
595 mfc_context->vui_hrd.i_initial_cpb_removal_delay,
597 mfc_context->vui_hrd.i_cpb_removal_delay_length, mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
598 mfc_context->vui_hrd.i_dpb_output_delay_length,
601 mfc_context->insert_object(ctx,
603 (unsigned int *)sei_data,
604 ALIGN(length_in_bits, 32) >> 5,
605 length_in_bits & 0x1f,
/*
 * Set up the input and output objects of the MFC context for encoding one
 * AVC picture:
 *  - scans the slice parameters to find out whether any slice leaves the
 *    deblocking filter enabled (disable_deblocking_filter_idc != 1);
 *  - makes sure the reconstructed surface and every valid reference surface
 *    carry a GenAvcSurface with top/bottom direct-MV buffers, allocating them
 *    on first use, and references those buffers from direct_mv_buffers[];
 *  - references the reconstructed surface as post- or pre-deblocking output
 *    depending on enable_avc_ildb;
 *  - references the input YUV surface and the coded buffer, and resets the
 *    coded-buffer segment header.
 *
 * NOTE(review): a number of lines (the declaration of `bo`, arguments of
 * dri_bo_alloc(), some branch headers, the buffer map/unmap calls around the
 * access to bo->virtual and the final return) are not present in this
 * excerpt.
 * NOTE(review): the results of calloc() below are dereferenced without a
 * NULL check.
 */
615 VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
616 struct encode_state *encode_state,
617 struct intel_encoder_context *encoder_context)
619 struct i965_driver_data *i965 = i965_driver_data(ctx);
620 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
621 struct object_surface *obj_surface;
622 struct object_buffer *obj_buffer;
623 GenAvcSurface *gen6_avc_surface;
625 VAStatus vaStatus = VA_STATUS_SUCCESS;
626 int i, j, enable_avc_ildb = 0;
627 VAEncSliceParameterBufferH264 *slice_param;
628 struct i965_coded_buffer_segment *coded_buffer_segment;
629 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
630 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
631 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
633 if (IS_GEN6(i965->intel.device_info)) {
634 /* On the SNB it should be fixed to 128 for the DMV buffer */
/* Stop scanning as soon as one slice with deblocking enabled has been found. */
638 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
639 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
640 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
642 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
643 assert((slice_param->slice_type == SLICE_TYPE_I) ||
644 (slice_param->slice_type == SLICE_TYPE_SI) ||
645 (slice_param->slice_type == SLICE_TYPE_P) ||
646 (slice_param->slice_type == SLICE_TYPE_SP) ||
647 (slice_param->slice_type == SLICE_TYPE_B));
649 if (slice_param->disable_deblocking_filter_idc != 1) {
658 /*Setup all the input&output object*/
660 /* Setup current frame and current direct mv buffer*/
661 obj_surface = encode_state->reconstructed_object;
662 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* First use of this surface: attach private data holding the two direct-MV buffers. */
664 if ( obj_surface->private_data == NULL) {
/* calloc() arguments are given as (size, count); the allocated size is the same as with (count, size). */
665 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
666 gen6_avc_surface->dmv_top =
667 dri_bo_alloc(i965->intel.bufmgr,
669 68 * width_in_mbs * height_in_mbs,
671 gen6_avc_surface->dmv_bottom =
672 dri_bo_alloc(i965->intel.bufmgr,
674 68 * width_in_mbs * height_in_mbs,
676 assert(gen6_avc_surface->dmv_top);
677 assert(gen6_avc_surface->dmv_bottom);
678 obj_surface->private_data = (void *)gen6_avc_surface;
679 obj_surface->free_private_data = (void *)gen_free_avc_surface;
/* The current frame uses the last two direct-MV slots. */
681 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
682 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
683 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
684 dri_bo_reference(gen6_avc_surface->dmv_top);
685 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* The reconstructed surface is the post-deblocking output when deblocking is enabled; the pre-deblocking assignment is presumably the else branch, whose header is not visible here. */
687 if (enable_avc_ildb) {
688 mfc_context->post_deblocking_output.bo = obj_surface->bo;
689 dri_bo_reference(mfc_context->post_deblocking_output.bo);
691 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
692 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
695 mfc_context->surface_state.width = obj_surface->orig_width;
696 mfc_context->surface_state.height = obj_surface->orig_height;
697 mfc_context->surface_state.w_pitch = obj_surface->width;
698 mfc_context->surface_state.h_pitch = obj_surface->height;
700 /* Setup reference frames and direct mv buffers*/
701 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
702 obj_surface = encode_state->reference_objects[i];
704 if (obj_surface && obj_surface->bo) {
705 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
706 dri_bo_reference(obj_surface->bo);
708 /* Check DMV buffer */
709 if ( obj_surface->private_data == NULL) {
711 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
712 gen6_avc_surface->dmv_top =
713 dri_bo_alloc(i965->intel.bufmgr,
715 68 * width_in_mbs * height_in_mbs,
717 gen6_avc_surface->dmv_bottom =
718 dri_bo_alloc(i965->intel.bufmgr,
720 68 * width_in_mbs * height_in_mbs,
722 assert(gen6_avc_surface->dmv_top);
723 assert(gen6_avc_surface->dmv_bottom);
724 obj_surface->private_data = gen6_avc_surface;
725 obj_surface->free_private_data = gen_free_avc_surface;
728 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
729 /* Setup DMV buffer */
/* Reference i uses direct-MV slots 2*i (top) and 2*i+1 (bottom). */
730 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
731 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
732 dri_bo_reference(gen6_avc_surface->dmv_top);
733 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Source picture to be encoded. */
739 mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
740 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Coded buffer: the bitstream starts after the coded-buffer header and ends at an offset derived from the buffer size minus 0x1000, passed through ALIGN(…, 0x1000). */
742 obj_buffer = encode_state->coded_buf_object;
743 bo = obj_buffer->buffer_store->bo;
744 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
745 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
746 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
747 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
/* Reset the segment header stored at the start of the coded buffer. */
750 coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
751 coded_buffer_segment->mapped = 0;
752 coded_buffer_segment->codec = encoder_context->codec;
758 * The LUT uses the pair of 4-bit units: (shift, base) structure.
760 * So it is necessary to convert one cost into the nearest LUT format.
762 * 2^K *x = 2^n * (1 + deltaX)
763 * k + log2(x) = n + log2(1 + deltaX)
764 * log2(x) = n - k + log2(1 + deltaX)
765 * As X is in the range of [1, 15]
766 * 4 > n - k + log2(1 + deltaX) >= 0
767 * => n + log2(1 + deltaX) >= k > n - 4 + log2(1 + deltaX)
768 * Then we can derive the corresponding K and get the nearest LUT format.
/*
 * Convert the cost `value` into the LUT byte format (shift in the high
 * nibble, base in the low nibble), choosing the encoding closest to `value`.
 *
 * Candidate shifts j run from (floor(log2(value)) - 3) to floor(log2(value));
 * for each one the base is derived from `value` and the candidate with the
 * smallest absolute error |value - (base << j)| is kept as (j << 4) | base.
 * Finally the decoded result (temp1) and the decoded `max` (temp2) are
 * computed.
 *
 * NOTE(review): several lines (handling of small or zero values, skipped
 * candidates, initialization of `error`, the comparison of temp1 with temp2
 * and the return) are not present in this excerpt; presumably the result is
 * limited to `max` - confirm against the full file.
 */
770 int intel_format_lutvalue(int value, int max)
773 int logvalue, temp1, temp2;
/* Integer part of log2(value). */
778 logvalue = (int)(log2f((float)value));
782 int error, temp_value, base, j, temp_err;
/* Try the four shifts that can represent a value of this magnitude with a 4-bit base. */
784 j = logvalue - 4 + 1;
786 for(; j <= logvalue; j++) {
790 base = (value + (1 << (j - 1)) - 1) >> j;
/* Value represented by this (shift, base) pair and its distance from the request. */
795 temp_value = base << j;
796 temp_err = abs(value - temp_value);
797 if (temp_err < error) {
799 ret = (j << 4) | base;
/* Decode both LUT bytes back into plain values: base << shift. */
805 temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
806 temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
/*
 * Compute the lambda used to scale the mode and motion-vector costs for a
 * given QP: 2 raised to (value / 6 - 2), rounded to the nearest integer.
 *
 * NOTE(review): the initialization of `value` (presumably from `qp`), any
 * clamping of the exponent and the return statement are not present in this
 * excerpt.
 */
817 static float intel_lambda_qp(int qp)
819 float value, lambdaf;
821 value = value / 6 - 2;
824 lambdaf = roundf(powf(2, value));
/*
 * Fill the mode-cost and motion-vector-cost entries of
 * vme_context->vme_state_message for the current slice type and QP.
 *
 * The QP is pic_init_qp + slice_qp_delta in VA_RC_CQP mode and the BRC value
 * (QpPrimeY of the slice type) otherwise. Costs are multiples of
 * intel_lambda_qp(qp) converted with intel_format_lutvalue(). I slices only
 * get intra mode costs; other slices also get MV costs and inter mode costs,
 * with separate factors for P and B slices.
 *
 * NOTE(review): a number of lines (several m_cost assignments, else/return
 * lines, the mv_count updates and the condition guarding the block of fixed
 * 0x4a/0x2a values) are not present in this excerpt.
 */
829 void intel_vme_update_mbmv_cost(VADriverContextP ctx,
830 struct encode_state *encode_state,
831 struct intel_encoder_context *encoder_context)
833 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
834 struct gen6_vme_context *vme_context = encoder_context->vme_context;
835 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
836 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
837 int qp, m_cost, j, mv_count;
838 uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
839 float lambda, m_costf;
841 int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
/* Constant-QP mode takes the QP from the picture/slice parameters; the BRC value below is presumably the else branch. */
844 if (encoder_context->rate_control_mode == VA_RC_CQP)
845 qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
847 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
849 if (vme_state_message == NULL)
852 assert(qp <= QP_MAX);
853 lambda = intel_lambda_qp(qp);
/* Intra-only costs for I slices. */
854 if (slice_type == SLICE_TYPE_I) {
855 vme_state_message[MODE_INTRA_16X16] = 0;
857 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
858 m_cost = lambda * 16;
859 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
861 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* Motion-vector costs: (log2(j + 1) + 1.718) * lambda, for j = 1, 2 and then for the powers of two 4..64. */
864 vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
865 for (j = 1; j < 3; j++) {
866 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
867 m_cost = (int)m_costf;
868 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
871 for (j = 4; j <= 64; j *= 2) {
872 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
873 m_cost = (int)m_costf;
874 vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
/* Fixed mode costs; the condition selecting this block is not visible in this excerpt. */
879 vme_state_message[MODE_INTRA_16X16] = 0x4a;
880 vme_state_message[MODE_INTRA_8X8] = 0x4a;
881 vme_state_message[MODE_INTRA_4X4] = 0x4a;
882 vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
883 vme_state_message[MODE_INTER_16X16] = 0x4a;
884 vme_state_message[MODE_INTER_16X8] = 0x4a;
885 vme_state_message[MODE_INTER_8X8] = 0x4a;
886 vme_state_message[MODE_INTER_8X4] = 0x4a;
887 vme_state_message[MODE_INTER_4X4] = 0x4a;
888 vme_state_message[MODE_INTER_BWD] = 0x2a;
/* NOTE(review): the next statement stores lambda * 10 in m_costf, but the call that directly follows passes m_cost, which at this point still holds the last motion-vector cost computed above. This looks like a typo for `m_cost = lambda * 10;` - confirm and fix. */
891 m_costf = lambda * 10;
892 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
893 m_cost = lambda * 14;
894 vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
895 m_cost = lambda * 24;
896 vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
897 m_costf = lambda * 3.5;
899 vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
/* Inter mode costs for P slices. */
900 if (slice_type == SLICE_TYPE_P) {
901 m_costf = lambda * 2.5;
903 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
904 m_costf = lambda * 4;
906 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
907 m_costf = lambda * 1.5;
909 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
910 m_costf = lambda * 3;
912 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
913 m_costf = lambda * 5;
915 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
916 /* BWD is not used in P-frame */
917 vme_state_message[MODE_INTER_BWD] = 0;
/* Inter mode costs for the remaining slice type (B), including the backward cost. */
919 m_costf = lambda * 2.5;
921 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
922 m_costf = lambda * 5.5;
924 vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
925 m_costf = lambda * 3.5;
927 vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
928 m_costf = lambda * 5.0;
930 vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
931 m_costf = lambda * 6.5;
933 vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
934 m_costf = lambda * 1.5;
936 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
// Scoreboard dependency bits, one per neighbouring macroblock (A, B, C).
// They are OR-ed into the scoreboard mask and into the per-command
// score_dep value written by the batchbuffer walkers in this file.
942 #define MB_SCOREBOARD_A (1 << 0)
943 #define MB_SCOREBOARD_B (1 << 1)
944 #define MB_SCOREBOARD_C (1 << 2)
// Program the scoreboard fields of vme_context->gpe_context:
//  - vfe_desc5.scoreboard0 is enabled, set to type SCOREBOARD_STALLING and
//    given a dependency mask that starts with MB_SCOREBOARD_A.
//  - vfe_desc6.scoreboard1 receives three (delta_x, delta_y) offsets
//    relative to the current macroblock: (-1, 0), (0, -1) and (1, -1),
//    i.e. the left, up and up-right neighbours named in the comment below.
//  - vfe_desc7.dword is cleared to 0.
// NOTE(review): ctx is not referenced by any statement visible here.
946 gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
948 vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
949 vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
950 vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
954 /* In VME prediction the current mb depends on the neighbour
955 * A/B/C macroblock. So the left/up/up-right dependency should
958 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
959 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
960 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
961 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
962 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
963 vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
965 vme_context->gpe_context.vfe_desc7.dword = 0;
/**
 * Check whether the macroblock at (x_index, y_index) is out of bound.
 *
 * A macroblock is in bounds when it lies inside the picture
 * (0 <= x_index < mb_width, 0 <= y_index < mb_height) AND its raster index
 * (y_index * mb_width + x_index) lies inside the slice range
 * [first_mb, first_mb + num_mb).
 *
 * Returns 0 when the macroblock is in bounds and -1 otherwise, so callers
 * can keep walking with "!loop_in_bounds(...)".
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;

    if (x_index < 0 || x_index >= mb_width)
        return -1;
    if (y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;
    /* first_mb + num_mb is the first macroblock of the NEXT slice, so the
     * upper limit is exclusive. */
    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
        return -1;

    return 0;
}
// Fill vme_context->vme_batchbuffer.bo with one CMD_MEDIA_OBJECT command
// for every macroblock position visited, then terminate the buffer with
// MI_BATCH_BUFFER_END.
//
// The buffer object is mapped at the start and unmapped at the end. For
// each slice in encode_state->slice_params_ext the walk starts at the
// position of the first macroblock of the slice (macroblock_address) and
// loop_in_bounds() limits it to the picture and to the slice range derived
// from macroblock_address and num_macroblocks.
//
// mb_width, mb_height: picture size in macroblocks.
// kernel: value written as the second dword of every command.
// transform_8x8_mode_flag: OR-ed into the last dword of every command.
985 gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
986 struct encode_state *encode_state,
987 int mb_width, int mb_height,
989 int transform_8x8_mode_flag,
990 struct intel_encoder_context *encoder_context)
992 struct gen6_vme_context *vme_context = encoder_context->vme_context;
995 unsigned int *command_ptr;
// Flag written as the third dword of each command.
997 #define USE_SCOREBOARD (1 << 21)
// NOTE(review): the second argument 1 presumably requests a writable
// mapping - confirm against dri_bo_map.
999 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1000 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
// One walk per slice parameter buffer.
1002 for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1003 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1004 int first_mb = pSliceParameter->macroblock_address;
1005 int num_mb = pSliceParameter->num_macroblocks;
1006 unsigned int mb_intra_ub, score_dep;
1007 int x_outer, y_outer, x_inner, y_inner;
1008 int xtemp_outer = 0;
// Start at the column and row of the first macroblock of the slice.
1010 x_outer = first_mb % mb_width;
1011 y_outer = first_mb / mb_width;
// First pass: outer positions whose column is below mb_width - 2.
1014 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
// Inner walk: continue while (x_inner, y_inner) stays in bounds.
1017 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
// INTRA_PRED_AVAIL_FLAG_* bits are collected in mb_intra_ub and
// MB_SCOREBOARD_* bits in score_dep. Some of the guarding conditions are on
// lines not visible here.
1021 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1022 score_dep |= MB_SCOREBOARD_A;
// mb_row is assigned on a line not visible here; presumably the first row
// of the slice - TODO confirm.
1024 if (y_inner != mb_row) {
1025 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1026 score_dep |= MB_SCOREBOARD_B;
1028 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
// Neighbour C is only flagged when the macroblock is not in the last column.
1029 if (x_inner != (mb_width -1)) {
1030 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1031 score_dep |= MB_SCOREBOARD_C;
// Emit the command. (8 - 2) is OR-ed into the header dword; presumably the
// dword length field - TODO confirm.
1035 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1036 *command_ptr++ = kernel;
1037 *command_ptr++ = USE_SCOREBOARD;
1040 /* the (X, Y) term of scoreboard */
1041 *command_ptr++ = ((y_inner << 16) | x_inner);
1042 *command_ptr++ = score_dep;
// Two more dwords: mb_width, y and x packed together, then bits 18 and 16
// combined with transform_8x8_mode_flag and mb_intra_ub shifted left by 8.
// NOTE(review): y_inner and x_inner get 8 bits each in the first of these
// dwords, so values above 255 would overlap the neighbouring field - confirm
// the supported picture size.
1044 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1045 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
// Second pass: restart the outer walk from column mb_width - 2 on the first
// row of the slice; the negative case is handled by the if below.
1052 xtemp_outer = mb_width - 2;
1053 if (xtemp_outer < 0)
1055 x_outer = xtemp_outer;
1056 y_outer = first_mb / mb_width;
1057 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1060 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
// Same flag collection and command layout as in the first pass.
1064 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1065 score_dep |= MB_SCOREBOARD_A;
1067 if (y_inner != mb_row) {
1068 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1069 score_dep |= MB_SCOREBOARD_B;
1071 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1073 if (x_inner != (mb_width -1)) {
1074 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1075 score_dep |= MB_SCOREBOARD_C;
1079 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1080 *command_ptr++ = kernel;
1081 *command_ptr++ = USE_SCOREBOARD;
1084 /* the (X, Y) term of scoreboard */
1085 *command_ptr++ = ((y_inner << 16) | x_inner);
1086 *command_ptr++ = score_dep;
1088 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1089 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
// When the outer column runs past the picture width it is reset to
// xtemp_outer; the accompanying row update is not visible here.
1095 if (x_outer >= mb_width) {
1097 x_outer = xtemp_outer;
// Terminate the batch and release the mapping.
1103 *command_ptr++ = MI_BATCH_BUFFER_END;
1105 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
// Pack one reference entry value from the flags of va_pic and a frame
// store index:
//   bit 6    : set when VA_PICTURE_H264_LONG_TERM_REFERENCE is set
//   bit 5    : set when the top-field and bottom-field flags are equal
//              (both set or both clear)
//   bits 1.. : frame_store_id shifted left by one
//   bit 0    : set when only the bottom-field flag is set
// NOTE(review): frame_store_id is not masked here, so callers presumably
// pass a value small enough not to reach bit 5 - confirm.
1109 intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1111 unsigned int is_long_term =
1112 !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1113 unsigned int is_top_field =
1114 !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1115 unsigned int is_bottom_field =
1116 !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1118 return ((is_long_term << 6) |
1119 ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1120 (frame_store_id << 1) |
1121 ((is_top_field ^ 1) & is_bottom_field));
// Emit two MFX_AVC_REF_IDX_STATE commands into encoder_context->base.batch,
// one selecting L0 and one selecting L1. Each command is 10 dwords: the
// header, the list selector, one computed entry dword (fref_entry or
// bref_entry) and seven dwords of 0x80808080.
//
// Both entry dwords start as 0x80808080. The slice type is taken from the
// first slice parameter buffer. For P and B slices the byte of fref_entry
// selected by the low 8 bits of vme_context->ref_index_in_mb[0] is replaced
// with the value from intel_get_ref_idx_state_1(); B slices do the same for
// bref_entry using index 1 of the vme_context arrays.
1125 intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1126 struct encode_state *encode_state,
1127 struct intel_encoder_context *encoder_context)
1129 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1130 struct intel_batchbuffer *batch = encoder_context->base.batch;
1132 struct object_surface *obj_surface;
1133 unsigned int fref_entry, bref_entry;
1135 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1137 fref_entry = 0x80808080;
1138 bref_entry = 0x80808080;
1139 slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
// List 0 entry, needed for both P and B slices.
1141 if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1142 int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
// Only four byte slots exist in the entry dword, so indices above 3 are
// reported; the recovery statement is not visible here.
1144 if (ref_idx_l0 > 3) {
1145 WARN_ONCE("ref_idx_l0 is out of range\n");
// Look for the chosen reference surface among the 16 entries of
// encode_state->reference_objects to obtain frame_index.
1149 obj_surface = vme_context->used_reference_objects[0];
1151 for (i = 0; i < 16; i++) {
1153 obj_surface == encode_state->reference_objects[i]) {
1158 if (frame_index == -1) {
1159 WARN_ONCE("RefPicList0 is not found in DPB!\n");
// Clear the selected byte, then add the new value into it.
// NOTE(review): for ref_idx_l0 == 3 the int constant 0xFF is shifted left
// by 24, which does not fit in a signed int; an unsigned constant would
// avoid relying on that.
1161 int ref_idx_l0_shift = ref_idx_l0 * 8;
1162 fref_entry &= ~(0xFF << ref_idx_l0_shift);
1163 fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
// List 1 entry, only for B slices; same steps as for list 0.
1167 if (slice_type == SLICE_TYPE_B) {
1168 int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1170 if (ref_idx_l1 > 3) {
1171 WARN_ONCE("ref_idx_l1 is out of range\n");
1175 obj_surface = vme_context->used_reference_objects[1];
1177 for (i = 0; i < 16; i++) {
1179 obj_surface == encode_state->reference_objects[i]) {
1184 if (frame_index == -1) {
1185 WARN_ONCE("RefPicList1 is not found in DPB!\n");
1187 int ref_idx_l1_shift = ref_idx_l1 * 8;
1188 bref_entry &= ~(0xFF << ref_idx_l1_shift);
1189 bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
// Command for L0. The 8 OR-ed into the header is presumably the dword
// length field (10 - 2) - TODO confirm.
1193 BEGIN_BCS_BATCH(batch, 10);
1194 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1195 OUT_BCS_BATCH(batch, 0); //Select L0
1196 OUT_BCS_BATCH(batch, fref_entry); //Only 1 reference
1197 for(i = 0; i < 7; i++) {
1198 OUT_BCS_BATCH(batch, 0x80808080);
1200 ADVANCE_BCS_BATCH(batch);
// Command for L1, same layout.
1202 BEGIN_BCS_BATCH(batch, 10);
1203 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1204 OUT_BCS_BATCH(batch, 1); //Select L1
1205 OUT_BCS_BATCH(batch, bref_entry); //Only 1 reference
1206 for(i = 0; i < 7; i++) {
1207 OUT_BCS_BATCH(batch, 0x80808080);
1209 ADVANCE_BCS_BATCH(batch);
// Fill vme_context->vme_state_message, accessed here as an array of
// uint32_t, for MPEG-2 encoding.
//
// For pictures that are not VAEncPictureTypeIntra, a lambda is derived from
// the quantiser_scale_code of the first slice and used to write the
// MODE_INTER_MV0 based cost entries plus the 16x16 and BWD mode costs; the
// remaining mode entries are set to 0. At the end of the visible code the
// MPEG2_MV_RANGE entry receives mv_y in the upper 16 bits and mv_x in the
// lower bits, and the MPEG2_PIC_WIDTH_HEIGHT entry receives height_in_mbs
// in the upper 16 bits.
//
// mv_x and mv_y depend on vme_context->mpeg2_level; the assignments
// themselves are on lines not visible here.
1213 void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
1214 struct encode_state *encode_state,
1215 struct intel_encoder_context *encoder_context)
1217 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1218 uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
1219 VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
// Picture size in 16x16 macroblocks, rounded up.
1220 int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
1221 int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
1222 uint32_t mv_x, mv_y;
1223 VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
1224 VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
1225 slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
// Select per level; an unknown level is reported once.
1227 if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
1230 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
1233 } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
1237 WARN_ONCE("Incorrect Mpeg2 level setting!\n");
1242 pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
// Cost tables are only written for non-intra pictures.
1243 if (pic_param->picture_type != VAEncPictureTypeIntra) {
1244 int qp, m_cost, j, mv_count;
1245 float lambda, m_costf;
1246 slice_param = (VAEncSliceParameterBufferMPEG2 *)
1247 encode_state->slice_params_ext[0]->buffer;
1248 qp = slice_param->quantiser_scale_code;
1249 lambda = intel_lambda_qp(qp);
1250 /* No Intra prediction. So it is zero */
1251 vme_state_message[MODE_INTRA_8X8] = 0;
1252 vme_state_message[MODE_INTRA_4X4] = 0;
1253 vme_state_message[MODE_INTER_MV0] = 0;
// Cost entries at offsets 1 and 2 from MODE_INTER_MV0:
// (log2(j + 1) + 1.718) * lambda, truncated to int and encoded by
// intel_format_lutvalue with limit 0x6f.
1254 for (j = 1; j < 3; j++) {
1255 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1256 m_cost = (int)m_costf;
1257 vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
// Same formula for j = 4, 8, 16, 32, 64, stored at offset mv_count, which is
// maintained on lines not visible here.
1260 for (j = 4; j <= 64; j *= 2) {
1261 m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
1262 m_cost = (int)m_costf;
1263 vme_state_message[MODE_INTER_MV0 + mv_count] =
1264 intel_format_lutvalue(m_cost, 0x6f);
1268 /* It can only perform the 16x16 search. So mode cost can be ignored for
1269 * the other mode. for example: 16x8/8x8
1271 vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1272 vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
1274 vme_state_message[MODE_INTER_16X8] = 0;
1275 vme_state_message[MODE_INTER_8X8] = 0;
1276 vme_state_message[MODE_INTER_8X4] = 0;
1277 vme_state_message[MODE_INTER_4X4] = 0;
1278 vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
1281 vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
1283 vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
// MPEG-2 counterpart of the H.264 batchbuffer walker: fill
// vme_context->vme_batchbuffer.bo with one CMD_MEDIA_OBJECT command per
// macroblock position visited and terminate it with MI_BATCH_BUFFER_END.
//
// The walk range is the whole picture: num_mb is mb_width * mb_height.
// first_mb and the starting x_outer / y_outer are set on lines not visible
// here. The last dword of each command carries only the fixed bits 18 and
// 16 and mb_intra_ub; there is no transform flag in this variant.
//
// mb_width, mb_height: picture size in macroblocks.
// kernel: value written as the second dword of every command.
1288 gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1289 struct encode_state *encode_state,
1290 int mb_width, int mb_height,
1292 struct intel_encoder_context *encoder_context)
1294 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1295 unsigned int *command_ptr;
// Flag written as the third dword of each command.
1297 #define MPEG2_SCOREBOARD (1 << 21)
// NOTE(review): the second argument 1 presumably requests a writable
// mapping - confirm against dri_bo_map.
1299 dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1300 command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1303 unsigned int mb_intra_ub, score_dep;
1304 int x_outer, y_outer, x_inner, y_inner;
1305 int xtemp_outer = 0;
1307 int num_mb = mb_width * mb_height;
// First pass: outer positions whose column is below mb_width - 2.
1313 for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
// Inner walk: continue while (x_inner, y_inner) stays in bounds.
1316 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
// INTRA_PRED_AVAIL_FLAG_* bits are collected in mb_intra_ub and
// MB_SCOREBOARD_* bits in score_dep. Most guarding conditions are on lines
// not visible here.
1320 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1321 score_dep |= MB_SCOREBOARD_A;
1324 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1325 score_dep |= MB_SCOREBOARD_B;
1328 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
// Neighbour C is only flagged when the macroblock is not in the last column.
1330 if (x_inner != (mb_width -1)) {
1331 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1332 score_dep |= MB_SCOREBOARD_C;
// Emit the command. (8 - 2) is OR-ed into the header dword; presumably the
// dword length field - TODO confirm.
1336 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1337 *command_ptr++ = kernel;
1338 *command_ptr++ = MPEG2_SCOREBOARD;
1341 /* the (X, Y) term of scoreboard */
1342 *command_ptr++ = ((y_inner << 16) | x_inner);
1343 *command_ptr++ = score_dep;
// NOTE(review): y_inner and x_inner get 8 bits each in the next dword, so
// values above 255 would overlap the neighbouring field - confirm the
// supported picture size.
1345 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1346 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
// Second pass: restart the outer walk from column mb_width - 2; the
// negative case is handled by the if below.
1353 xtemp_outer = mb_width - 2;
1354 if (xtemp_outer < 0)
1356 x_outer = xtemp_outer;
1358 for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1361 for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
// Same flag collection and command layout as in the first pass.
1365 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1366 score_dep |= MB_SCOREBOARD_A;
1369 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1370 score_dep |= MB_SCOREBOARD_B;
1373 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1375 if (x_inner != (mb_width -1)) {
1376 mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1377 score_dep |= MB_SCOREBOARD_C;
1381 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1382 *command_ptr++ = kernel;
1383 *command_ptr++ = MPEG2_SCOREBOARD;
1386 /* the (X, Y) term of scoreboard */
1387 *command_ptr++ = ((y_inner << 16) | x_inner);
1388 *command_ptr++ = score_dep;
1390 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1391 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
// When the outer column runs past the picture width it is reset to
// xtemp_outer; the accompanying row update is not visible here.
1397 if (x_outer >= mb_width) {
1399 x_outer = xtemp_outer;
// Terminate the batch and release the mapping.
1405 *command_ptr++ = MI_BATCH_BUFFER_END;
1407 dri_bo_unmap(vme_context->vme_batchbuffer.bo);
// Scan the first num_pictures entries of ref_list for the reference whose
// TopFieldOrderCnt is closest to that of curr_pic.
//
// found starts at -1 and min at the largest positive int. Entries flagged
// VA_PICTURE_H264_INVALID or with picture_id VA_INVALID_SURFACE are tested
// first; the action taken for them is not visible here (presumably they
// are skipped). For the other entries tmp is the order count of curr_pic
// minus the order count of the entry, and an entry becomes the candidate
// when tmp is positive and smaller than the current min.
//
// NOTE(review): the remaining parameters, any adjustment of tmp between its
// computation and the comparison, and the return statement are on lines not
// visible here; the caller uses the result as an index into ref_list.
1412 avc_temporal_find_surface(VAPictureH264 *curr_pic,
1413 VAPictureH264 *ref_list,
1417 int i, found = -1, min = 0x7FFFFFFF;
1419 for (i = 0; i < num_pictures; i++) {
1422 if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1423 (ref_list[i].picture_id == VA_INVALID_SURFACE))
1426 tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1431 if (tmp > 0 && tmp < min) {
// Choose the reference picture for one reference list (list_index 0 or 1),
// record the choice in vme_context and hand the chosen surface to the
// vme_source_surface_state callback.
//
// The number of active references comes from pic_param
// (num_ref_idx_l0_active_minus1 or num_ref_idx_l1_active_minus1, plus one)
// and the list from the first slice parameter buffer (RefPicList0 or
// RefPicList1).
//  - With exactly one active reference, entry 0 of the list is used.
//  - Otherwise avc_temporal_find_surface() picks the entry.
// On success used_reference_objects, used_references and ref_index_in_mb
// for list_index are filled in; on the failure path visible at the end they
// are reset to NULL, NULL and 0.
//
// vme_source_surface_state: callback invoked with ctx, surface_index, the
// chosen obj_surface and encoder_context.
1441 intel_avc_vme_reference_state(VADriverContextP ctx,
1442 struct encode_state *encode_state,
1443 struct intel_encoder_context *encoder_context,
1446 void (* vme_source_surface_state)(
1447 VADriverContextP ctx,
1449 struct object_surface *obj_surface,
1450 struct intel_encoder_context *encoder_context))
1452 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1453 struct object_surface *obj_surface = NULL;
1454 struct i965_driver_data *i965 = i965_driver_data(ctx);
1455 VASurfaceID ref_surface_id;
1456 VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1457 VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1458 int max_num_references;
1459 VAPictureH264 *curr_pic;
1460 VAPictureH264 *ref_list;
// Pick the active reference count and the list for this list_index.
1463 if (list_index == 0) {
1464 max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1465 ref_list = slice_param->RefPicList0;
1467 max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1468 ref_list = slice_param->RefPicList1;
// Single reference: take entry 0 of the matching list.
1471 if (max_num_references == 1) {
1472 if (list_index == 0) {
1473 ref_surface_id = slice_param->RefPicList0[0].picture_id;
1474 vme_context->used_references[0] = &slice_param->RefPicList0[0];
1476 ref_surface_id = slice_param->RefPicList1[0].picture_id;
1477 vme_context->used_references[1] = &slice_param->RefPicList1[0];
1480 if (ref_surface_id != VA_INVALID_SURFACE)
1481 obj_surface = SURFACE(ref_surface_id);
// NOTE(review): the condition guarding the next two statements is not
// visible here; presumably a fallback to the picture-level reference at
// position list_index when no surface object was found - confirm.
1485 obj_surface = encode_state->reference_objects[list_index];
1486 vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1491 curr_pic = &pic_param->CurrPic;
1493 /* select the reference frame in temporal space */
// The last argument is 1 only for list 1.
1494 ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1495 ref_surface_id = ref_list[ref_idx].picture_id;
1497 if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1498 obj_surface = SURFACE(ref_surface_id);
1500 vme_context->used_reference_objects[list_index] = obj_surface;
1501 vme_context->used_references[list_index] = &ref_list[ref_idx];
// Success path: publish the surface and the packed reference index. Only
// the first term of the ref_index_in_mb expression is visible here.
1506 assert(ref_idx >= 0);
1507 vme_context->used_reference_objects[list_index] = obj_surface;
1508 vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1509 vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
// Failure path: clear the per-list state.
1514 vme_context->used_reference_objects[list_index] = NULL;
1515 vme_context->used_references[list_index] = NULL;
1516 vme_context->ref_index_in_mb[list_index] = 0;
1520 void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
1521 struct encode_state *encode_state,
1522 struct intel_encoder_context *encoder_context,
1524 struct intel_batchbuffer *slice_batch)
1526 int count, i, start_index;
1527 unsigned int length_in_bits;
1528 VAEncPackedHeaderParameterBuffer *param = NULL;
1529 unsigned int *header_data = NULL;
1530 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1531 int slice_header_index;
1533 if (encode_state->slice_header_index[slice_index] == 0)
1534 slice_header_index = -1;
1536 slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1538 count = encode_state->slice_rawdata_count[slice_index];
1539 start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
1541 for (i = 0; i < count; i++) {
1542 unsigned int skip_emul_byte_cnt;
1544 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1546 param = (VAEncPackedHeaderParameterBuffer *)
1547 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1549 /* skip the slice header packed data type as it is lastly inserted */
1550 if (param->type == VAEncPackedHeaderSlice)
1553 length_in_bits = param->bit_length;
1555 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1557 /* as the slice header is still required, the last header flag is set to
1560 mfc_context->insert_object(ctx,
1563 ALIGN(length_in_bits, 32) >> 5,
1564 length_in_bits & 0x1f,
1568 !param->has_emulation_bytes,
1572 if (slice_header_index == -1) {
1573 unsigned char *slice_header = NULL;
1574 int slice_header_length_in_bits = 0;
1575 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1576 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1577 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1579 /* No slice header data is passed. And the driver needs to generate it */
1580 /* For the Normal H264 */
1581 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
1585 mfc_context->insert_object(ctx, encoder_context,
1586 (unsigned int *)slice_header,
1587 ALIGN(slice_header_length_in_bits, 32) >> 5,
1588 slice_header_length_in_bits & 0x1f,
1589 5, /* first 5 bytes are start code + nal unit type */
1590 1, 0, 1, slice_batch);
1594 unsigned int skip_emul_byte_cnt;
1596 header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
1598 param = (VAEncPackedHeaderParameterBuffer *)
1599 (encode_state->packed_header_params_ext[start_index + i]->buffer);
1600 length_in_bits = param->bit_length;
1602 /* as the slice header is the last header data for one slice,
1603 * the last header flag is set to one.
1605 skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
1607 mfc_context->insert_object(ctx,
1610 ALIGN(length_in_bits, 32) >> 5,
1611 length_in_bits & 0x1f,
1615 !param->has_emulation_bytes,