2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao Yakui <yakui.zhao@intel.com>
26 * Xiang Haihao <haihao.xiang@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
/* Gen7.5 media-kernel binaries (raw EU instruction words, 4 dwords each)
 * used to build per-macroblock PAK batch buffers on the GPU; the .g7b
 * files are generated from the shader sources under shaders/utils/. */
45 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
46 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
49 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
50 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
/* Kernel descriptor table consumed by the GPE loader:
 * { name, kernel id, binary pointer, binary size }. */
53 static struct i965_kernel gen75_mfc_kernels[] = {
55 "MFC AVC INTRA BATCHBUFFER ",
56 MFC_BATCHBUFFER_AVC_INTRA,
57 gen75_mfc_batchbuffer_avc_intra,
58 sizeof(gen75_mfc_batchbuffer_avc_intra),
63 "MFC AVC INTER BATCHBUFFER ",
64 MFC_BATCHBUFFER_AVC_INTER,
65 gen75_mfc_batchbuffer_avc_inter,
66 sizeof(gen75_mfc_batchbuffer_avc_inter),
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords) on the BCS ring: configures the
 * MFX fixed-function pipe for *encoding* the selected standard.
 * NOTE(review): the `standard_select` parameter and the opening lines of
 * this signature are outside the visible chunk; the assert below shows it
 * must be MFX_FORMAT_MPEG2 or MFX_FORMAT_AVC.
 */
72 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
74 struct intel_encoder_context *encoder_context)
76 struct intel_batchbuffer *batch = encoder_context->base.batch;
77 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
79 assert(standard_select == MFX_FORMAT_MPEG2 ||
80 standard_select == MFX_FORMAT_AVC);
82 BEGIN_BCS_BATCH(batch, 5);
84 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
/* DW1: pipe mode flags.  Deblocking routing (bits 8/9) is chosen from
 * whichever output BO gen75_mfc_avc_prepare() attached. */
86 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
87 (MFD_MODE_VLD << 15) | /* VLD mode */
88 (1 << 10) | /* Stream-Out Enable */
89 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
90 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
/* NOTE(review): this (0 << 8) is redundant — bit 8 is already driven by
 * pre_deblocking_output above; OR-ing 0 is a no-op and the line should
 * simply be dropped. */
91 (0 << 8) | /* Pre Deblocking Output */
92 (0 << 5) | /* not in stitch mode */
93 (1 << 4) | /* encoding mode */
94 (standard_select << 0)); /* standard select: avc or mpeg2 */
/* DW2: debug/error-handling controls, all left at hardware defaults. */
96 (0 << 7) | /* expand NOA bus flag */
97 (0 << 6) | /* disable slice-level clock gating */
98 (0 << 5) | /* disable clock gating for NOA */
99 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
100 (0 << 3) | /* terminate if AVC mbdata error occurs */
101 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
104 OUT_BCS_BATCH(batch, 0);
105 OUT_BCS_BATCH(batch, 0);
107 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 dwords): describes the reconstructed/source
 * NV12 surface (tiled Y-major, interleaved U/V) to the MFX pipe.
 * Dimensions/pitches come from mfc_context->surface_state, filled in by
 * gen75_mfc_avc_prepare() from the current object_surface.
 */
111 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
113 struct intel_batchbuffer *batch = encoder_context->base.batch;
114 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
116 BEGIN_BCS_BATCH(batch, 6);
118 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119 OUT_BCS_BATCH(batch, 0);
/* DW2: surface height/width, both encoded as (value - 1). */
121 ((mfc_context->surface_state.height - 1) << 18) |
122 ((mfc_context->surface_state.width - 1) << 4));
124 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126 (0 << 22) | /* surface object control state, FIXME??? */
127 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128 (0 << 2) | /* must be 0 for interleave U/V */
129 (1 << 1) | /* must be tiled */
130 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* DW4: the chroma (U/V) plane starts h_pitch rows below the luma plane. */
132 (0 << 16) | /* must be 0 for interleave U/V */
133 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
134 OUT_BCS_BATCH(batch, 0);
136 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (11 dwords): points the MFX engine at
 * the indirect MV object (the VME stage's output BO) and at the PAK-BSE
 * output object (the user's coded buffer) where the bitstream is written.
 */
140 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
142 struct intel_batchbuffer *batch = encoder_context->base.batch;
143 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
144 struct gen6_vme_context *vme_context = encoder_context->vme_context;
146 BEGIN_BCS_BATCH(batch, 11);
148 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
149 OUT_BCS_BATCH(batch, 0);
150 OUT_BCS_BATCH(batch, 0);
151 /* MFX Indirect MV Object Base Address */
152 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
153 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
154 OUT_BCS_BATCH(batch, 0);
155 OUT_BCS_BATCH(batch, 0);
156 OUT_BCS_BATCH(batch, 0);
157 OUT_BCS_BATCH(batch, 0);
158 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
/* Two relocations against the same coded-buffer BO: base address first,
 * then the upper bound (end_offset) so the HW can detect overflow. */
160 mfc_context->mfc_indirect_pak_bse_object.bo,
161 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
164 mfc_context->mfc_indirect_pak_bse_object.bo,
165 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
166 mfc_context->mfc_indirect_pak_bse_object.end_offset);
168 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (16 dwords): per-picture AVC encode parameters —
 * frame size in MBs, entropy coding mode, transform flags, conformance
 * limits and rate-control trigger thresholds.  Picture-level flags are
 * taken from the app-supplied VAEncPictureParameterBufferH264.
 */
172 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
173 struct intel_encoder_context *encoder_context)
175 struct intel_batchbuffer *batch = encoder_context->base.batch;
176 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
177 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* Frame dimensions rounded up to whole 16x16 macroblocks. */
179 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
180 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
182 BEGIN_BCS_BATCH(batch, 16);
184 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
186 ((width_in_mbs * height_in_mbs) & 0xFFFF));
188 ((height_in_mbs - 1) << 16) |
189 ((width_in_mbs - 1) << 0));
191 (0 << 24) | /* Second Chroma QP Offset */
192 (0 << 16) | /* Chroma QP Offset */
193 (0 << 14) | /* Max-bit conformance Intra flag */
194 (0 << 13) | /* Max Macroblock size conformance Inter flag */
195 (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
196 (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
197 (0 << 8) | /* FIXME: Image Structure */
198 (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */
200 (0 << 16) | /* Mininum Frame size */
201 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
202 (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */
203 (0 << 13) | /* CABAC 0 word insertion test enable */
204 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
205 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
206 (0 << 9) | /* FIXME: MbMvFormatFlag */
207 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
208 (0 << 6) | /* Only valid for VLD decoding mode */
209 (0 << 5) | /* Constrained Intra Predition Flag, from PPS */
210 (0 << 4) | /* Direct 8x8 inference flag */
211 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
212 (1 << 2) | /* Frame MB only flag */
213 (0 << 1) | /* MBAFF mode is in active */
214 (0 << 0)); /* Field picture flag */
215 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
216 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
217 (0xBB8 << 16) | /* InterMbMaxSz */
218 (0xEE8) ); /* IntraMbMaxSz */
219 OUT_BCS_BATCH(batch, 0); /* Reserved */
220 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
221 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
/* NOTE(review): 0x8C000000 / 0x00010000 are magic trigger/threshold
 * values carried over from the Gen6 encoder — meaning not derivable from
 * this file; confirm against the MFX_AVC_IMG_STATE PRM description. */
222 OUT_BCS_BATCH(batch, 0x8C000000);
223 OUT_BCS_BATCH(batch, 0x00010000);
224 OUT_BCS_BATCH(batch, 0);
225 OUT_BCS_BATCH(batch, 0);
226 OUT_BCS_BATCH(batch, 0);
227 OUT_BCS_BATCH(batch, 0);
229 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFX_QM_STATE command (18 dwords) carrying a quantizer matrix
 * of up to 16 dwords.  NOTE(review): the qm_type/qm/qm_length parameter
 * lines of this signature fall outside the visible chunk.
 * NOTE(review): when qm_length < 16 (the 4x4 callers pass 12) the tail
 * of qm_buffer is uninitialized stack data yet all 16 dwords are still
 * emitted to the ring — harmless to the HW for 4x4 matrices, but worth
 * zero-initializing qm_buffer to avoid leaking stack contents.
 */
233 gen75_mfc_qm_state(VADriverContextP ctx,
237 struct intel_encoder_context *encoder_context)
239 struct intel_batchbuffer *batch = encoder_context->base.batch;
240 unsigned int qm_buffer[16];
242 assert(qm_length <= 16);
243 assert(sizeof(*qm) == 4);
244 memcpy(qm_buffer, qm, qm_length * 4);
246 BEGIN_BCS_BATCH(batch, 18);
247 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
248 OUT_BCS_BATCH(batch, qm_type << 0);
249 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
250 ADVANCE_BCS_BATCH(batch);
/*
 * Program flat (all-16) AVC quantizer matrices for the four matrix types.
 * 0x10101010 packs four bytes of 16 per dword, i.e. the H.264 "Flat_4x4"
 * / "Flat_8x8" default scaling lists.  The 4x4 lists use 12 dwords
 * (6 lists x 8 bytes), the 8x8 lists use 16 dwords.
 */
254 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
256 unsigned int qm[16] = {
257 0x10101010, 0x10101010, 0x10101010, 0x10101010,
258 0x10101010, 0x10101010, 0x10101010, 0x10101010,
259 0x10101010, 0x10101010, 0x10101010, 0x10101010,
260 0x10101010, 0x10101010, 0x10101010, 0x10101010
263 gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
264 gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
265 gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
266 gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
/*
 * Emit one MFX_FQM_STATE command (34 dwords) carrying a forward-quantizer
 * matrix of up to 32 dwords (16-bit reciprocal entries, two per dword).
 * NOTE(review): fqm_type/fqm/fqm_length parameter lines fall outside the
 * visible chunk.  As with gen75_mfc_qm_state, when fqm_length < 32 the
 * tail of fqm_buffer is uninitialized but still emitted — consider
 * zero-initializing the local buffer.
 */
270 gen75_mfc_fqm_state(VADriverContextP ctx,
274 struct intel_encoder_context *encoder_context)
276 struct intel_batchbuffer *batch = encoder_context->base.batch;
277 unsigned int fqm_buffer[32];
279 assert(fqm_length <= 32);
280 assert(sizeof(*fqm) == 4);
281 memcpy(fqm_buffer, fqm, fqm_length * 4);
283 BEGIN_BCS_BATCH(batch, 34);
284 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
285 OUT_BCS_BATCH(batch, fqm_type << 0);
286 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
287 ADVANCE_BCS_BATCH(batch);
/*
 * Program flat forward-quantizer matrices: 0x10001000 packs two 16-bit
 * entries of 0x1000 (the FQM reciprocal of a flat scale of 16) per dword.
 * 4x4 lists take 24 dwords, 8x8 lists take 32.
 */
291 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
293 unsigned int qm[32] = {
294 0x10001000, 0x10001000, 0x10001000, 0x10001000,
295 0x10001000, 0x10001000, 0x10001000, 0x10001000,
296 0x10001000, 0x10001000, 0x10001000, 0x10001000,
297 0x10001000, 0x10001000, 0x10001000, 0x10001000,
298 0x10001000, 0x10001000, 0x10001000, 0x10001000,
299 0x10001000, 0x10001000, 0x10001000, 0x10001000,
300 0x10001000, 0x10001000, 0x10001000, 0x10001000,
301 0x10001000, 0x10001000, 0x10001000, 0x10001000
304 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
305 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
306 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
307 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
/*
 * Emit MFX_INSERT_OBJECT: injects raw header/packed data (SPS/PPS/SEI,
 * slice headers...) directly into the output bitstream.  `batch` may be
 * passed as NULL, in which case the context's main BCS batch is used.
 * NOTE(review): `lenght_in_dws` (sic) is the existing parameter name —
 * kept as-is since it is part of this function's interface.
 */
311 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
312 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
313 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
314 struct intel_batchbuffer *batch)
317 batch = encoder_context->base.batch;
319 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
321 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
/* DW1: payload description; emulation_flag enables 0x000003 emulation-
 * prevention byte insertion by the hardware. */
323 (0 << 16) | /* always start at offset 0 */
324 (data_bits_in_last_dw << 8) |
325 (skip_emul_byte_count << 4) |
326 (!!emulation_flag << 3) |
327 ((!!is_last_header) << 2) |
328 ((!!is_end_of_slice) << 1) |
329 (0 << 0)); /* FIXME: ??? */
330 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
332 ADVANCE_BCS_BATCH(batch);
/*
 * Initialize the per-slice-type (I/P/B) bit-rate-control state: target
 * macroblock/frame sizes derived from the sequence's bits_per_second and
 * frame rate, a starting QP of 26, and the grow/shrink correction tables
 * used by the PAK batchbuffer kernels.
 */
337 gen75_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
338 struct gen6_mfc_context *mfc_context)
340 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
341 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
342 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* time_scale counts ticks per two fields, hence the 0.5 factor. */
343 float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
/* NOTE(review): the "+4.0" fps padding and the 5x intra/inter ratio are
 * empirical tuning constants inherited from the Gen6 encoder. */
344 int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
345 int intra_mb_size = inter_mb_size * 5.0;
348 mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
349 mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
350 mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
351 mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
352 mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
353 mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
/* Common QP-adaptation parameters for all three slice types. */
355 for(i = 0 ; i < 3; i++) {
356 mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
357 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
358 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
359 mfc_context->bit_rate_control_context[i].GrowInit = 6;
360 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
361 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
362 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
364 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
365 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
366 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
367 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
368 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
369 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/* Per-MB size targets in 16-byte words, with a 1.5x hard ceiling. */
372 mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
373 mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
374 mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
376 mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
377 mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
378 mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
/*
 * Initialize frame-level BRC and HRD state from the sequence parameters
 * and the application's HRD misc buffer: per-slice-type frame-size
 * targets weighted by GOP composition, buffer fullness, and an initial
 * P-slice QP interpolated between the estimated QP=1 and QP=51 frame
 * sizes.  Assumes encode_state->misc_param[VAEncMiscParameterTypeHRD]
 * was supplied by the app (dereferenced unchecked below).
 */
382 gen75_mfc_brc_init(struct encode_state *encode_state,
383 struct intel_encoder_context* encoder_context)
385 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
386 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
387 VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
388 VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
389 double bitrate = pSequenceParameter->bits_per_second;
390 double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
391 int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
392 int intra_period = pSequenceParameter->intra_period;
393 int ip_period = pSequenceParameter->ip_period;
/* Rough per-frame size estimates at QP=1 (~0.1 bpp factor) and QP=51
 * (~0.001), for a 4:2:0 frame (3/2 bytes per pixel). */
394 double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
395 double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
398 if (pSequenceParameter->ip_period) {
399 pnum = (intra_period + ip_period - 1)/ip_period - 1;
400 bnum = intra_period - inum - pnum;
403 mfc_context->brc.mode = encoder_context->rate_control_mode;
/* Split the GOP bit budget: P frames get BRC_PWEIGHT and B frames
 * BRC_BWEIGHT of an I frame's share. */
405 mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
406 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
407 mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
408 mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
410 mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
411 mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
412 mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
414 bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
416 mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
/* NOTE(review): the (double) cast applies to the *comparison result*,
 * not to either operand — the expression works (condition is simply
 * initial_buffer_fullness < buffer_size) but the cast is misleading and
 * should be removed or moved onto the operand. */
417 mfc_context->hrd.current_buffer_fullness =
418 (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
419 pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
420 mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
421 mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
422 mfc_context->hrd.violation_noted = 0;
/* Linear interpolation of the initial QP between the QP=51 and QP=1
 * frame-size estimates, clamped to [1, 51] below. */
424 if ((bpf > qp51_size) && (bpf < qp1_size)) {
425 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
427 else if (bpf >= qp1_size)
428 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
429 else if (bpf <= qp51_size)
430 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
432 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
433 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
435 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
436 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
437 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
/*
 * Initialize the VUI HRD fields used when packing SEI buffering-period /
 * picture-timing messages.  Values are only programmed for CBR; other
 * rate-control modes leave vui_hrd untouched.
 */
441 gen75_mfc_hrd_context_init(struct encode_state *encode_state,
442 struct intel_encoder_context *encoder_context)
444 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
445 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
446 unsigned int rate_control_mode = encoder_context->rate_control_mode;
447 int target_bit_rate = pSequenceParameter->bits_per_second;
449 // current we only support CBR mode.
450 if (rate_control_mode == VA_RC_CBR) {
/* Bitrate/CPB size are expressed in units of 1024 (>> 10) as required
 * by the H.264 HRD syntax scaling. */
451 mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
452 mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
/* Initial removal delay: half the CPB expressed in 90 kHz clock ticks. */
453 mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
454 mfc_context->vui_hrd.i_cpb_removal_delay = 2;
455 mfc_context->vui_hrd.i_frame_number = 0;
457 mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
458 mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
459 mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/*
 * free_private_data callback attached to object_surface: drops the two
 * direct-MV buffer references held by the per-surface aux structure.
 * (The guard/NULL-reset of *data itself lies outside the visible chunk.)
 */
465 gen75_mfc_free_avc_surface(void **data)
467 struct gen6_mfc_avc_surface_aux *avc_surface = *data;
472 dri_bo_unreference(avc_surface->dmv_top);
473 avc_surface->dmv_top = NULL;
474 dri_bo_unreference(avc_surface->dmv_bottom);
475 avc_surface->dmv_bottom = NULL;
/*
 * Per-frame (re)initialization of the MFC context: drop all BO references
 * from the previous frame, then (re)allocate the fixed scratch buffers
 * (intra row store, MB status, deblocking row store, BSD/MPC row store)
 * and a fresh auxiliary batchbuffer on the BSD ring.
 */
481 static void gen75_mfc_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
483 struct i965_driver_data *i965 = i965_driver_data(ctx);
484 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
488 /*Encode common setup for MFC*/
489 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
490 mfc_context->post_deblocking_output.bo = NULL;
492 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
493 mfc_context->pre_deblocking_output.bo = NULL;
495 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
496 mfc_context->uncompressed_picture_source.bo = NULL;
498 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
499 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
501 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
/* NOTE(review): stray semicolon makes this `if` a no-op — the
 * unreference below runs unconditionally.  Behavior is unaffected only
 * because dri_bo_unreference(NULL) is a no-op; the semicolon (and the
 * now-pointless check) should be removed. */
502 if ( mfc_context->direct_mv_buffers[i].bo != NULL);
503 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
504 mfc_context->direct_mv_buffers[i].bo = NULL;
507 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
508 if (mfc_context->reference_surfaces[i].bo != NULL)
509 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
510 mfc_context->reference_surfaces[i].bo = NULL;
513 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
514 bo = dri_bo_alloc(i965->intel.bufmgr,
519 mfc_context->intra_row_store_scratch_buffer.bo = bo;
521 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
522 bo = dri_bo_alloc(i965->intel.bufmgr,
527 mfc_context->macroblock_status_buffer.bo = bo;
529 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
530 bo = dri_bo_alloc(i965->intel.bufmgr,
532 49152, /* 6 * 128 * 64 */
535 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
537 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
538 bo = dri_bo_alloc(i965->intel.bufmgr,
540 12288, /* 1.5 * 128 * 64 */
543 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
545 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
546 mfc_context->mfc_batchbuffer_surface.bo = NULL;
548 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
549 mfc_context->aux_batchbuffer_surface.bo = NULL;
551 if (mfc_context->aux_batchbuffer)
552 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
/* The aux batchbuffer doubles as a GPE surface: 16-byte blocks so the
 * media kernels can write PAK commands into it. */
554 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
555 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
556 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
557 mfc_context->aux_batchbuffer_surface.pitch = 16;
558 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
559 mfc_context->aux_batchbuffer_surface.size_block = 16;
561 i965_gpe_context_init(ctx, &mfc_context->gpe_context);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 dwords): hands the MFX engine every
 * frame-level buffer address — pre/post deblocking outputs, the source
 * picture, stream-out, the row-store scratch buffers, and the 16
 * reference picture slots (zero-filled when a slot has no surface).
 */
565 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
567 struct intel_batchbuffer *batch = encoder_context->base.batch;
568 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
571 BEGIN_BCS_BATCH(batch, 24);
573 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
/* Exactly one of pre/post deblocking output is a real BO (chosen in
 * gen75_mfc_avc_prepare from disable_deblocking_filter_idc). */
575 if (mfc_context->pre_deblocking_output.bo)
576 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
577 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
580 OUT_BCS_BATCH(batch, 0); /* pre output addr */
582 if (mfc_context->post_deblocking_output.bo)
583 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
584 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
585 0); /* post output addr */
587 OUT_BCS_BATCH(batch, 0);
589 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
590 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
591 0); /* uncompressed data */
592 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
593 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
594 0); /* StreamOut data*/
595 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
596 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
598 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
599 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
601 /* 7..22 Reference pictures*/
602 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
603 if ( mfc_context->reference_surfaces[i].bo != NULL) {
604 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
605 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
608 OUT_BCS_BATCH(batch, 0);
/* DW23: MB status buffer appears a second time as the per-MB status
 * read/write target. */
611 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
612 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
613 0); /* Macroblock status buffer*/
615 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 dwords): direct-MV buffer addresses
 * for references and the current frame, followed by the POC list used
 * for temporal direct-mode scaling.
 */
619 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
621 struct intel_batchbuffer *batch = encoder_context->base.batch;
622 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
626 BEGIN_BCS_BATCH(batch, 69);
628 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
630 /* Reference frames and Current frames */
631 for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
632 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
633 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
634 I915_GEM_DOMAIN_INSTRUCTION, 0,
637 OUT_BCS_BATCH(batch, 0);
/* POC list: top/bottom field pairs share the same frame POC (i/2).
 * NOTE(review): hard-coded sequential POCs — assumes display order ==
 * coding order; confirm if B-frame reordering is ever enabled. */
642 for(i = 0; i < 32; i++) {
643 OUT_BCS_BATCH(batch, i/2);
645 OUT_BCS_BATCH(batch, 0);
646 OUT_BCS_BATCH(batch, 0);
648 ADVANCE_BCS_BATCH(batch);
/*
 * Emit two MFX_AVC_REF_IDX_STATE commands (10 dwords each), one for
 * reference list L0 and one for L1.  Only a single active reference is
 * programmed per list; 0x80 bytes mark invalid/unused entries.
 */
652 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
654 struct intel_batchbuffer *batch = encoder_context->base.batch;
657 BEGIN_BCS_BATCH(batch, 10);
658 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
659 OUT_BCS_BATCH(batch, 0); //Select L0
660 OUT_BCS_BATCH(batch, 0x80808020); //Only 1 reference
661 for(i = 0; i < 7; i++) {
662 OUT_BCS_BATCH(batch, 0x80808080);
664 ADVANCE_BCS_BATCH(batch);
666 BEGIN_BCS_BATCH(batch, 10);
667 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
668 OUT_BCS_BATCH(batch, 1); //Select L1
/* 0x22 vs 0x20 above: L1's single reference points at a different
 * frame-store slot than L0's. */
669 OUT_BCS_BATCH(batch, 0x80808022); //Only 1 reference
670 for(i = 0; i < 7; i++) {
671 OUT_BCS_BATCH(batch, 0x80808080);
673 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (4 dwords): only the BSD/MPC row
 * store scratch buffer is programmed; the remaining two base addresses
 * are left at zero (unused for encode).
 */
677 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
679 struct intel_batchbuffer *batch = encoder_context->base.batch;
680 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
682 BEGIN_BCS_BATCH(batch, 4);
684 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
685 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
686 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
688 OUT_BCS_BATCH(batch, 0);
689 OUT_BCS_BATCH(batch, 0);
691 ADVANCE_BCS_BATCH(batch);
/*
 * Program all picture-level MFX state for one AVC frame, in the order
 * the hardware requires.  Hook-style calls (mfc_context->...) allow the
 * common gen6 context to dispatch to the gen75 implementations above;
 * the remaining states are called directly.
 */
695 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
696 struct encode_state *encode_state,
697 struct intel_encoder_context *encoder_context)
699 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
701 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
702 mfc_context->set_surface_state(ctx, encoder_context);
703 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
704 gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
705 gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
706 mfc_context->avc_img_state(ctx, encode_state, encoder_context);
707 mfc_context->avc_qm_state(ctx, encoder_context);
708 mfc_context->avc_fqm_state(ctx, encoder_context);
709 gen75_mfc_avc_directmode_state(ctx, encoder_context);
710 gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
/*
 * Per-frame resource setup: decide deblocking routing from the slice
 * parameters, bind the current/reference surfaces and their per-surface
 * direct-MV aux buffers, bind the input YUV surface, and map the user's
 * coded buffer as the PAK-BSE output object.
 */
714 static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx,
715 struct encode_state *encode_state,
716 struct intel_encoder_context *encoder_context)
718 struct i965_driver_data *i965 = i965_driver_data(ctx);
719 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
720 struct object_surface *obj_surface;
721 struct object_buffer *obj_buffer;
722 struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
724 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
725 VAStatus vaStatus = VA_STATUS_SUCCESS;
726 int i, j, enable_avc_ildb = 0;
727 VAEncSliceParameterBufferH264 *slice_param;
728 VACodedBufferSegment *coded_buffer_segment;
729 unsigned char *flag = NULL;
/* Deblocking is enabled for the frame if any slice doesn't disable it
 * (disable_deblocking_filter_idc != 1); scan stops at the first hit. */
731 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
732 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
733 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
735 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
736 assert((slice_param->slice_type == SLICE_TYPE_I) ||
737 (slice_param->slice_type == SLICE_TYPE_SI) ||
738 (slice_param->slice_type == SLICE_TYPE_P) ||
739 (slice_param->slice_type == SLICE_TYPE_SP) ||
740 (slice_param->slice_type == SLICE_TYPE_B));
742 if (slice_param->disable_deblocking_filter_idc != 1) {
751 /*Setup all the input&output object*/
753 /* Setup current frame and current direct mv buffer*/
754 obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
756 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* Lazily attach a direct-MV aux struct (top/bottom field MV BOs) to the
 * surface; freed via gen75_mfc_free_avc_surface when the surface dies.
 * NOTE(review): calloc result is used unchecked — a NULL return would
 * crash; an explicit OOM check would be safer. */
758 if ( obj_surface->private_data == NULL) {
759 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
760 gen6_avc_surface->dmv_top =
761 dri_bo_alloc(i965->intel.bufmgr,
765 gen6_avc_surface->dmv_bottom =
766 dri_bo_alloc(i965->intel.bufmgr,
770 assert(gen6_avc_surface->dmv_top);
771 assert(gen6_avc_surface->dmv_bottom);
772 obj_surface->private_data = (void *)gen6_avc_surface;
773 obj_surface->free_private_data = (void *)gen75_mfc_free_avc_surface;
/* The current frame's MV buffers occupy the last two DMV slots. */
775 gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
776 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
777 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
778 dri_bo_reference(gen6_avc_surface->dmv_top);
779 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Route the reconstructed output through the deblocker or around it,
 * matching the bit-8/9 selection in gen75_mfc_pipe_mode_select. */
781 if (enable_avc_ildb) {
782 mfc_context->post_deblocking_output.bo = obj_surface->bo;
783 dri_bo_reference(mfc_context->post_deblocking_output.bo);
785 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
786 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
789 mfc_context->surface_state.width = obj_surface->orig_width;
790 mfc_context->surface_state.height = obj_surface->orig_height;
791 mfc_context->surface_state.w_pitch = obj_surface->width;
792 mfc_context->surface_state.h_pitch = obj_surface->height;
794 /* Setup reference frames and direct mv buffers*/
795 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
796 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) {
797 obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
799 if (obj_surface->bo != NULL) {
800 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
801 dri_bo_reference(obj_surface->bo);
803 /* Check DMV buffer */
804 if ( obj_surface->private_data == NULL) {
806 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
807 gen6_avc_surface->dmv_top =
808 dri_bo_alloc(i965->intel.bufmgr,
812 gen6_avc_surface->dmv_bottom =
813 dri_bo_alloc(i965->intel.bufmgr,
817 assert(gen6_avc_surface->dmv_top);
818 assert(gen6_avc_surface->dmv_bottom);
819 obj_surface->private_data = gen6_avc_surface;
820 obj_surface->free_private_data = gen75_mfc_free_avc_surface;
823 gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
824 /* Setup DMV buffer */
825 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
826 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
827 dri_bo_reference(gen6_avc_surface->dmv_top);
828 dri_bo_reference(gen6_avc_surface->dmv_bottom);
834 obj_surface = SURFACE(encoder_context->input_yuv_surface);
835 assert(obj_surface && obj_surface->bo);
836 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
837 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Coded buffer: bitstream starts after the VACodedBufferSegment header
 * (offset I965_CODEDBUFFER_SIZE) and is bounded by end_offset. */
839 obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
840 bo = obj_buffer->buffer_store->bo;
842 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
843 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_SIZE;
844 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
845 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
/* The status flag byte lives immediately after the segment header. */
848 coded_buffer_segment = (VACodedBufferSegment *)bo->virtual;
849 flag = (unsigned char *)(coded_buffer_segment + 1);
/*
 * Kick the accumulated BCS batchbuffer to the kernel, launching the
 * whole MFX encode pipeline for this frame.  Always reports success;
 * actual completion is observed later via buffer mapping in _stop.
 */
857 static VAStatus gen75_mfc_run(VADriverContextP ctx,
858 struct encode_state *encode_state,
859 struct intel_encoder_context *encoder_context)
861 struct intel_batchbuffer *batch = encoder_context->base.batch;
863 intel_batchbuffer_flush(batch); //run the pipeline
865 return VA_STATUS_SUCCESS;
870 gen75_mfc_stop(VADriverContextP ctx,
871 struct encode_state *encode_state,
872 struct intel_encoder_context *encoder_context,
873 int *encoded_bits_size)
875 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
876 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
877 VACodedBufferSegment *coded_buffer_segment;
879 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
880 assert(vaStatus == VA_STATUS_SUCCESS);
881 *encoded_bits_size = coded_buffer_segment->size * 8;
882 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
884 return VA_STATUS_SUCCESS;
/*
 * gen75_mfc_update_hrd:
 * Update the HRD (hypothetical reference decoder) buffer-fullness model
 * with the bits spent on the current frame.
 *
 * Drains frame_bits from the buffer, then refills by the per-frame channel
 * budget.  Returns BRC_UNDERFLOW (after rolling the fullness back so the
 * model stays valid for a re-encode) when the drain would empty the
 * buffer, BRC_NO_HRD_VIOLATION otherwise.
 *
 * NOTE(review): the frame_bits parameter line and the non-VBR overflow
 * branch (which presumably returns BRC_OVERFLOW) are elided in this
 * excerpt — confirm against the full file.
 */
888 static int gen75_mfc_update_hrd(struct encode_state *encode_state,
889 struct gen6_mfc_context *mfc_context,
892 double prev_bf = mfc_context->hrd.current_buffer_fullness;
/* Drain: the encoded frame removes its bits from the HRD buffer. */
894 mfc_context->hrd.current_buffer_fullness -= frame_bits;
896 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
/* Underflow: restore previous fullness so a retry starts from a sane state. */
897 mfc_context->hrd.current_buffer_fullness = prev_bf;
898 return BRC_UNDERFLOW;
/* Refill: the channel delivers bits_per_frame every frame interval. */
901 mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
902 if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
903 if (mfc_context->brc.mode == VA_RC_VBR)
/* VBR is allowed to saturate at the top of the buffer. */
904 mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
906 mfc_context->hrd.current_buffer_fullness = prev_bf;
910 return BRC_NO_HRD_VIOLATION;
/*
 * gen75_mfc_brc_postpack:
 * Post-encode bit-rate-control step.  Given the bits actually produced
 * for the frame (frame_bits — parameter line elided in this excerpt),
 * predict the QP for the next frame of the same slice type, check HRD
 * compliance via gen75_mfc_update_hrd(), and propagate QP corrections to
 * the other slice types.  Returns a gen6_brc_status code
 * (BRC_NO_HRD_VIOLATION / BRC_UNDERFLOW[_WITH_MAX_QP] /
 * BRC_OVERFLOW[_WITH_MIN_QP]).
 */
914 static int gen75_mfc_brc_postpack(struct encode_state *encode_state,
915 struct gen6_mfc_context *mfc_context,
918 gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
919 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
920 int slicetype = pSliceParameter->slice_type;
921 int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
922 int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
923 int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
924 int qp; // quantizer of previously encoded slice of current type
925 int qpn; // predicted quantizer for next frame of current type in integer format
926 double qpf; // predicted quantizer for next frame of current type in float format
927 double delta_qp; // QP correction
928 int target_frame_size, frame_size_next;
930 * x - how far we are from HRD buffer borders
931 * y - how far we are from target HRD buffer fullness
934 double frame_size_alpha;
/* SP/SI slices are rate-controlled as their plain P/I counterparts. */
936 if (slicetype == SLICE_TYPE_SP)
937 slicetype = SLICE_TYPE_P;
938 else if (slicetype == SLICE_TYPE_SI)
939 slicetype = SLICE_TYPE_I;
941 qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
/* Smoothing factor: stronger damping once enough history has accumulated. */
943 target_frame_size = mfc_context->brc.target_frame_size[slicetype];
944 if (mfc_context->hrd.buffer_capacity < 5)
945 frame_size_alpha = 0;
947 frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
948 if (frame_size_alpha > 30) frame_size_alpha = 30;
949 frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
950 (double)(frame_size_alpha + 1.);
952 /* frame_size_next: avoiding negative number and too small value */
953 if ((double)frame_size_next < (double)(target_frame_size * 0.25))
954 frame_size_next = (int)((double)target_frame_size * 0.25);
/* First-order QP prediction: scale the previous QP by the size ratio. */
956 qpf = (double)qp * target_frame_size / frame_size_next;
957 qpn = (int)(qpf + 0.5);
960 /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
961 mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
962 if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
964 mfc_context->brc.qpf_rounding_accumulator = 0.;
965 } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
967 mfc_context->brc.qpf_rounding_accumulator = 0.;
970 /* making sure that QP is not changing too fast */
971 if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
972 else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
973 /* making sure that with QP predictions we did do not leave QPs range */
974 BRC_CLIP(qpn, 1, 51);
976 /* checking wthether HRD compliance is still met */
977 sts = gen75_mfc_update_hrd(encode_state, mfc_context, frame_bits);
979 /* calculating QP delta as some function*/
/* x in [-1,1]: distance from target fullness, sign selects which border;
 * y: headroom to the nearer buffer border (clamped away from 0 below). */
980 x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
982 x /= mfc_context->hrd.target_buffer_fullness;
983 y = mfc_context->hrd.current_buffer_fullness;
986 x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
987 y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
989 if (y < 0.01) y = 0.01;
991 else if (x < -1) x = -1;
/* Smooth correction curve: large when near a border, ~0 near the target. */
993 delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
994 qpn = (int)(qpn + delta_qp + 0.5);
996 /* making sure that with QP predictions we did do not leave QPs range */
997 BRC_CLIP(qpn, 1, 51);
999 if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
1000 /* correcting QPs of slices of other types */
/* Keep I/P/B QPs within their conventional offsets of each other. */
1001 if (slicetype == SLICE_TYPE_P) {
1002 if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
1003 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
1004 if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
1005 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
1006 } else if (slicetype == SLICE_TYPE_I) {
1007 if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
1008 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
1009 if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
1010 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
1011 } else { // SLICE_TYPE_B
1012 if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
1013 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
1014 if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
1015 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
1017 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
1018 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
1019 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
1020 } else if (sts == BRC_UNDERFLOW) { // underflow
/* Frame too big: force a strictly higher QP for the re-encode. */
1021 if (qpn <= qp) qpn = qp + 1;
1024 sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
1026 } else if (sts == BRC_OVERFLOW) {
/* Frame too small: force a strictly lower QP for the re-encode. */
1027 if (qpn >= qp) qpn = qp - 1;
1028 if (qpn < 1) { // < 0 (?) overflow with minQP
1030 sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
1034 mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
1040 gen75_mfc_hrd_context_update(struct encode_state *encode_state,
1041 struct gen6_mfc_context *mfc_context)
1043 mfc_context->vui_hrd.i_frame_number++;
/*
 * interlace_check:
 * Sum the macroblock counts of all submitted slices and compare against
 * the frame's total MB count (derived from the surface dimensions).
 * Used by the pipeline to reject interlaced/field encode requests.
 *
 * NOTE(review): the return statements are elided in this excerpt; the
 * visible comparison implies "slices cover exactly one full progressive
 * frame" is the pass condition — confirm in the full file.
 */
1047 static int interlace_check(VADriverContextP ctx,
1048 struct encode_state *encode_state,
1049 struct intel_encoder_context *encoder_context)
1051 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1052 VAEncSliceParameterBufferH264 *pSliceParameter;
/* Round surface dimensions up to whole 16x16 macroblocks. */
1055 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1056 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* Accumulate the MB count described by every slice parameter buffer. */
1058 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1059 pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
1060 mbCount += pSliceParameter->num_macroblocks;
1063 if ( mbCount == ( width_in_mbs * height_in_mbs ) )
/*
 * gen75_mfc_avc_slice_state:
 * Emit one MFX_AVC_SLICE_STATE command (11 DWORDs) describing a slice:
 * slice type, reference/weight-table info, QP and deblocking parameters,
 * start/next MB positions, rate-control flags, and the BRC grow/shrink
 * correction parameters taken from the per-slice-type BRC context.
 *
 * NOTE(review): the 'int qp' parameter line and several argument lines of
 * the OUT_BCS_BATCH calls are elided in this excerpt — the DWORD layout
 * below is partial; confirm against the full file / MFX PRM.
 */
1070 gen75_mfc_avc_slice_state(VADriverContextP ctx,
1071 VAEncPictureParameterBufferH264 *pic_param,
1072 VAEncSliceParameterBufferH264 *slice_param,
1073 struct encode_state *encode_state,
1074 struct intel_encoder_context *encoder_context,
1075 int rate_control_enable,
1077 struct intel_batchbuffer *batch)
1079 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1080 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1081 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* Slice extent in MB units, converted to (x, y) MB coordinates. */
1082 int beginmb = slice_param->macroblock_address;
1083 int endmb = beginmb + slice_param->num_macroblocks;
1084 int beginx = beginmb % width_in_mbs;
1085 int beginy = beginmb / width_in_mbs;
1086 int nextx = endmb % width_in_mbs;
1087 int nexty = endmb / width_in_mbs;
1088 int slice_type = slice_param->slice_type;
1089 int last_slice = (endmb == (width_in_mbs * height_in_mbs));
1090 int bit_rate_control_target, maxQpN, maxQpP;
1091 unsigned char correct[6], grow, shrink;
1093 int weighted_pred_idc = 0;
1094 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
1095 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
/* NOTE(review): likely guarded by an elided 'if (batch == NULL)' — confirm. */
1098 batch = encoder_context->base.batch;
/* SP/SI slices share the BRC context of their plain P/I counterparts. */
1100 bit_rate_control_target = slice_type;
1101 if (slice_type == SLICE_TYPE_SP)
1102 bit_rate_control_target = SLICE_TYPE_P;
1103 else if (slice_type == SLICE_TYPE_SI)
1104 bit_rate_control_target = SLICE_TYPE_I;
1106 if (slice_type == SLICE_TYPE_P) {
1107 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
1108 } else if (slice_type == SLICE_TYPE_B) {
1109 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1111 if (weighted_pred_idc == 2) {
1112 /* 8.4.3 - Derivation process for prediction weights (8-279) */
1113 luma_log2_weight_denom = 5;
1114 chroma_log2_weight_denom = 5;
/* Pack the BRC correction parameters for this slice type. */
1118 maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
1119 maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
1121 for (i = 0; i < 6; i++)
1122 correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
1124 grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit +
1125 (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
1126 shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit +
1127 (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
1129 BEGIN_BCS_BATCH(batch, 11);;
1131 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
1132 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
1134 if (slice_type == SLICE_TYPE_I) {
1135 OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
1137 OUT_BCS_BATCH(batch,
1138 (1 << 16) | /*1 reference frame*/
1139 (chroma_log2_weight_denom << 8) |
1140 (luma_log2_weight_denom << 0));
1143 OUT_BCS_BATCH(batch,
1144 (weighted_pred_idc << 30) |
1145 (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
1146 (slice_param->disable_deblocking_filter_idc << 27) |
1147 (slice_param->cabac_init_idc << 24) |
1148 (qp<<16) | /*Slice Quantization Parameter*/
1149 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
1150 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
1151 OUT_BCS_BATCH(batch,
1152 (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
1154 slice_param->macroblock_address );
1155 OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
1156 OUT_BCS_BATCH(batch,
1157 (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
1158 (1 << 30) | /*ResetRateControlCounter*/
1159 (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
1160 (4 << 24) | /*RC Stable Tolerance, middle level*/
1161 (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
1162 (0 << 22) | /*QP mode, don't modfiy CBP*/
1163 (0 << 21) | /*MB Type Direct Conversion Enabled*/
1164 (0 << 20) | /*MB Type Skip Conversion Enabled*/
1165 (last_slice << 19) | /*IsLastSlice*/
1166 (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
1167 (1 << 17) | /*HeaderPresentFlag*/
1168 (1 << 16) | /*SliceData PresentFlag*/
1169 (1 << 15) | /*TailPresentFlag*/
1170 (1 << 13) | /*RBSP NAL TYPE*/
1171 (0 << 12) ); /*CabacZeroWordInsertionEnable*/
/* Where in the indirect PAK-BSE object this slice's bitstream starts. */
1172 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1173 OUT_BCS_BATCH(batch,
1174 (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
1175 (maxQpP << 16) | /*Target QP + 20 is highest QP*/
1178 OUT_BCS_BATCH(batch,
1179 (correct[5] << 20) |
1180 (correct[4] << 16) |
1181 (correct[3] << 12) |
1185 OUT_BCS_BATCH(batch, 0);
1187 ADVANCE_BCS_BATCH(batch);
/*
 * gen75_mfc_avc_pipeline_header_programing:
 * Insert the application-supplied packed SPS, PPS and SEI headers (when
 * present in encode_state) into 'slice_batch' via the context's
 * insert_object() hook.  Each header's bit length is split into whole
 * DWORDs plus leftover bits; the first 5 bytes (start code + NAL unit
 * type) are skipped for emulation-prevention, and emulation-byte
 * insertion is requested only when the packed data lacks them
 * (!param->has_emulation_bytes).
 *
 * NOTE(review): several insert_object() argument lines are elided in
 * this excerpt.
 */
1191 static void gen75_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
1192 struct encode_state *encode_state,
1193 struct intel_encoder_context *encoder_context,
1194 struct intel_batchbuffer *slice_batch)
1196 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1197 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
/* --- packed SPS --- */
1199 if (encode_state->packed_header_data[idx]) {
1200 VAEncPackedHeaderParameterBuffer *param = NULL;
1201 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1202 unsigned int length_in_bits;
1204 assert(encode_state->packed_header_param[idx]);
1205 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1206 length_in_bits = param->bit_length;
1208 mfc_context->insert_object(ctx,
1211 ALIGN(length_in_bits, 32) >> 5,
1212 length_in_bits & 0x1f,
1213 5, /* FIXME: check it */
1216 !param->has_emulation_bytes,
/* --- packed PPS --- */
1220 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
1222 if (encode_state->packed_header_data[idx]) {
1223 VAEncPackedHeaderParameterBuffer *param = NULL;
1224 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1225 unsigned int length_in_bits;
1227 assert(encode_state->packed_header_param[idx]);
1228 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1229 length_in_bits = param->bit_length;
1231 mfc_context->insert_object(ctx,
1234 ALIGN(length_in_bits, 32) >> 5,
1235 length_in_bits & 0x1f,
1236 5, /* FIXME: check it */
1239 !param->has_emulation_bytes,
/* --- packed SEI --- */
1243 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
1245 if (encode_state->packed_header_data[idx]) {
1246 VAEncPackedHeaderParameterBuffer *param = NULL;
1247 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
1248 unsigned int length_in_bits;
1250 assert(encode_state->packed_header_param[idx]);
1251 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
1252 length_in_bits = param->bit_length;
1254 mfc_context->insert_object(ctx,
1257 ALIGN(length_in_bits, 32) >> 5,
1258 length_in_bits & 0x1f,
1259 5, /* FIXME: check it */
1262 !param->has_emulation_bytes,
/*
 * gen75_mfc_avc_pak_object_intra:
 * Emit one MFC_AVC_PAK_OBJECT command (11 DWORDs) for an intra macroblock
 * at MB coordinates (x, y), using the VME output words in msg[] for the
 * MB mode/prediction fields.  Returns the command length in DWORDs so the
 * caller can track batch usage.
 */
1270 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1271 int qp,unsigned int *msg,
1272 struct intel_encoder_context *encoder_context,
1273 unsigned char target_mb_size, unsigned char max_mb_size,
1274 struct intel_batchbuffer *batch)
1276 int len_in_dwords = 11;
/* NOTE(review): likely guarded by an elided 'if (batch == NULL)' — confirm. */
1279 batch = encoder_context->base.batch;
1281 BEGIN_BCS_BATCH(batch, len_in_dwords);
1283 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1284 OUT_BCS_BATCH(batch, 0);
1285 OUT_BCS_BATCH(batch, 0);
1286 OUT_BCS_BATCH(batch,
1287 (0 << 24) | /* PackedMvNum, Debug*/
1288 (0 << 20) | /* No motion vector */
1289 (1 << 19) | /* CbpDcY */
1290 (1 << 18) | /* CbpDcU */
1291 (1 << 17) | /* CbpDcV */
1292 (msg[0] & 0xFFFF) );
1294 OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/
1295 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
1296 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
1298 /*Stuff for Intra MB*/
1299 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
1300 OUT_BCS_BATCH(batch, msg[2]);
1301 OUT_BCS_BATCH(batch, msg[3]&0xFC);
1303 /*MaxSizeInWord and TargetSzieInWord*/
1304 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1305 (target_mb_size << 16) );
1307 ADVANCE_BCS_BATCH(batch);
1309 return len_in_dwords;
/*
 * gen75_mfc_avc_pak_object_inter:
 * Emit one MFC_AVC_PAK_OBJECT command (11 DWORDs) for an inter macroblock.
 * 'offset' points at the MB's motion vectors inside the indirect VME
 * output buffer; msg[] supplies the MB mode words.  Returns the command
 * length in DWORDs.
 *
 * NOTE(review): the branch between L855 and L856 is elided — the
 * surviving lines suggest a slice_type == SLICE_TYPE_B / else split with
 * different DW6 encodings; confirm against the full file.
 */
1313 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1314 unsigned int *msg, unsigned int offset,
1315 struct intel_encoder_context *encoder_context,
1316 unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1317 struct intel_batchbuffer *batch)
1319 int len_in_dwords = 11;
/* NOTE(review): likely guarded by an elided 'if (batch == NULL)' — confirm. */
1322 batch = encoder_context->base.batch;
1324 BEGIN_BCS_BATCH(batch, len_in_dwords);
1326 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1328 OUT_BCS_BATCH(batch, msg[2]); /* 32 MV*/
/* Indirect MV data location inside the VME output buffer. */
1329 OUT_BCS_BATCH(batch, offset);
1331 OUT_BCS_BATCH(batch, msg[0]);
1333 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
1334 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
1336 if ( slice_type == SLICE_TYPE_B) {
1337 OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
1339 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1342 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1346 /*Stuff for Inter MB*/
1347 OUT_BCS_BATCH(batch, msg[1]);
1348 OUT_BCS_BATCH(batch, 0x0);
1349 OUT_BCS_BATCH(batch, 0x0);
1351 /*MaxSizeInWord and TargetSzieInWord*/
1352 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1353 (target_mb_size << 16) );
1355 ADVANCE_BCS_BATCH(batch);
1357 return len_in_dwords;
/*
 * gen75_mfc_avc_pipeline_slice_programing:
 * Program everything for one slice into 'slice_batch' on the CPU
 * ("software" path): slice state, packed headers (first slice only), the
 * built slice header, then one PAK object per macroblock driven by the
 * mapped VME output, and finally the tail/padding data.
 *
 * NOTE(review): the slice_index parameter line and several conditional
 * lines (intra/inter branch around L907-L908, last-slice tail selection
 * around L924-L927) are elided in this excerpt.
 */
1361 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1362 struct encode_state *encode_state,
1363 struct intel_encoder_context *encoder_context,
1365 struct intel_batchbuffer *slice_batch)
1367 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1368 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1369 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1370 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1371 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1372 unsigned int *msg = NULL, offset = 0;
1373 int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1374 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1375 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1376 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1378 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1379 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1380 unsigned char *slice_header = NULL;
1381 int slice_header_length_in_bits = 0;
1382 unsigned int tail_data[] = { 0x0, 0x0 };
1383 int slice_type = pSliceParameter->slice_type;
/* Under CBR the BRC-chosen QP overrides the application's QP. */
1386 if (rate_control_mode == VA_RC_CBR) {
1387 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1388 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1391 /* only support for 8-bit pixel bit-depth */
1392 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1393 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1394 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1395 assert(qp >= 0 && qp < 52);
1397 gen75_mfc_avc_slice_state(ctx,
1400 encode_state, encoder_context,
1401 (rate_control_mode == VA_RC_CBR), qp, slice_batch);
/* SPS/PPS/SEI are emitted once, ahead of the first slice. */
1403 if ( slice_index == 0)
1404 gen75_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1406 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1409 mfc_context->insert_object(ctx, encoder_context,
1410 (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1411 5, /* first 5 bytes are start code + nal unit type */
1412 1, 0, 1, slice_batch);
/* Walk the CPU-visible VME output to build per-MB PAK objects. */
1414 dri_bo_map(vme_context->vme_output.bo , 1);
1415 msg = (unsigned int *)vme_context->vme_output.bo->virtual;
1418 msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
1420 msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
1421 msg += 32; /* the first 32 DWs are MVs */
1422 offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
1425 for (i = pSliceParameter->macroblock_address;
1426 i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1427 int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1428 x = i % width_in_mbs;
1429 y = i / width_in_mbs;
1433 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1434 msg += INTRA_VME_OUTPUT_IN_DWS;
/* Inter slices may still contain VME-decided intra MBs. */
1436 if (msg[0] & INTRA_MB_FLAG_MASK) {
1437 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1439 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1442 msg += INTER_VME_OUTPUT_IN_DWS;
1443 offset += INTER_VME_OUTPUT_IN_BYTES;
1447 dri_bo_unmap(vme_context->vme_output.bo);
/* Tail data: zero words appended after the slice bitstream. */
1450 mfc_context->insert_object(ctx, encoder_context,
1452 2, 1, 1, 0, slice_batch);
1454 mfc_context->insert_object(ctx, encoder_context,
1456 1, 1, 1, 0, slice_batch);
/*
 * gen75_mfc_avc_software_batchbuffer:
 * CPU path: build a standalone BSD batchbuffer containing every slice's
 * commands, terminated by MI_BATCH_BUFFER_END, and hand its bo back.
 * The bo is referenced before the wrapping intel_batchbuffer is freed so
 * the caller owns a live reference (and must unreference it).
 *
 * NOTE(review): the 'return batch_bo;' line is elided in this excerpt —
 * confirm in the full file.
 */
1464 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1465 struct encode_state *encode_state,
1466 struct intel_encoder_context *encoder_context)
1468 struct i965_driver_data *i965 = i965_driver_data(ctx);
1469 struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
1470 dri_bo *batch_bo = batch->buffer;
1473 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1474 gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1477 intel_batchbuffer_align(batch, 8);
/* Terminate the secondary batch so the ring can return to the primary. */
1479 BEGIN_BCS_BATCH(batch, 2);
1480 OUT_BCS_BATCH(batch, 0);
1481 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1482 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive past intel_batchbuffer_free(). */
1484 dri_bo_reference(batch_bo);
1485 intel_batchbuffer_free(batch);
1493 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1494 struct encode_state *encode_state,
1495 struct intel_encoder_context *encoder_context)
1498 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1499 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1501 assert(vme_context->vme_output.bo);
1502 mfc_context->buffer_suface_setup(ctx,
1503 &mfc_context->gpe_context,
1504 &vme_context->vme_output,
1505 BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1506 SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1507 assert(mfc_context->aux_batchbuffer_surface.bo);
1508 mfc_context->buffer_suface_setup(ctx,
1509 &mfc_context->gpe_context,
1510 &mfc_context->aux_batchbuffer_surface,
1511 BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1512 SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
/*
 * gen75_mfc_batchbuffer_surfaces_output:
 * Allocate and bind the buffer the GPU kernels write the generated PAK
 * commands into.  Sized as one block per macroblock plus extra blocks per
 * slice for slice-level commands and a terminator.
 *
 * NOTE(review): the dri_bo_alloc name/alignment argument lines are elided
 * in this excerpt.
 */
1516 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1517 struct encode_state *encode_state,
1518 struct intel_encoder_context *encoder_context)
1521 struct i965_driver_data *i965 = i965_driver_data(ctx);
1522 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1523 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1524 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1525 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
/* One block per MB + 8 per slice (headers/tails) + 1 terminator block. */
1526 mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1527 mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1528 mfc_context->mfc_batchbuffer_surface.pitch = 16;
1529 mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
1531 mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1533 mfc_context->buffer_suface_setup(ctx,
1534 &mfc_context->gpe_context,
1535 &mfc_context->mfc_batchbuffer_surface,
1536 BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1537 SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
1541 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1542 struct encode_state *encode_state,
1543 struct intel_encoder_context *encoder_context)
1545 gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1546 gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/*
 * gen75_mfc_batchbuffer_idrt_setup:
 * Fill the interface descriptor remap table (IDRT) with one descriptor
 * per media kernel (intra/inter batchbuffer generators), pointing each at
 * its kernel start address, binding table and constant URB layout, and
 * emit the relocation for the kernel start pointer.
 *
 * NOTE(review): the dri_bo_map/unmap of the idrt bo, the 'desc' cursor
 * advance, and the trailing dri_bo_emit_reloc arguments are elided in
 * this excerpt.
 */
1550 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1551 struct encode_state *encode_state,
1552 struct intel_encoder_context *encoder_context)
1554 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1555 struct gen6_interface_descriptor_data *desc;
1559 bo = mfc_context->gpe_context.idrt.bo;
1561 assert(bo->virtual);
1564 for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1565 struct i965_kernel *kernel;
1567 kernel = &mfc_context->gpe_context.kernels[i];
/* The hardware expects exactly 32-byte descriptors. */
1568 assert(sizeof(*desc) == 32);
1570 /*Setup the descritor table*/
1571 memset(desc, 0, sizeof(*desc));
/* Kernel start pointer is a 64-byte-aligned GPU address (hence >> 6). */
1572 desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1573 desc->desc2.sampler_count = 0;
1574 desc->desc2.sampler_state_pointer = 0;
1575 desc->desc3.binding_table_entry_count = 2;
1576 desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1577 desc->desc4.constant_urb_entry_read_offset = 0;
1578 desc->desc4.constant_urb_entry_read_length = 4;
/* Patch desc0 at execbuffer time with the kernel bo's final address. */
1581 dri_bo_emit_reloc(bo,
1582 I915_GEM_DOMAIN_INSTRUCTION, 0,
1584 i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
/*
 * gen75_mfc_batchbuffer_constant_setup:
 * Prepare the constant (curbe) data consumed by the batchbuffer-generation
 * kernels.  Only the context fetch is visible in this excerpt — the body
 * past L1009 is elided; confirm against the full file.
 */
1593 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1594 struct encode_state *encode_state,
1595 struct intel_encoder_context *encoder_context)
1597 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/*
 * gen75_mfc_batchbuffer_emit_object_command:
 * Emit one CMD_MEDIA_OBJECT (12 DWORDs) that launches the selected
 * batchbuffer-generation kernel ('index': intra or inter) for a run of
 * macroblocks.  The inline payload passes the slice-header offset, the
 * output position in the batchbuffer surface, and a packed word carrying
 * number_mb_cmds among other fields.
 *
 * NOTE(review): most parameter lines and several inline-data DWORDs are
 * elided in this excerpt.
 */
1603 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1606 int batchbuffer_offset,
1618 BEGIN_BATCH(batch, 12);
1620 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
/* Interface descriptor index selects the intra or inter kernel. */
1621 OUT_BATCH(batch, index);
1622 OUT_BATCH(batch, 0);
1623 OUT_BATCH(batch, 0);
1624 OUT_BATCH(batch, 0);
1625 OUT_BATCH(batch, 0);
/* Inline data consumed by the kernel. */
1628 OUT_BATCH(batch, head_offset);
1629 OUT_BATCH(batch, batchbuffer_offset);
1634 number_mb_cmds << 16 |
1645 ADVANCE_BATCH(batch);
/*
 * gen75_mfc_avc_batchbuffer_slice_command:
 * Split one slice into MEDIA_OBJECT launches of at most 128 MB commands
 * each, picking the intra or inter generation kernel by slice type.
 * head_size/tail_size (in OWords) describe the pre-built slice header and
 * tail so the first/last launches can copy them; offsets advance as each
 * chunk of MB commands is written.
 *
 * NOTE(review): the argument lists of both
 * gen75_mfc_batchbuffer_emit_object_command() calls and the surrounding
 * first/last-object bookkeeping lines are elided in this excerpt.
 */
1649 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1650 struct intel_encoder_context *encoder_context,
1651 VAEncSliceParameterBufferH264 *slice_param,
1653 unsigned short head_size,
1654 unsigned short tail_size,
1655 int batchbuffer_offset,
1659 struct intel_batchbuffer *batch = encoder_context->base.batch;
1660 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1661 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1662 int total_mbs = slice_param->num_macroblocks;
/* Hardware kernel processes at most 128 MBs per MEDIA_OBJECT. */
1663 int number_mb_cmds = 128;
1664 int starting_mb = 0;
1665 int last_object = 0;
1666 int first_object = 1;
1669 int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
/* Full 128-MB chunks. */
1671 for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1672 last_object = (total_mbs - starting_mb) == number_mb_cmds;
1673 mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1674 mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1675 assert(mb_x <= 255 && mb_y <= 255);
1677 starting_mb += number_mb_cmds;
1679 gen75_mfc_batchbuffer_emit_object_command(batch,
/* After the first launch the slice header has been consumed. */
1695 head_offset += head_size;
1696 batchbuffer_offset += head_size;
1700 head_offset += tail_size;
1701 batchbuffer_offset += tail_size;
1704 batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
/* Remainder chunk (< 128 MBs), if any. */
1711 number_mb_cmds = total_mbs % number_mb_cmds;
1712 mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1713 mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1714 assert(mb_x <= 255 && mb_y <= 255);
1715 starting_mb += number_mb_cmds;
1717 gen75_mfc_batchbuffer_emit_object_command(batch,
1735 * return the size in OWords (one OWord = 16 bytes)
/*
 * gen75_mfc_avc_batchbuffer_slice:
 * GPU path, per-slice step: build the slice-level commands (slice state,
 * packed headers for the first slice, slice header, tail data) into the
 * aux batchbuffer on the CPU, measure their head/tail sizes in OWords,
 * then enqueue the MEDIA_OBJECT launches that make the GPU expand the
 * per-MB PAK commands.  Returns the slice's total size in OWords.
 *
 * NOTE(review): the slice_index parameter line, the slice-state and
 * insert_object argument lists, and the batchbuffer_slice_command
 * arguments are elided in this excerpt.
 */
1738 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1739 struct encode_state *encode_state,
1740 struct intel_encoder_context *encoder_context,
1742 int batchbuffer_offset)
1744 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1745 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1746 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1747 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1748 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1749 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1750 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1751 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1752 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1753 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1754 unsigned char *slice_header = NULL;
1755 int slice_header_length_in_bits = 0;
1756 unsigned int tail_data[] = { 0x0, 0x0 };
1758 int old_used = intel_batchbuffer_used_size(slice_batch), used;
1759 unsigned short head_size, tail_size;
1760 int slice_type = pSliceParameter->slice_type;
/* Under CBR the BRC-chosen QP overrides the application's QP. */
1762 if (rate_control_mode == VA_RC_CBR) {
1763 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1764 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1767 /* only support for 8-bit pixel bit-depth */
1768 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1769 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1770 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1771 assert(qp >= 0 && qp < 52);
/* OWord index of this slice's head inside the aux batchbuffer. */
1773 head_offset = old_used / 16;
1774 gen75_mfc_avc_slice_state(ctx,
1779 (rate_control_mode == VA_RC_CBR),
1783 if (slice_index == 0)
1784 gen75_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1786 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1789 mfc_context->insert_object(ctx,
1791 (unsigned int *)slice_header,
1792 ALIGN(slice_header_length_in_bits, 32) >> 5,
1793 slice_header_length_in_bits & 0x1f,
1794 5, /* first 5 bytes are start code + nal unit type */
/* Measure the head (slice-level commands) in OWords. */
1801 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1802 used = intel_batchbuffer_used_size(slice_batch);
1803 head_size = (used - old_used) / 16;
1808 mfc_context->insert_object(ctx,
1819 mfc_context->insert_object(ctx,
/* Measure the tail data in OWords. */
1831 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1832 used = intel_batchbuffer_used_size(slice_batch);
1833 tail_size = (used - old_used) / 16;
1836 gen75_mfc_avc_batchbuffer_slice_command(ctx,
1846 return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
/*
 * gen75_mfc_avc_batchbuffer_pipeline:
 * Drive the media (render-ring) pipeline that expands per-MB PAK commands
 * on the GPU: set up the GPE pipeline, then enqueue one batchbuffer-slice
 * pass per slice, accumulating each slice's OWord size as the output
 * offset for the next.
 *
 * NOTE(review): the 'offset += size' accumulation line around L1115/L1116
 * is elided in this excerpt — confirm in the full file.
 */
1850 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1851 struct encode_state *encode_state,
1852 struct intel_encoder_context *encoder_context)
1854 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1855 struct intel_batchbuffer *batch = encoder_context->base.batch;
1856 int i, size, offset = 0;
1857 intel_batchbuffer_start_atomic(batch, 0x4000);
1858 gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1860 for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1861 size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1865 intel_batchbuffer_end_atomic(batch);
/* Run the media kernels now; their output feeds the BSD ring later. */
1866 intel_batchbuffer_flush(batch);
/*
 * Build the AVC slice batchbuffer using the GPU path: bind the surfaces,
 * set up the interface descriptor table (IDRT) and the constant buffer,
 * then kick the media pipeline that fills mfc_batchbuffer_surface.
 */
1870 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1871                                 struct encode_state *encode_state,
1872                                 struct intel_encoder_context *encoder_context)
1874     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1875     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1876     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1877     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
/*
 * GPU-generated slice batchbuffer: build it via the media pipeline and
 * return the backing bo.  Takes an extra reference on the bo; the caller
 * (gen75_mfc_avc_pipeline_programing) unreferences it when done.
 */
1881 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1882                                    struct encode_state *encode_state,
1883                                    struct intel_encoder_context *encoder_context)
1885     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1887     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1888     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1890     return mfc_context->mfc_batchbuffer_surface.bo;
/*
 * Program the whole AVC encode pass on the BCS (video) ring: reject
 * interlaced content, obtain the slice-level batchbuffer (software- or
 * hardware-generated; the selection condition is elided in this view),
 * emit the picture-level commands, then chain to the slice bo with a
 * second-level MI_BATCH_BUFFER_START.
 */
1896 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1897                                   struct encode_state *encode_state,
1898                                   struct intel_encoder_context *encoder_context)
1900     struct intel_batchbuffer *batch = encoder_context->base.batch;
1901     dri_bo *slice_batch_bo;
     /* Interlaced encoding is not implemented; bail out early. */
1903     if ( interlace_check(ctx, encode_state, encoder_context) ) {
1904         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1910     slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1912     slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1916     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1917     intel_batchbuffer_emit_mi_flush(batch);
1919     // picture level programing
1920     gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
     /* Chain to the slice batchbuffer: MI_BATCH_BUFFER_START with the
      * second-level bit (1 << 8) set, relocated against slice_batch_bo. */
1922     BEGIN_BCS_BATCH(batch, 2);
1923     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1924     OUT_BCS_RELOC(batch,
1926                   I915_GEM_DOMAIN_COMMAND, 0,
1928     ADVANCE_BCS_BATCH(batch);
1931     intel_batchbuffer_end_atomic(batch);
     /* Drop the reference taken by the batchbuffer builder. */
1933     dri_bo_unreference(slice_batch_bo);
/*
 * Top-level AVC encode entry point: initialize the MFC state, prepare
 * reference/scratch surfaces, program and run the BCS pipeline.  Under
 * CBR, the BRC post-pack step inspects the produced frame size and
 * either updates the HRD context or notes an unrepairable HRD violation.
 *
 * Fix: the fourth argument of gen75_mfc_stop() had been mangled into the
 * mojibake "¤t_frame_bits_size" — the text "&curren" of
 * "&current_frame_bits_size" was misread as the HTML entity &curren;.
 * Restored the address-of expression so the produced bit count is
 * written back into current_frame_bits_size.
 */
1938 gen75_mfc_avc_encode_picture(VADriverContextP ctx,
1939                              struct encode_state *encode_state,
1940                              struct intel_encoder_context *encoder_context)
1942     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1943     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1944     int current_frame_bits_size;
1948     gen75_mfc_init(ctx, encoder_context);
1949     gen75_mfc_avc_prepare(ctx, encode_state, encoder_context);
1950     /*Programing bcs pipeline*/
1951     gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1952     gen75_mfc_run(ctx, encode_state, encoder_context);
1953     if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1954         gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1955         sts = gen75_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1956         if (sts == BRC_NO_HRD_VIOLATION) {
1957             gen75_mfc_hrd_context_update(encode_state, mfc_context);
1960         else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
             /* QP already clamped to its limit; report the HRD violation
              * once and continue rather than looping forever. */
1961             if (!mfc_context->hrd.violation_noted) {
1962                 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1963                 mfc_context->hrd.violation_noted = 1;
1965             return VA_STATUS_SUCCESS;
1972     return VA_STATUS_SUCCESS;
/*
 * Lazily initialize bit-rate control state for CBR encodes: on the first
 * frame (detected by zero-valued sentinels MaxSizeInWord and
 * i_cpb_size_value) set up the per-slice-type rate control contexts, the
 * BRC state, and the HRD context.  No-op for non-CBR modes.
 */
1975 static void gen75_mfc_brc_prepare(struct encode_state *encode_state,
1976                                   struct intel_encoder_context *encoder_context)
1978     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1979     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1981     if (rate_control_mode == VA_RC_CBR) {
1982         /*Programing bit rate control */
1983         if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) {
1984             gen75_mfc_bit_rate_control_context_init(encode_state, mfc_context);
1985             gen75_mfc_brc_init(encode_state, encoder_context);
1988         /*Programing HRD control */
1989         if ( mfc_context->vui_hrd.i_cpb_size_value == 0 )
1990             gen75_mfc_hrd_context_init(encode_state, encoder_context);
/*
 * Tear down a gen6_mfc_context: drop every buffer-object reference
 * (NULLing each pointer to guard against reuse), destroy the GPE
 * context, and free the auxiliary batchbuffer.
 * NOTE(review): the declaration of `i` and the final free of the context
 * struct fall on lines elided from this view.
 */
1995 gen75_mfc_context_destroy(void *context)
1997     struct gen6_mfc_context *mfc_context = context;
2000     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
2001     mfc_context->post_deblocking_output.bo = NULL;
2003     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
2004     mfc_context->pre_deblocking_output.bo = NULL;
2006     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
2007     mfc_context->uncompressed_picture_source.bo = NULL;
2009     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
2010     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
2012     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
2013         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
2014         mfc_context->direct_mv_buffers[i].bo = NULL;
2017     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2018     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2020     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2021     mfc_context->macroblock_status_buffer.bo = NULL;
2023     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2024     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2026     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2027     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2030     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2031         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2032         mfc_context->reference_surfaces[i].bo = NULL;
2035     i965_gpe_context_destroy(&mfc_context->gpe_context);
2037     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2038     mfc_context->mfc_batchbuffer_surface.bo = NULL;
2040     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2041     mfc_context->aux_batchbuffer_surface.bo = NULL;
2043     if (mfc_context->aux_batchbuffer)
2044         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2046     mfc_context->aux_batchbuffer = NULL;
/*
 * MFC pipeline dispatch: route the encode request by VA profile.  Only
 * H.264 Baseline/Main/High are implemented; everything else returns
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 * NOTE(review): the profile parameter and switch scaffolding fall on
 * lines elided from this view.
 */
2051 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
2053                            struct encode_state *encode_state,
2054                            struct intel_encoder_context *encoder_context)
2059     case VAProfileH264Baseline:
2060     case VAProfileH264Main:
2061     case VAProfileH264High:
2062         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
2065         /* FIXME: add for other profile */
2067         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
2074 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2076 struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2078 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2080 mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2081 mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2083 mfc_context->gpe_context.curbe.length = 32 * 4;
2085 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2086 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2087 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2088 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2089 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2091 i965_gpe_load_kernels(ctx,
2092 &mfc_context->gpe_context,
2096 mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
2097 mfc_context->set_surface_state = gen75_mfc_surface_state;
2098 mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
2099 mfc_context->avc_img_state = gen75_mfc_avc_img_state;
2100 mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
2101 mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
2102 mfc_context->insert_object = gen75_mfc_avc_insert_object;
2103 mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
2105 encoder_context->mfc_context = mfc_context;
2106 encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
2107 encoder_context->mfc_pipeline = gen75_mfc_pipeline;
2108 encoder_context->mfc_brc_prepare = gen75_mfc_brc_prepare;