2 * Copyright © 2010-2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhou Chang <chang.zhou@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
/* Bit-rate-control (BRC) tuning constants and HRD status codes.
 * NOTE(review): this extraction is missing lines here (the BRC_CLIP macro's
 * brace lines and several enum members) — consult the upstream file. */
44 #define CMD_LEN_IN_OWORD 4
/* Clamp x into [min, max] in place. */
46 #define BRC_CLIP(x, min, max) \
48 x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x)); \
/* Fixed QP offsets between slice types: B = P + 4, P = I + 2, B = I + 6. */
51 #define BRC_P_B_QP_DIFF 4
52 #define BRC_I_P_QP_DIFF 2
53 #define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
55 #define BRC_PWEIGHT 0.6 /* weight if P slice with comparison to I slice */
56 #define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */
58 #define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
59 #define BRC_CY 0.1 /* weight for */
60 #define BRC_CX_UNDERFLOW 5.
/* NOTE(review): negative constant is unparenthesized; safe in the current
 * uses but should be (-4.) to survive arbitrary macro expansion contexts. */
61 #define BRC_CX_OVERFLOW -4.
/* pi/2, used by the BRC QP-correction curve. */
63 #define BRC_PI_0_5 1.5707963267948966192313216916398
/* HRD (hypothetical reference decoder) violation status codes; returned by
 * gen6_mfc_update_hrd().  BRC_UNDERFLOW/BRC_OVERFLOW members are among the
 * lines missing from this extraction. */
65 typedef enum _gen6_brc_status
67 BRC_NO_HRD_VIOLATION = 0,
70 BRC_UNDERFLOW_WITH_MAX_QP = 3,
71 BRC_OVERFLOW_WITH_MIN_QP = 4,
/* Pre-compiled gen6 media-kernel binaries (.g6b = assembled EU instructions,
 * included as arrays of 4-dword instructions). */
74 static const uint32_t gen6_mfc_batchbuffer_avc_intra[][4] = {
75 #include "shaders/utils/mfc_batchbuffer_avc_intra.g6b"
78 static const uint32_t gen6_mfc_batchbuffer_avc_inter[][4] = {
79 #include "shaders/utils/mfc_batchbuffer_avc_inter.g6b"
/* Kernel table handed to the GPE context: one kernel to build the MFC
 * batch buffer for intra MBs, one for inter MBs. */
82 static struct i965_kernel gen6_mfc_kernels[] = {
84 "MFC AVC INTRA BATCHBUFFER ",
85 MFC_BATCHBUFFER_AVC_INTRA,
86 gen6_mfc_batchbuffer_avc_intra,
87 sizeof(gen6_mfc_batchbuffer_avc_intra),
92 "MFC AVC INTER BATCHBUFFER ",
93 MFC_BATCHBUFFER_AVC_INTER,
94 gen6_mfc_batchbuffer_avc_inter,
95 sizeof(gen6_mfc_batchbuffer_avc_inter),
/* Emit MFX_PIPE_MODE_SELECT: put the MFX engine into AVC *encode* mode.
 * Stream-out is disabled, and pre/post-deblocking writeback is enabled only
 * for whichever output BO gen6_mfc_init allocated. */
101 gen6_mfc_pipe_mode_select(VADriverContextP ctx,
103 struct intel_encoder_context *encoder_context)
105 struct intel_batchbuffer *batch = encoder_context->base.batch;
106 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Only the AVC codec path is implemented here. */
108 assert(standard_select == MFX_FORMAT_AVC);
110 BEGIN_BCS_BATCH(batch, 4);
112 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
114 (1 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
115 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
116 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
117 (0 << 7) | /* disable TLB prefectch */
118 (0 << 5) | /* not in stitch mode */
119 (1 << 4) | /* encoding mode */
120 (2 << 0)); /* Standard Select: AVC */
122 (0 << 20) | /* round flag in PB slice */
123 (0 << 19) | /* round flag in Intra8x8 */
124 (0 << 7) | /* expand NOA bus flag */
125 (1 << 6) | /* must be 1 */
126 (0 << 5) | /* disable clock gating for NOA */
127 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
128 (0 << 3) | /* terminate if AVC mbdata error occurs */
129 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
130 (0 << 1) | /* AVC long field motion vector */
131 (0 << 0)); /* always calculate AVC ILDB boundary strength */
132 OUT_BCS_BATCH(batch, 0);
134 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE describing the reconstructed/source picture:
 * NV12 (planar 4:2:0, interleaved U/V), Y-tiled, with dimensions and
 * pitches taken from mfc_context->surface_state. */
138 gen6_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
140 struct intel_batchbuffer *batch = encoder_context->base.batch;
141 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
143 BEGIN_BCS_BATCH(batch, 6);
145 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
146 OUT_BCS_BATCH(batch, 0);
/* Height/width are programmed as (pixels - 1), per MFX surface-state layout. */
148 ((mfc_context->surface_state.height - 1) << 19) |
149 ((mfc_context->surface_state.width - 1) << 6));
151 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
152 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
153 (0 << 22) | /* surface object control state, FIXME??? */
154 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
155 (0 << 2) | /* must be 0 for interleave U/V */
156 (1 << 1) | /* must be y-tiled */
157 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* NV12: chroma plane starts h_pitch rows below the luma plane. */
159 (0 << 16) | /* must be 0 for interleave U/V */
160 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
161 OUT_BCS_BATCH(batch, 0);
162 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE: relocations for every buffer the PAK engine
 * touches — deblocking outputs, source picture, stream-out/status buffers,
 * row-store scratch buffers, and up to 16 reference picture BOs. */
166 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
168 struct intel_batchbuffer *batch = encoder_context->base.batch;
169 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
172 BEGIN_BCS_BATCH(batch, 24);
174 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
/* Pre/post deblocking slots: emit a relocation when a BO exists, 0 otherwise. */
176 if (mfc_context->pre_deblocking_output.bo)
177 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
178 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
181 OUT_BCS_BATCH(batch, 0); /* pre output addr */
183 if (mfc_context->post_deblocking_output.bo)
184 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
185 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
186 0); /* post output addr */
188 OUT_BCS_BATCH(batch, 0);
190 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
191 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
192 0); /* uncompressed data */
193 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
194 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
195 0); /* StreamOut data*/
196 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
197 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
199 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
200 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202 /* 7..22 Reference pictures*/
203 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
204 if ( mfc_context->reference_surfaces[i].bo != NULL) {
205 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
206 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
209 OUT_BCS_BATCH(batch, 0);
/* Same BO doubles as the MB status buffer (dword 23). */
212 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
213 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
214 0); /* Macroblock status buffer*/
216 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE: indirect-object base addresses.
 * The MV input comes from the VME stage's output BO; the PAK-BSE output
 * (compressed bitstream) goes to the indirect PAK-BSE object, bounded by
 * its recorded end_offset. */
222 struct intel_batchbuffer *batch = encoder_context->base.batch;
223 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
224 struct gen6_vme_context *vme_context = encoder_context->vme_context;
226 BEGIN_BCS_BATCH(batch, 11);
228 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
231 /* MFX Indirect MV Object Base Address */
232 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
233 OUT_BCS_BATCH(batch, 0);
234 OUT_BCS_BATCH(batch, 0);
235 OUT_BCS_BATCH(batch, 0);
236 OUT_BCS_BATCH(batch, 0);
237 OUT_BCS_BATCH(batch, 0);
238 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
240 mfc_context->mfc_indirect_pak_bse_object.bo,
241 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* Upper-bound relocation: hardware must not write past end_offset. */
244 mfc_context->mfc_indirect_pak_bse_object.bo,
245 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
246 mfc_context->mfc_indirect_pak_bse_object.end_offset);
248 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE: only the BSD/MPC row-store scratch
 * buffer is needed for encode; the remaining two slots stay zero. */
252 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
254 struct intel_batchbuffer *batch = encoder_context->base.batch;
255 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
257 BEGIN_BCS_BATCH(batch, 4);
259 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
260 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
261 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
266 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE: per-picture AVC encode parameters — frame size in
 * MBs, entropy-coding mode, transform flags, and fixed rate-control report
 * masks / QP-delta tables used by the MB-level rate control. */
270 gen6_mfc_avc_img_state(VADriverContextP ctx,struct encode_state *encode_state,
271 struct intel_encoder_context *encoder_context)
273 struct intel_batchbuffer *batch = encoder_context->base.batch;
274 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
275 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
276 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
277 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
278 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
280 BEGIN_BCS_BATCH(batch, 13);
281 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
283 ((width_in_mbs * height_in_mbs) & 0xFFFF));
285 (height_in_mbs << 16) |
286 (width_in_mbs << 0));
288 (0 << 24) | /*Second Chroma QP Offset*/
289 (0 << 16) | /*Chroma QP Offset*/
290 (0 << 14) | /*Max-bit conformance Intra flag*/
291 (0 << 13) | /*Max Macroblock size conformance Inter flag*/
292 (1 << 12) | /*Should always be written as "1" */
293 (0 << 10) | /*QM Preset FLag */
294 (0 << 8) | /*Image Structure*/
295 (0 << 0) ); /*Current Decoed Image Frame Store ID, reserved in Encode mode*/
297 (400 << 16) | /*Mininum Frame size*/
298 (0 << 15) | /*Disable reading of Macroblock Status Buffer*/
299 (0 << 14) | /*Load BitStream Pointer only once, 1 slic 1 frame*/
300 (0 << 13) | /*CABAC 0 word insertion test enable*/
301 (1 << 12) | /*MVUnpackedEnable,compliant to DXVA*/
302 (1 << 10) | /*Chroma Format IDC, 4:2:0*/
303 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
304 (0 << 6) | /*Only valid for VLD decoding mode*/
305 (0 << 5) | /*Constrained Intra Predition Flag, from PPS*/
306 (pSequenceParameter->seq_fields.bits.direct_8x8_inference_flag << 4) | /*Direct 8x8 inference flag*/
307 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
308 (1 << 2) | /*Frame MB only flag*/
309 (0 << 1) | /*MBAFF mode is in active*/
310 (0 << 0) ); /*Field picture flag*/
312 (1<<16) | /*Frame Size Rate Control Flag*/
314 (1<<9) | /*MB level Rate Control Enabling Flag*/
315 (1 << 3) | /*FrameBitRateMinReportMask*/
316 (1 << 2) | /*FrameBitRateMaxReportMask*/
317 (1 << 1) | /*InterMBMaxSizeReportMask*/
318 (1 << 0) ); /*IntraMBMaxSizeReportMask*/
319 OUT_BCS_BATCH(batch, /*Inter and Intra Conformance Max size limit*/
320 (0x0600 << 16) | /*InterMbMaxSz 192 Byte*/
321 (0x0800) ); /*IntraMbMaxSz 256 Byte*/
322 OUT_BCS_BATCH(batch, 0x00000000); /*Reserved : MBZReserved*/
/* Hard-coded QP delta / frame-size thresholds for the HW rate controller. */
323 OUT_BCS_BATCH(batch, 0x01020304); /*Slice QP Delta for bitrate control*/
324 OUT_BCS_BATCH(batch, 0xFEFDFCFB);
325 OUT_BCS_BATCH(batch, 0x80601004); /*MAX = 128KB, MIN = 64KB*/
326 OUT_BCS_BATCH(batch, 0x00800001);
327 OUT_BCS_BATCH(batch, 0);
329 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_DIRECTMODE_STATE: direct-MV buffer addresses for reference
 * and current frames, plus (trivial) POC list entries i/2 so that each
 * frame's top/bottom field pair shares one POC slot. */
333 gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
335 struct intel_batchbuffer *batch = encoder_context->base.batch;
336 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
340 BEGIN_BCS_BATCH(batch, 69);
342 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
344 /* Reference frames and Current frames */
345 for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
346 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
347 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
348 I915_GEM_DOMAIN_INSTRUCTION, 0,
351 OUT_BCS_BATCH(batch, 0);
/* POC List: 32 entries, two fields per frame -> value i/2. */
356 for(i = 0; i < 32; i++) {
357 OUT_BCS_BATCH(batch, i/2);
359 OUT_BCS_BATCH(batch, 0);
360 OUT_BCS_BATCH(batch, 0);
362 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_SLICE_STATE for one slice: slice type, QP, deblocking and
 * CABAC parameters, slice MB extents, last-slice detection, and the
 * software-BRC correction values (grow/shrink/Correct[]) for this slice's
 * rate-control class.  SP/SI slices are folded onto the P/I BRC contexts.
 * NOTE(review): several dword-opener lines are missing from this extraction;
 * the bitfield comments document the visible payload only. */
366 gen6_mfc_avc_slice_state(VADriverContextP ctx,
367 VAEncPictureParameterBufferH264 *pic_param,
368 VAEncSliceParameterBufferH264 *slice_param,
369 struct encode_state *encode_state,
370 struct intel_encoder_context *encoder_context,
371 int rate_control_enable,
373 struct intel_batchbuffer *batch)
375 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
376 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
377 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
378 int beginmb = slice_param->macroblock_address;
379 int endmb = beginmb + slice_param->num_macroblocks;
380 int beginx = beginmb % width_in_mbs;
381 int beginy = beginmb / width_in_mbs;
382 int nextx = endmb % width_in_mbs;
383 int nexty = endmb / width_in_mbs;
384 int slice_type = slice_param->slice_type;
385 int last_slice = (endmb == (width_in_mbs * height_in_mbs));
386 int bit_rate_control_target, maxQpN, maxQpP;
387 unsigned char correct[6], grow, shrink;
389 int weighted_pred_idc = 0;
390 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
391 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
/* Caller may pass a NULL/aux batch; default to the encoder's main batch. */
394 batch = encoder_context->base.batch;
/* Map SP->P and SI->I for rate-control bookkeeping. */
396 bit_rate_control_target = slice_type;
397 if (slice_type == SLICE_TYPE_SP)
398 bit_rate_control_target = SLICE_TYPE_P;
399 else if (slice_type == SLICE_TYPE_SI)
400 bit_rate_control_target = SLICE_TYPE_I;
402 if (slice_type == SLICE_TYPE_P) {
403 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
404 } else if (slice_type == SLICE_TYPE_B) {
405 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
407 if (weighted_pred_idc == 2) {
408 /* 8.4.3 - Derivation process for prediction weights (8-279) */
409 luma_log2_weight_denom = 5;
410 chroma_log2_weight_denom = 5;
/* Fetch the BRC correction state for this slice class. */
414 maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
415 maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
417 for (i = 0; i < 6; i++)
418 correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
420 grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit +
421 (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
422 shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit +
423 (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
425 BEGIN_BCS_BATCH(batch, 11);;
427 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
428 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
430 if (slice_type == SLICE_TYPE_I) {
431 OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
434 (1 << 16) | /*1 reference frame*/
435 (chroma_log2_weight_denom << 8) |
436 (luma_log2_weight_denom << 0));
440 (weighted_pred_idc << 30) |
441 (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
442 (slice_param->disable_deblocking_filter_idc << 27) |
443 (slice_param->cabac_init_idc << 24) |
444 (qp<<16) | /*Slice Quantization Parameter*/
445 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
446 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
448 (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
450 slice_param->macroblock_address );
451 OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
453 (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
454 (1 << 30) | /*ResetRateControlCounter*/
455 (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
456 (4 << 24) | /*RC Stable Tolerance, middle level*/
457 (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
458 (0 << 22) | /*QP mode, don't modfiy CBP*/
459 (0 << 21) | /*MB Type Direct Conversion Enabled*/
460 (0 << 20) | /*MB Type Skip Conversion Enabled*/
461 (last_slice << 19) | /*IsLastSlice*/
462 (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
463 (1 << 17) | /*HeaderPresentFlag*/
464 (1 << 16) | /*SliceData PresentFlag*/
465 (1 << 15) | /*TailPresentFlag*/
466 (1 << 13) | /*RBSP NAL TYPE*/
467 (0 << 12) ); /*CabacZeroWordInsertionEnable*/
468 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
470 (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
471 (maxQpP << 16) | /*Target QP + 20 is highest QP*/
481 OUT_BCS_BATCH(batch, 0);
483 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_QM_STATE: program flat (all-16) quantization matrices for
 * every list — i.e. no custom scaling lists. */
486 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
488 struct intel_batchbuffer *batch = encoder_context->base.batch;
491 BEGIN_BCS_BATCH(batch, 58);
493 OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
494 OUT_BCS_BATCH(batch, 0xFF ) ;
/* 56 dwords of 0x10 bytes = flat 16 matrices. */
495 for( i = 0; i < 56; i++) {
496 OUT_BCS_BATCH(batch, 0x10101010);
499 ADVANCE_BCS_BATCH(batch);
/* Emit MFC_AVC_FQM_STATE: forward quantization matrices matching the flat
 * QM state above (0x1000 per 16-bit entry). */
502 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
504 struct intel_batchbuffer *batch = encoder_context->base.batch;
507 BEGIN_BCS_BATCH(batch, 113);
508 OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
510 for(i = 0; i < 112;i++) {
511 OUT_BCS_BATCH(batch, 0x10001000);
514 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_REF_IDX_STATE twice (L0 then L1): a single active reference
 * per list; 0x80 marks unused entries. */
518 gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
520 struct intel_batchbuffer *batch = encoder_context->base.batch;
523 BEGIN_BCS_BATCH(batch, 10);
524 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
525 OUT_BCS_BATCH(batch, 0); //Select L0
526 OUT_BCS_BATCH(batch, 0x80808020); //Only 1 reference
527 for(i = 0; i < 7; i++) {
528 OUT_BCS_BATCH(batch, 0x80808080);
530 ADVANCE_BCS_BATCH(batch);
532 BEGIN_BCS_BATCH(batch, 10);
533 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
534 OUT_BCS_BATCH(batch, 1); //Select L1
535 OUT_BCS_BATCH(batch, 0x80808022); //Only 1 reference
536 for(i = 0; i < 7; i++) {
537 OUT_BCS_BATCH(batch, 0x80808080);
539 ADVANCE_BCS_BATCH(batch);
/* Emit MFC_AVC_INSERT_OBJECT: copy raw header bits (SPS/PPS/SEI/slice
 * header) into the bitstream, with optional emulation-prevention-byte
 * insertion and end-of-slice marking.  (Parameter name "lenght_in_dws" is a
 * historical typo kept for interface stability.) */
543 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
544 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
545 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
546 struct intel_batchbuffer *batch)
/* Fall back to the encoder's main batch. */
549 batch = encoder_context->base.batch;
551 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
553 OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
556 (0 << 16) | /* always start at offset 0 */
557 (data_bits_in_last_dw << 8) |
558 (skip_emul_byte_count << 4) |
559 (!!emulation_flag << 3) |
560 ((!!is_last_header) << 2) |
561 ((!!is_end_of_slice) << 1) |
562 (0 << 0)); /* FIXME: ??? */
564 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
565 ADVANCE_BCS_BATCH(batch);
/* Per-frame (re)initialization: drop all BO references held from the
 * previous frame, reallocate the fixed-size scratch buffers, recreate the
 * auxiliary BSD batchbuffer, and reset the GPE context. */
568 static void gen6_mfc_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
570 struct i965_driver_data *i965 = i965_driver_data(ctx);
571 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
575 /*Encode common setup for MFC*/
576 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
577 mfc_context->post_deblocking_output.bo = NULL;
579 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
580 mfc_context->pre_deblocking_output.bo = NULL;
582 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
583 mfc_context->uncompressed_picture_source.bo = NULL;
585 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
586 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
588 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
/* NOTE(review): stray ';' terminates this if — the unreference below runs
 * unconditionally.  Harmless only because dri_bo_unreference(NULL) is a
 * no-op; the ';' (and the if itself) should be removed. */
589 if ( mfc_context->direct_mv_buffers[i].bo != NULL);
590 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
591 mfc_context->direct_mv_buffers[i].bo = NULL;
594 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
595 if (mfc_context->reference_surfaces[i].bo != NULL)
596 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
597 mfc_context->reference_surfaces[i].bo = NULL;
/* Scratch allocations below: name/size/alignment argument lines are missing
 * from this extraction; sizes visible in comments assume <=128 MB columns. */
600 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
601 bo = dri_bo_alloc(i965->intel.bufmgr,
606 mfc_context->intra_row_store_scratch_buffer.bo = bo;
608 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
609 bo = dri_bo_alloc(i965->intel.bufmgr,
614 mfc_context->macroblock_status_buffer.bo = bo;
616 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
617 bo = dri_bo_alloc(i965->intel.bufmgr,
619 49152, /* 6 * 128 * 64 */
622 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
624 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
625 bo = dri_bo_alloc(i965->intel.bufmgr,
627 12288, /* 1.5 * 128 * 64 */
630 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
632 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
633 mfc_context->mfc_batchbuffer_surface.bo = NULL;
635 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
636 mfc_context->aux_batchbuffer_surface.bo = NULL;
638 if (mfc_context->aux_batchbuffer)
639 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
/* The aux batch doubles as a GPE surface; keep an extra BO reference. */
641 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
642 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
643 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
644 mfc_context->aux_batchbuffer_surface.pitch = 16;
645 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
646 mfc_context->aux_batchbuffer_surface.size_block = 16;
648 i965_gpe_context_init(ctx, &mfc_context->gpe_context);
/* Insert the packed SPS, PPS and SEI headers (when the application supplied
 * them) into the bitstream via insert_object.  Lengths are given in bits;
 * ALIGN(bits,32)>>5 converts to dwords.  Emulation-prevention bytes are
 * inserted by hardware only when the packed data does not already contain
 * them (!has_emulation_bytes). */
651 static void gen6_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
652 struct encode_state *encode_state,
653 struct intel_encoder_context *encoder_context,
654 struct intel_batchbuffer *slice_batch)
656 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
657 int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
659 if (encode_state->packed_header_data[idx]) {
660 VAEncPackedHeaderParameterBuffer *param = NULL;
661 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
662 unsigned int length_in_bits;
664 assert(encode_state->packed_header_param[idx]);
665 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
666 length_in_bits = param->bit_length;
668 mfc_context->insert_object(ctx,
671 ALIGN(length_in_bits, 32) >> 5,
672 length_in_bits & 0x1f,
673 5, /* FIXME: check it */
676 !param->has_emulation_bytes,
/* Same pattern for the PPS header. */
680 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
682 if (encode_state->packed_header_data[idx]) {
683 VAEncPackedHeaderParameterBuffer *param = NULL;
684 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
685 unsigned int length_in_bits;
687 assert(encode_state->packed_header_param[idx]);
688 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
689 length_in_bits = param->bit_length;
691 mfc_context->insert_object(ctx,
694 ALIGN(length_in_bits, 32) >> 5,
695 length_in_bits & 0x1f,
696 5, /* FIXME: check it */
699 !param->has_emulation_bytes,
/* And for optional SEI messages. */
703 idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
705 if (encode_state->packed_header_data[idx]) {
706 VAEncPackedHeaderParameterBuffer *param = NULL;
707 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
708 unsigned int length_in_bits;
710 assert(encode_state->packed_header_param[idx]);
711 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
712 length_in_bits = param->bit_length;
714 mfc_context->insert_object(ctx,
717 ALIGN(length_in_bits, 32) >> 5,
718 length_in_bits & 0x1f,
719 5, /* FIXME: check it */
722 !param->has_emulation_bytes,
/* Program all per-picture MFX state in the required order: pipe mode,
 * surface, indirect objects, buffer addresses, image state, QM/FQM,
 * direct-mode and reference-index state.  Hook-style members
 * (mfc_context->...) allow gen-specific overrides. */
727 static void gen6_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
728 struct encode_state *encode_state,
729 struct intel_encoder_context *encoder_context)
731 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
733 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
734 mfc_context->set_surface_state(ctx, encoder_context);
735 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
736 gen6_mfc_pipe_buf_addr_state(ctx, encoder_context);
737 gen6_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
738 mfc_context->avc_img_state(ctx, encode_state, encoder_context);
739 mfc_context->avc_qm_state(ctx, encoder_context);
740 mfc_context->avc_fqm_state(ctx, encoder_context);
741 gen6_mfc_avc_directmode_state(ctx, encoder_context);
742 gen6_mfc_avc_ref_idx_state(ctx, encoder_context);
/* Destructor for the per-surface AVC private data: drop the top/bottom
 * direct-MV BO references.  (NULL-check and free of the container are among
 * the lines missing from this extraction.) */
746 gen6_mfc_free_avc_surface(void **data)
748 struct gen6_mfc_avc_surface_aux *avc_surface = *data;
753 dri_bo_unreference(avc_surface->dmv_top);
754 avc_surface->dmv_top = NULL;
755 dri_bo_unreference(avc_surface->dmv_bottom);
756 avc_surface->dmv_bottom = NULL;
/* Initialize the per-slice-type (I/P/B) software BRC contexts: target MB /
 * frame sizes derived from the sequence bitrate and framerate, plus fixed
 * initial QP (26) and grow/shrink correction parameters. */
763 gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
764 struct gen6_mfc_context *mfc_context)
766 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
767 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
768 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* time_scale counts field ticks -> *0.5 for frames per second. */
769 float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
770 int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
/* Heuristic: budget intra MBs at 5x the inter MB size. */
771 int intra_mb_size = inter_mb_size * 5.0;
774 mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
775 mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
776 mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
777 mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
778 mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
779 mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
781 for(i = 0 ; i < 3; i++) {
782 mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
783 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
784 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
785 mfc_context->bit_rate_control_context[i].GrowInit = 6;
786 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
787 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
788 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
/* Symmetric correction table around the target size. */
790 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
791 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
792 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
793 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
794 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
795 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
798 mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
799 mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
800 mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
/* Hard MB-size ceiling: 1.5x the target. */
802 mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
803 mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
804 mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
808 gen6_mfc_brc_init(struct encode_state *encode_state,
809 struct intel_encoder_context* encoder_context)
811 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
812 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
813 VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
814 VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterBuffer*)pMiscParamHRD->data;
815 double bitrate = pSequenceParameter->bits_per_second;
816 double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
817 int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
818 int intra_period = pSequenceParameter->intra_period;
819 int ip_period = pSequenceParameter->ip_period;
820 double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
821 double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
824 if (pSequenceParameter->ip_period) {
825 pnum = (intra_period + ip_period - 1)/ip_period - 1;
826 bnum = intra_period - inum - pnum;
829 mfc_context->brc.mode = encoder_context->rate_control_mode;
831 mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
832 (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
833 mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
834 mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
836 mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
837 mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
838 mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
840 bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
842 mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
843 mfc_context->hrd.current_buffer_fullness =
844 (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
845 pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
846 mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
847 mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
848 mfc_context->hrd.violation_noted = 0;
850 if ((bpf > qp51_size) && (bpf < qp1_size)) {
851 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
853 else if (bpf >= qp1_size)
854 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
855 else if (bpf <= qp51_size)
856 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
858 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
859 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
861 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
862 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
863 BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
/*
 * Update the HRD (Hypothetical Reference Decoder) buffer-fullness model
 * after one encoded frame: drain the bits the frame produced, then refill
 * by the per-frame bit budget.  On a would-be violation the fullness is
 * rolled back to its previous value and a violation status is returned so
 * the caller (BRC post-pack) can react.
 *
 * NOTE(review): this excerpt is truncated — the final parameter
 * (presumably "int frame_bits", which the body reads) and the braces are
 * not visible here; confirm against the full file.
 */
866 static int gen6_mfc_update_hrd(struct encode_state *encode_state,
867                                struct gen6_mfc_context *mfc_context,
/* Saved so we can roll back if this frame would violate the HRD. */
870     double prev_bf = mfc_context->hrd.current_buffer_fullness;
872     mfc_context->hrd.current_buffer_fullness -= frame_bits;
/* Draining to (or below) zero means the decoder buffer underflows. */
874     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
875         mfc_context->hrd.current_buffer_fullness = prev_bf;
876         return BRC_UNDERFLOW;
/* Refill by the nominal per-frame budget computed at BRC init. */
879     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
880     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
/* In VBR mode overflow is tolerated: clamp to the buffer size instead
 * of reporting a violation.  Otherwise roll back (CBR overflow path —
 * the BRC_OVERFLOW return is on a line not visible in this excerpt). */
881         if (mfc_context->brc.mode == VA_RC_VBR)
882             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
884             mfc_context->hrd.current_buffer_fullness = prev_bf;
888     return BRC_NO_HRD_VIOLATION;
/*
 * Bit-rate-control post-pack step: after a frame has been packed, compare
 * the produced bit count against the per-slice-type target, predict the QP
 * for the next frame of the same type, apply an HRD-fullness-based
 * correction, and propagate consistent QP deltas to the other slice types.
 * Returns a gen6_brc_status (BRC_NO_HRD_VIOLATION on success, or an
 * under/overflow status, possibly the *_WITH_MAX/MIN_QP variants when the
 * QP can no longer be pushed further).
 *
 * NOTE(review): the excerpt is truncated — the frame_bits parameter,
 * several braces and a few statements (e.g. the qpn +/- 1 rounding
 * adjustments around lines 940/943) are not visible; confirm against the
 * full file.
 */
891 static int gen6_mfc_brc_postpack(struct encode_state *encode_state,
892                                  struct gen6_mfc_context *mfc_context,
895     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
896     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
897     int slicetype = pSliceParameter->slice_type;
898     int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
899     int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
900     int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
901     int qp; // quantizer of previously encoded slice of current type
902     int qpn; // predicted quantizer for next frame of current type in integer format
903     double qpf; // predicted quantizer for next frame of current type in float format
904     double delta_qp; // QP correction
905     int target_frame_size, frame_size_next;
907      * x - how far we are from HRD buffer borders
908      * y - how far we are from target HRD buffer fullness
911     double frame_size_alpha;
/* SP/SI slices are rate-controlled as their P/I counterparts. */
913     if (slicetype == SLICE_TYPE_SP)
914         slicetype = SLICE_TYPE_P;
915     else if (slicetype == SLICE_TYPE_SI)
916         slicetype = SLICE_TYPE_I;
918     qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
920     target_frame_size = mfc_context->brc.target_frame_size[slicetype];
/* alpha damps the size feedback; tiny HRD buffers get no damping,
 * otherwise it grows with the number of frames of this type per GOP,
 * clipped at 30. */
921     if (mfc_context->hrd.buffer_capacity < 5)
922         frame_size_alpha = 0;
924         frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
925     if (frame_size_alpha > 30) frame_size_alpha = 30;
926     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
927                       (double)(frame_size_alpha + 1.);
929     /* frame_size_next: avoiding negative number and too small value */
930     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
931         frame_size_next = (int)((double)target_frame_size * 0.25);
/* First-order model: bits produced scale roughly inversely with QP, so
 * scale the previous QP by target/predicted size. */
933     qpf = (double)qp * target_frame_size / frame_size_next;
934     qpn = (int)(qpf + 0.5);
937     /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
938     mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
939     if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
941         mfc_context->brc.qpf_rounding_accumulator = 0.;
942     } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
944         mfc_context->brc.qpf_rounding_accumulator = 0.;
947     /* making sure that QP is not changing too fast */
948     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
949     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
950     /* making sure that with QP predictions we did do not leave QPs range */
951     BRC_CLIP(qpn, 1, 51);
953     /* checking whether HRD compliance is still met */
954     sts = gen6_mfc_update_hrd(encode_state, mfc_context, frame_bits);
956     /* calculating QP delta as some function*/
/* x in [-1,1]: signed distance from target fullness, normalized against
 * whichever buffer border we are drifting toward; y: remaining headroom
 * on that side.  delta = MAX_CHANGE * e^(-1/y) * sin(pi/2 * x). */
957     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
959         x /= mfc_context->hrd.target_buffer_fullness;
960         y = mfc_context->hrd.current_buffer_fullness;
963         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
964         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
/* Avoid a huge exponent when the headroom is effectively zero. */
966     if (y < 0.01) y = 0.01;
968     else if (x < -1) x = -1;
970     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
971     qpn = (int)(qpn + delta_qp + 0.5);
973     /* making sure that with QP predictions we did do not leave QPs range */
974     BRC_CLIP(qpn, 1, 51);
976     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
977         /* correcting QPs of slices of other types */
/* Keep the I/P/B QPs within their conventional offsets of each other
 * (BRC_I_P_QP_DIFF, BRC_P_B_QP_DIFF), nudging by half/quarter of the
 * discrepancy. */
978         if (slicetype == SLICE_TYPE_P) {
979             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
980                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
981             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
982                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
983         } else if (slicetype == SLICE_TYPE_I) {
984             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
985                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
986             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
987                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
988         } else { // SLICE_TYPE_B
989             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
990                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
991             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
992                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
994         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
995         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
996         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
997     } else if (sts == BRC_UNDERFLOW) { // underflow
/* Frame was too big: force the QP up at least one step. */
998         if (qpn <= qp) qpn = qp + 1;
1001             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
1003     } else if (sts == BRC_OVERFLOW) {
/* Frame was too small: force the QP down at least one step. */
1004         if (qpn >= qp) qpn = qp - 1;
1005         if (qpn < 1) { // < 0 (?) overflow with minQP
1007             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
/* Commit the predicted QP for the next frame of this slice type. */
1011     mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
/*
 * Initialize the VUI HRD parameters used for buffering-period / picture-
 * timing SEI generation.  Only CBR mode is handled; bit-rate and CPB size
 * are stored in the >>10 scaled units the SEI writer expects, and the
 * initial CPB removal delay is expressed in 90 kHz clock ticks.
 * NOTE(review): braces and the return type line are not visible in this
 * excerpt.
 */
1017 gen6_mfc_hrd_context_init(struct encode_state *encode_state,
1018                           struct intel_encoder_context *encoder_context)
1020     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1021     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1022     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1023     int target_bit_rate = pSequenceParameter->bits_per_second;
1025     // currently we only support CBR mode.
1026     if (rate_control_mode == VA_RC_CBR) {
/* bit_rate_value/cpb_size_value are stored pre-scaled (>>10); the CPB
 * here is sized at 8x the per-1024 bit-rate unit. */
1027         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
1028         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
/* Start with the CPB half full, converted to 90 kHz ticks. */
1029         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
1030         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
1031         mfc_context->vui_hrd.i_frame_number = 0;
/* Fixed 24-bit fields for the SEI delay syntax elements. */
1033         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
1034         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
1035         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
/*
 * Advance the VUI HRD state by one frame.  In this excerpt only the
 * frame-number increment is visible (braces/return type truncated).
 */
1041 gen6_mfc_hrd_context_update(struct encode_state *encode_state,
1042                             struct gen6_mfc_context *mfc_context)
1044     mfc_context->vui_hrd.i_frame_number++;
/*
 * Prepare all input/output buffer objects for one AVC encode: the
 * reconstructed (current) surface and its direct-MV buffers, the
 * reference surfaces and their DMV buffers, the raw input YUV surface,
 * and the coded (PAK/BSE) output buffer.  Also decides whether in-loop
 * deblocking output is needed by scanning every slice's
 * disable_deblocking_filter_idc.
 *
 * NOTE(review): several lines (braces, dri_bo_alloc size arguments,
 * dri_bo_map/unmap of the coded buffer, the final return) are truncated
 * in this excerpt; confirm against the full file.
 */
1047 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx,
1048                                      struct encode_state *encode_state,
1049                                      struct intel_encoder_context *encoder_context)
1051     struct i965_driver_data *i965 = i965_driver_data(ctx);
1052     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1053     struct object_surface *obj_surface;
1054     struct object_buffer *obj_buffer;
1055     struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
1057     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1058     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1059     VAStatus vaStatus = VA_STATUS_SUCCESS;
1060     int i, j, enable_avc_ildb = 0;
1061     VAEncSliceParameterBufferH264 *slice_param;
1062     VACodedBufferSegment *coded_buffer_segment;
1063     unsigned char *flag = NULL;
/* Enable in-loop deblocking output if any slice has deblocking on
 * (idc != 1); the scan stops as soon as one such slice is found. */
1065     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
1066         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
1067         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
1069         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
1070             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1071                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1072                    (slice_param->slice_type == SLICE_TYPE_P) ||
1073                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1074                    (slice_param->slice_type == SLICE_TYPE_B));
1076             if (slice_param->disable_deblocking_filter_idc != 1) {
1077                 enable_avc_ildb = 1;
1085     /*Setup all the input&output object*/
1087     /* Setup current frame and current direct mv buffer*/
1088     obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
1089     assert(obj_surface);
1090     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* Lazily attach the per-surface aux data (top/bottom direct-MV BOs). */
1092     if ( obj_surface->private_data == NULL) {
/* NOTE(review): calloc result is not NULL-checked before the
 * dereference below — worth confirming/fixing in the full file. */
1093         gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1094         gen6_avc_surface->dmv_top =
1095             dri_bo_alloc(i965->intel.bufmgr,
1099         gen6_avc_surface->dmv_bottom =
1100             dri_bo_alloc(i965->intel.bufmgr,
1104         assert(gen6_avc_surface->dmv_top);
1105         assert(gen6_avc_surface->dmv_bottom);
1106         obj_surface->private_data = (void *)gen6_avc_surface;
1107         obj_surface->free_private_data = (void *)gen6_mfc_free_avc_surface;
/* The current frame's DMV buffers occupy the last two slots. */
1109     gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1110     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
1111     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
1112     dri_bo_reference(gen6_avc_surface->dmv_top);
1113     dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* With deblocking enabled the reconstructed picture comes from the
 * post-deblocking path, otherwise from the pre-deblocking path. */
1115     if (enable_avc_ildb) {
1116         mfc_context->post_deblocking_output.bo = obj_surface->bo;
1117         dri_bo_reference(mfc_context->post_deblocking_output.bo);
1119         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
1120         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
1123     mfc_context->surface_state.width = obj_surface->orig_width;
1124     mfc_context->surface_state.height = obj_surface->orig_height;
1125     mfc_context->surface_state.w_pitch = obj_surface->width;
1126     mfc_context->surface_state.h_pitch = obj_surface->height;
1128     /* Setup reference frames and direct mv buffers*/
1129     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
1130         if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) {
1131             obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
1132             assert(obj_surface);
1133             if (obj_surface->bo != NULL) {
1134                 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1135                 dri_bo_reference(obj_surface->bo);
1137             /* Check DMV buffer */
1138             if ( obj_surface->private_data == NULL) {
/* NOTE(review): same unchecked calloc as above. */
1140                 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1141                 gen6_avc_surface->dmv_top =
1142                     dri_bo_alloc(i965->intel.bufmgr,
1146                 gen6_avc_surface->dmv_bottom =
1147                     dri_bo_alloc(i965->intel.bufmgr,
1151                 assert(gen6_avc_surface->dmv_top);
1152                 assert(gen6_avc_surface->dmv_bottom);
1153                 obj_surface->private_data = gen6_avc_surface;
1154                 obj_surface->free_private_data = gen6_mfc_free_avc_surface;
1157             gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1158             /* Setup DMV buffer */
1159             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
1160             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
1161             dri_bo_reference(gen6_avc_surface->dmv_top);
1162             dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Raw input picture for the PAK stage. */
1168     obj_surface = SURFACE(encoder_context->input_yuv_surface);
1169     assert(obj_surface && obj_surface->bo);
1170     mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
1171     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Coded buffer: the bitstream (BSE) output starts past the driver's
 * coded-buffer header area. */
1173     obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
1174     bo = obj_buffer->buffer_store->bo;
1176     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
1177     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_SIZE;
1178     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
1179     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
/* The status flag byte lives right after the VACodedBufferSegment
 * header in the mapped coded buffer. */
1182     coded_buffer_segment = (VACodedBufferSegment *)bo->virtual;
1183     flag = (unsigned char *)(coded_buffer_segment + 1);
/*
 * Kick off the previously-built BCS batch: flushing the batchbuffer
 * submits it to the kernel and starts the hardware encode.
 * (Braces are truncated in this excerpt.)
 */
1190 static VAStatus gen6_mfc_run(VADriverContextP ctx,
1191                              struct encode_state *encode_state,
1192                              struct intel_encoder_context *encoder_context)
1194     struct intel_batchbuffer *batch = encoder_context->base.batch;
1196     intel_batchbuffer_flush(batch); //run the pipeline
1198     return VA_STATUS_SUCCESS;
/* Forward declarations of the driver's buffer map/unmap entry points,
 * used below by gen6_mfc_stop() to read back the coded-buffer segment.
 * (The i965_UnmapBuffer parameter list is truncated in this excerpt.) */
1202 i965_MapBuffer(VADriverContextP ctx,
1203                VABufferID buf_id,       /* in */
1204                void **pbuf);            /* out */
1206 i965_UnmapBuffer(VADriverContextP ctx,
/*
 * Read back the size of the just-encoded frame: map the coded buffer,
 * take the byte count from the VACodedBufferSegment header, convert it
 * to bits for the BRC, then unmap.  Implicitly synchronizes with the
 * GPU via the buffer map.  (Return-type line and braces truncated.)
 */
1210 gen6_mfc_stop(VADriverContextP ctx,
1211               struct encode_state *encode_state,
1212               struct intel_encoder_context *encoder_context,
1213               int *encoded_bits_size)
1215     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1216     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1217     VACodedBufferSegment *coded_buffer_segment;
1219     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1220     assert(vaStatus == VA_STATUS_SUCCESS);
1221     *encoded_bits_size = coded_buffer_segment->size * 8;
1222     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1224     return VA_STATUS_SUCCESS;
/*
 * Emit one MFC_AVC_PAK_OBJECT command for an intra macroblock at (x, y).
 * msg[] carries the VME output for this MB (mode/CBP info and intra
 * prediction modes).  Returns the command length in dwords so the caller
 * can track batch usage.  (Braces truncated in this excerpt.)
 */
1230 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
1231                               struct intel_encoder_context *encoder_context,
1232                               unsigned char target_mb_size, unsigned char max_mb_size,
1233                               struct intel_batchbuffer *batch)
1235     int len_in_dwords = 11;
/* Fall back to the encoder's main batch when none is supplied —
 * presumably guarded by "if (batch == NULL)" on a truncated line. */
1238         batch = encoder_context->base.batch;
1240     BEGIN_BCS_BATCH(batch, len_in_dwords);
1242     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1243     OUT_BCS_BATCH(batch, 0);
1244     OUT_BCS_BATCH(batch, 0);
1245     OUT_BCS_BATCH(batch,
1246                   (0 << 24) |       /* PackedMvNum, Debug*/
1247                   (0 << 20) |       /* No motion vector */
1248                   (1 << 19) |       /* CbpDcY */
1249                   (1 << 18) |       /* CbpDcU */
1250                   (1 << 17) |       /* CbpDcV */
1251                   (msg[0] & 0xFFFF) );
1253     OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1254     OUT_BCS_BATCH(batch, 0x000F000F);                           /* Code Block Pattern */
1255     OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */
1257     /*Stuff for Intra MB*/
1258     OUT_BCS_BATCH(batch, msg[1]);                   /* We using Intra16x16 no 4x4 predmode*/
1259     OUT_BCS_BATCH(batch, msg[2]);
1260     OUT_BCS_BATCH(batch, msg[3]&0xFC);
1262     /*MaxSizeInWord and TargetSizeInWord*/
1263     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1264                   (target_mb_size << 16) );
1266     ADVANCE_BCS_BATCH(batch);
1268     return len_in_dwords;
/*
 * Emit one MFC_AVC_PAK_OBJECT command for an inter macroblock at (x, y).
 * msg[] is the VME inter output; offset is the byte offset of this MB's
 * motion vectors in the VME output buffer (referenced indirectly).
 * Returns the command length in dwords.  (Braces and at least one
 * OUT_BCS_* line between dwords are truncated in this excerpt.)
 */
1272 gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1273                               unsigned int *msg, unsigned int offset,
1274                               struct intel_encoder_context *encoder_context,
1275                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1276                               struct intel_batchbuffer *batch)
1278     int len_in_dwords = 11;
/* Fall back to the encoder's main batch when none is supplied. */
1281         batch = encoder_context->base.batch;
1283     BEGIN_BCS_BATCH(batch, len_in_dwords);
1285     OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1287     OUT_BCS_BATCH(batch, msg[2]);         /* 32 MV*/
1288     OUT_BCS_BATCH(batch, offset);
1290     OUT_BCS_BATCH(batch, msg[0]);
1292     OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
1293     OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */
/* B slices additionally set the 0xF<<28 bidirectional weight bits. */
1295     if ( slice_type == SLICE_TYPE_B) {
1296         OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);      /* Last MB */
1298         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1301         OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
1305     /*Stuff for Inter MB*/
1306     OUT_BCS_BATCH(batch, msg[1]);
1307     OUT_BCS_BATCH(batch, 0x0);
1308     OUT_BCS_BATCH(batch, 0x0);
1310     /*MaxSizeInWord and TargetSizeInWord*/
1311     OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1312                   (target_mb_size << 16) );
1314     ADVANCE_BCS_BATCH(batch);
1316     return len_in_dwords;
/*
 * Program one slice on the CPU ("software batchbuffer") path: emit the
 * slice state, the packed slice header, then a PAK object per macroblock
 * using the VME output, and finally the tail/padding insert-objects.
 * Under CBR the per-slice QP comes from the BRC context instead of the
 * picture/slice parameters.  (Several lines — the slice_index parameter,
 * braces, the intra/inter branch conditions — are truncated in this
 * excerpt.)
 */
1320 gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1321                                        struct encode_state *encode_state,
1322                                        struct intel_encoder_context *encoder_context,
1324                                        struct intel_batchbuffer *slice_batch)
1326     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1327     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1328     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1329     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1330     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1331     unsigned int *msg = NULL, offset = 0;
1332     int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1333     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1334     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1335     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1337     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1338     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1339     unsigned char *slice_header = NULL;
1340     int slice_header_length_in_bits = 0;
1341     unsigned int tail_data[] = { 0x0, 0x0 };
1342     int slice_type = pSliceParameter->slice_type;
/* CBR: use the BRC-chosen QP and keep the slice header consistent by
 * rewriting slice_qp_delta accordingly. */
1345     if (rate_control_mode == VA_RC_CBR) {
1346         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1347         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1350     /* only support for 8-bit pixel bit-depth */
1351     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1352     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1353     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1354     assert(qp >= 0 && qp < 52);
1356     gen6_mfc_avc_slice_state(ctx,
1359                              encode_state, encoder_context,
1360                              (rate_control_mode == VA_RC_CBR), qp, slice_batch);
/* SPS/PPS/SEI headers are emitted once, before the first slice. */
1362     if ( slice_index == 0)
1363         gen6_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1365     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1368     mfc_context->insert_object(ctx, encoder_context,
1369                                (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1370                                5,  /* first 5 bytes are start code + nal unit type */
1371                                1, 0, 1, slice_batch);
/* Map the VME output and position msg at this slice's first MB record.
 * Inter records are preceded by 32 DWs of motion vectors. */
1373     dri_bo_map(vme_context->vme_output.bo , 1);
1374     msg = (unsigned int *)vme_context->vme_output.bo->virtual;
1377         msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
1379         msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
1380         msg += 32; /* the first 32 DWs are MVs */
1381         offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
1384     for (i = pSliceParameter->macroblock_address;
1385          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1386         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1387         x = i % width_in_mbs;
1388         y = i / width_in_mbs;
/* Intra slice: every MB is packed intra.  (The "if (is_intra)" guard
 * is on a truncated line.) */
1392             gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1393             msg += INTRA_VME_OUTPUT_IN_DWS;
/* Inter slice: the VME may still have chosen intra for this MB. */
1395             if (msg[0] & INTRA_MB_FLAG_MASK) {
1396                 gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1398                 gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1401             msg += INTER_VME_OUTPUT_IN_DWS;
1402             offset += INTER_VME_OUTPUT_IN_BYTES;
1406     dri_bo_unmap(vme_context->vme_output.bo);
/* Tail data: last slice gets the longer terminator sequence. */
1409         mfc_context->insert_object(ctx, encoder_context,
1411                                    2, 1, 1, 0, slice_batch);
1413         mfc_context->insert_object(ctx, encoder_context,
1415                                    1, 1, 1, 0, slice_batch);
/*
 * Build the per-slice PAK commands on the CPU into a fresh BSD batch
 * buffer and return its BO (referenced; the batch wrapper itself is
 * freed).  (Return-type line, braces and the final return statement are
 * truncated in this excerpt.)
 */
1423 gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1424                                   struct encode_state *encode_state,
1425                                   struct intel_encoder_context *encoder_context)
1427     struct i965_driver_data *i965 = i965_driver_data(ctx);
1428     struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
1429     dri_bo *batch_bo = batch->buffer;
1432     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1433         gen6_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
/* Terminate the second-level batch: qword alignment + BATCH_BUFFER_END. */
1436     intel_batchbuffer_align(batch, 8);
1438     BEGIN_BCS_BATCH(batch, 2);
1439     OUT_BCS_BATCH(batch, 0);
1440     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1441     ADVANCE_BCS_BATCH(batch);
/* Keep the BO alive past intel_batchbuffer_free(); caller unreferences. */
1443     dri_bo_reference(batch_bo);
1444     intel_batchbuffer_free(batch);
/*
 * Bind the GPU-batchbuffer kernel's input surfaces: the VME output (MB
 * records) and the aux batchbuffer holding packed slice headers.
 * (Return-type line and braces truncated in this excerpt.)
 */
1452 gen6_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1453                                     struct encode_state *encode_state,
1454                                     struct intel_encoder_context *encoder_context)
1457     struct gen6_vme_context *vme_context = encoder_context->vme_context;
1458     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1460     assert(vme_context->vme_output.bo);
1461     mfc_context->buffer_suface_setup(ctx,
1462                                      &mfc_context->gpe_context,
1463                                      &vme_context->vme_output,
1464                                      BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1465                                      SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1466     assert(mfc_context->aux_batchbuffer_surface.bo);
1467     mfc_context->buffer_suface_setup(ctx,
1468                                      &mfc_context->gpe_context,
1469                                      &mfc_context->aux_batchbuffer_surface,
1470                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1471                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
/*
 * Allocate and bind the output surface that the media kernel writes the
 * generated MFC batchbuffer into.  Sized as one 4-OWORD command slot per
 * macroblock, plus per-slice head/tail slots (8 per slice) and one
 * terminator slot.  (Return-type line, braces and a dri_bo_alloc
 * argument line are truncated in this excerpt.)
 */
1475 gen6_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1476                                      struct encode_state *encode_state,
1477                                      struct intel_encoder_context *encoder_context)
1480     struct i965_driver_data *i965 = i965_driver_data(ctx);
1481     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1482     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1483     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1484     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1485     mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
/* NOTE(review): the "3 OWORDs" comment disagrees with CMD_LEN_IN_OWORD
 * (4) — the code uses 4; confirm which is intended. */
1486     mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1487     mfc_context->mfc_batchbuffer_surface.pitch = 16;
1488     mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
1490                                                            mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1492     mfc_context->buffer_suface_setup(ctx,
1493                                      &mfc_context->gpe_context,
1494                                      &mfc_context->mfc_batchbuffer_surface,
1495                                      BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1496                                      SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
/*
 * Bind all surfaces for the batchbuffer-generation kernel: inputs (VME
 * output, slice headers) and the output batchbuffer surface.
 * (Return-type line and braces truncated in this excerpt.)
 */
1500 gen6_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1501                                     struct encode_state *encode_state,
1502                                     struct intel_encoder_context *encoder_context)
1504     gen6_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1505     gen6_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/*
 * Fill the interface descriptor remap table (IDRT) for the GPE context:
 * one 32-byte gen6_interface_descriptor_data entry per kernel, pointing
 * at the kernel start address and the shared binding table, with 4 URB
 * constant registers.  A relocation patches each kernel-start pointer.
 * (Return-type line, braces, the dri_bo_map call and some reloc
 * arguments are truncated in this excerpt.)
 */
1509 gen6_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1510                                 struct encode_state *encode_state,
1511                                 struct intel_encoder_context *encoder_context)
1513     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1514     struct gen6_interface_descriptor_data *desc;
1518     bo = mfc_context->gpe_context.idrt.bo;
1520     assert(bo->virtual);
1523     for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1524         struct i965_kernel *kernel;
1526         kernel = &mfc_context->gpe_context.kernels[i];
1527         assert(sizeof(*desc) == 32);
1529         /*Setup the descriptor table*/
1530         memset(desc, 0, sizeof(*desc));
/* Kernel start pointer is stored in 64-byte (cacheline) units. */
1531         desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1532         desc->desc2.sampler_count = 0;
1533         desc->desc2.sampler_state_pointer = 0;
1534         desc->desc3.binding_table_entry_count = 2;
/* Binding table pointer in 32-byte units. */
1535         desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1536         desc->desc4.constant_urb_entry_read_offset = 0;
1537         desc->desc4.constant_urb_entry_read_length = 4;
/* Relocate desc0 so the kernel address is fixed up at exec time. */
1540         dri_bo_emit_reloc(bo,
1541                           I915_GEM_DOMAIN_INSTRUCTION, 0,
1543                           i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
/*
 * Set up the kernel's constant (push) buffer.  Only the context fetch is
 * visible here — the body of this function is truncated in this excerpt.
 */
1552 gen6_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1553                                     struct encode_state *encode_state,
1554                                     struct intel_encoder_context *encoder_context)
1556     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/*
 * Emit one 12-dword CMD_MEDIA_OBJECT that asks the selected media kernel
 * (intra or inter, chosen by `index`) to generate PAK commands for a run
 * of macroblocks into the MFC batchbuffer surface.  Inline payload
 * dwords carry the slice-header offset, destination batchbuffer offset,
 * and the MB-run description (count packed into bits 16+).
 * (Most parameter lines and several payload dwords are truncated in this
 * excerpt.)
 */
1562 gen6_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1565                                          int batchbuffer_offset,
1577     BEGIN_BATCH(batch, 12);
1579     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1580     OUT_BATCH(batch, index);
1581     OUT_BATCH(batch, 0);
1582     OUT_BATCH(batch, 0);
1583     OUT_BATCH(batch, 0);
1584     OUT_BATCH(batch, 0);
/* Inline data consumed by the kernel. */
1587     OUT_BATCH(batch, head_offset);
1588     OUT_BATCH(batch, batchbuffer_offset);
1593               number_mb_cmds << 16 |
1604     ADVANCE_BATCH(batch);
/*
 * Split one slice into MEDIA_OBJECT commands of at most 128 macroblocks
 * each, advancing the slice-header and destination batchbuffer offsets
 * between chunks; a final command covers the remainder (total % 128).
 * head_size/tail_size are accounted against the first and last chunks.
 * (Return-type line, braces, some parameters and the argument lists of
 * the emit calls are truncated in this excerpt.)
 */
1608 gen6_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1609                                        struct intel_encoder_context *encoder_context,
1610                                        VAEncSliceParameterBufferH264 *slice_param,
1612                                        unsigned short head_size,
1613                                        unsigned short tail_size,
1614                                        int batchbuffer_offset,
1618     struct intel_batchbuffer *batch = encoder_context->base.batch;
1619     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1620     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1621     int total_mbs = slice_param->num_macroblocks;
1622     int number_mb_cmds = 128;
1623     int starting_mb = 0;
1624     int last_object = 0;
1625     int first_object = 1;
/* Select the intra or inter kernel for this slice type. */
1628     int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
/* Full 128-MB chunks. */
1630     for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1631         last_object = (total_mbs - starting_mb) == number_mb_cmds;
1632         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1633         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
/* MB coordinates are packed into 8-bit fields by the kernel ABI. */
1634         assert(mb_x <= 255 && mb_y <= 255);
1636         starting_mb += number_mb_cmds;
1638         gen6_mfc_batchbuffer_emit_object_command(batch,
/* After the first chunk the slice-header region has been consumed. */
1654             head_offset += head_size;
1655             batchbuffer_offset += head_size;
/* The last chunk also accounts for the tail insert-objects. */
1659             head_offset += tail_size;
1660             batchbuffer_offset += tail_size;
1663         batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
/* Remainder chunk (total_mbs % 128), if any — guard is on a truncated
 * line. */
1670         number_mb_cmds = total_mbs % number_mb_cmds;
1671         mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1672         mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1673         assert(mb_x <= 255 && mb_y <= 255);
1674         starting_mb += number_mb_cmds;
1676         gen6_mfc_batchbuffer_emit_object_command(batch,
/*
 * Prepare one slice for the GPU ("hardware batchbuffer") path: emit the
 * slice state, (for slice 0) the stream headers, and the packed slice
 * header into the aux batchbuffer, measure the head and tail sizes in
 * OWORDs, then enqueue the MEDIA_OBJECT commands that will expand the
 * slice's macroblocks into PAK commands on the GPU.
 *
 * return size in Owords (16bytes)
 *
 * (Return-type line, braces, the slice_index parameter and several
 * insert_object argument lists are truncated in this excerpt.)
 */
1697 gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1698                                struct encode_state *encode_state,
1699                                struct intel_encoder_context *encoder_context,
1701                                int batchbuffer_offset)
1703     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1704     struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1705     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1706     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1707     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1708     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1709     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1710     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1711     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1712     unsigned int rate_control_mode = encoder_context->rate_control_mode;
1713     unsigned char *slice_header = NULL;
1714     int slice_header_length_in_bits = 0;
1715     unsigned int tail_data[] = { 0x0, 0x0 };
1717     int old_used = intel_batchbuffer_used_size(slice_batch), used;
1718     unsigned short head_size, tail_size;
1719     int slice_type = pSliceParameter->slice_type;
/* CBR: use the BRC-chosen QP, keep the slice header consistent. */
1721     if (rate_control_mode == VA_RC_CBR) {
1722         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1723         pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1726     /* only support for 8-bit pixel bit-depth */
1727     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1728     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1729     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1730     assert(qp >= 0 && qp < 52);
/* Offsets and sizes are tracked in OWORD (16-byte) units. */
1732     head_offset = old_used / 16;
1733     gen6_mfc_avc_slice_state(ctx,
1738                              (rate_control_mode == VA_RC_CBR),
1742     if (slice_index == 0)
1743         gen6_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1745     slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1748     mfc_context->insert_object(ctx,
1750                                (unsigned int *)slice_header,
1751                                ALIGN(slice_header_length_in_bits, 32) >> 5,
1752                                slice_header_length_in_bits & 0x1f,
1753                                5,  /* first 5 bytes are start code + nal unit type */
/* Measure the head portion (slice state + headers) in OWORDs. */
1760     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1761     used = intel_batchbuffer_used_size(slice_batch);
1762     head_size = (used - old_used) / 16;
/* Tail insert-objects (terminator data), then measure the tail size. */
1767     mfc_context->insert_object(ctx,
1778     mfc_context->insert_object(ctx,
1790     intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1791     used = intel_batchbuffer_used_size(slice_batch);
1792     tail_size = (used - old_used) / 16;
1795     gen6_mfc_avc_batchbuffer_slice_command(ctx,
/* Total OWORDs this slice will occupy in the generated batchbuffer. */
1805     return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
/*
 * Run the GPE media pipeline that generates the MFC batchbuffer on the
 * GPU: set up the pipeline, enqueue one batchbuffer-slice job per slice
 * (accumulating the destination offset), then submit.
 * (Return-type line, braces and the offset-advance statement are
 * truncated in this excerpt.)
 */
1809 gen6_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1810                                   struct encode_state *encode_state,
1811                                   struct intel_encoder_context *encoder_context)
1813     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1814     struct intel_batchbuffer *batch = encoder_context->base.batch;
1815     int i, size, offset = 0;
1816     intel_batchbuffer_start_atomic(batch, 0x4000);
1817     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1819     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1820         size = gen6_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1824     intel_batchbuffer_end_atomic(batch);
1825     intel_batchbuffer_flush(batch);
/*
 * Build the MFC batchbuffer via the GPU path: bind surfaces, set up the
 * interface descriptors and constants, then run the media pipeline.
 * (Return-type line and braces truncated in this excerpt.)
 */
1829 gen6_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1830                                struct encode_state *encode_state,
1831                                struct intel_encoder_context *encoder_context)
1833     gen6_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1834     gen6_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1835     gen6_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1836     gen6_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
/*
 * GPU ("hardware") counterpart of gen6_mfc_avc_software_batchbuffer():
 * generate the PAK batchbuffer with the media kernels and return the
 * resulting BO with an extra reference (caller unreferences).
 * (Return-type line and braces truncated in this excerpt.)
 */
1840 gen6_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1841                                   struct encode_state *encode_state,
1842                                   struct intel_encoder_context *encoder_context)
1844     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1846     gen6_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1847     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1849     return mfc_context->mfc_batchbuffer_surface.bo;
/*
 * Heuristic interlace detection: sum the macroblocks covered by all
 * slices; if they cover exactly one full progressive frame the content
 * is treated as progressive.  The return statements (0 for progressive,
 * presumably non-zero otherwise) are on truncated lines in this excerpt.
 */
1854 int interlace_check(VADriverContextP ctx,
1855                     struct encode_state *encode_state,
1856                     struct intel_encoder_context *encoder_context) {
1857     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1858     VAEncSliceParameterBufferH264 *pSliceParameter;
1861     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1862     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1864     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1865         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
1866         mbCount += pSliceParameter->num_macroblocks;
/* All slices together cover one progressive frame. */
1869     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
/*
 * Top-level PAK programming for one frame: reject interlaced input,
 * build the second-level slice batchbuffer (software or hardware path —
 * the selector is on a truncated line), then emit the first-level BCS
 * batch: MI flush, picture-level state, and an MI_BATCH_BUFFER_START
 * chaining to the slice batch.  (Return-type line, braces and the
 * early-return after the interlace error are truncated in this excerpt.)
 */
1877 gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
1878                                  struct encode_state *encode_state,
1879                                  struct intel_encoder_context *encoder_context)
1881     struct intel_batchbuffer *batch = encoder_context->base.batch;
1882     dri_bo *slice_batch_bo;
1884     if ( interlace_check(ctx, encode_state, encoder_context) ) {
1885         fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1891         slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1893         slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1897     intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1898     intel_batchbuffer_emit_mi_flush(batch);
1900     // picture level programing
1901     gen6_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain to the second-level (slice) batchbuffer. */
1903     BEGIN_BCS_BATCH(batch, 2);
1904     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1905     OUT_BCS_RELOC(batch,
1907                   I915_GEM_DOMAIN_COMMAND, 0,
1909     ADVANCE_BCS_BATCH(batch);
1912     intel_batchbuffer_end_atomic(batch);
/* Drop the reference taken by the batchbuffer builder. */
1914     dri_bo_unreference(slice_batch_bo);
1918 gen6_mfc_avc_encode_picture(VADriverContextP ctx,
1919 struct encode_state *encode_state,
1920 struct intel_encoder_context *encoder_context)
1922 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1923 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1924 int current_frame_bits_size;
1928 gen6_mfc_init(ctx, encoder_context);
1929 gen6_mfc_avc_prepare(ctx, encode_state, encoder_context);
1930 /*Programing bcs pipeline*/
1931 gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1932 gen6_mfc_run(ctx, encode_state, encoder_context);
1933 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1934 gen6_mfc_stop(ctx, encode_state, encoder_context, ¤t_frame_bits_size);
1935 sts = gen6_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1936 if (sts == BRC_NO_HRD_VIOLATION) {
1937 gen6_mfc_hrd_context_update(encode_state, mfc_context);
1940 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1941 if (!mfc_context->hrd.violation_noted) {
1942 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1943 mfc_context->hrd.violation_noted = 1;
1945 return VA_STATUS_SUCCESS;
1952 return VA_STATUS_SUCCESS;
1956 gen6_mfc_pipeline(VADriverContextP ctx,
1958 struct encode_state *encode_state,
1959 struct intel_encoder_context *encoder_context)
1964 case VAProfileH264Baseline:
1965 case VAProfileH264Main:
1966 case VAProfileH264High:
1967 vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1970 /* FIXME: add for other profile */
1972 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1980 gen6_mfc_context_destroy(void *context)
1982 struct gen6_mfc_context *mfc_context = context;
1985 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1986 mfc_context->post_deblocking_output.bo = NULL;
1988 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1989 mfc_context->pre_deblocking_output.bo = NULL;
1991 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1992 mfc_context->uncompressed_picture_source.bo = NULL;
1994 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
1995 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1997 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1998 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1999 mfc_context->direct_mv_buffers[i].bo = NULL;
2002 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
2003 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
2005 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
2006 mfc_context->macroblock_status_buffer.bo = NULL;
2008 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
2009 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2011 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
2012 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2015 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
2016 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
2017 mfc_context->reference_surfaces[i].bo = NULL;
2020 i965_gpe_context_destroy(&mfc_context->gpe_context);
2022 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
2023 mfc_context->mfc_batchbuffer_surface.bo = NULL;
2025 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
2026 mfc_context->aux_batchbuffer_surface.bo = NULL;
2028 if (mfc_context->aux_batchbuffer)
2029 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
2031 mfc_context->aux_batchbuffer = NULL;
2036 void gen6_mfc_brc_prepare(struct encode_state *encode_state,
2037 struct intel_encoder_context *encoder_context)
2039 unsigned int rate_control_mode = encoder_context->rate_control_mode;
2040 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
2042 if (rate_control_mode == VA_RC_CBR) {
2043 /*Programing bit rate control */
2044 if ( mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0 ) {
2045 gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context);
2046 gen6_mfc_brc_init(encode_state, encoder_context);
2049 /*Programing HRD control */
2050 if ( mfc_context->vui_hrd.i_cpb_size_value == 0 )
2051 gen6_mfc_hrd_context_init(encode_state, encoder_context);
2055 Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2057 struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
2059 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
2061 mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
2062 mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
2064 mfc_context->gpe_context.curbe.length = 32 * 4;
2066 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
2067 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
2068 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
2069 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
2070 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
2072 i965_gpe_load_kernels(ctx,
2073 &mfc_context->gpe_context,
2077 mfc_context->pipe_mode_select = gen6_mfc_pipe_mode_select;
2078 mfc_context->set_surface_state = gen6_mfc_surface_state;
2079 mfc_context->ind_obj_base_addr_state = gen6_mfc_ind_obj_base_addr_state;
2080 mfc_context->avc_img_state = gen6_mfc_avc_img_state;
2081 mfc_context->avc_qm_state = gen6_mfc_avc_qm_state;
2082 mfc_context->avc_fqm_state = gen6_mfc_avc_fqm_state;
2083 mfc_context->insert_object = gen6_mfc_avc_insert_object;
2084 mfc_context->buffer_suface_setup = i965_gpe_buffer_suface_setup;
2086 encoder_context->mfc_context = mfc_context;
2087 encoder_context->mfc_context_destroy = gen6_mfc_context_destroy;
2088 encoder_context->mfc_pipeline = gen6_mfc_pipeline;
2089 encoder_context->mfc_brc_prepare = gen6_mfc_brc_prepare;