2 * Copyright © 2010-2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhou Chang <chang.zhou@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
/*
 * Emits the 4-dword MFX_PIPE_MODE_SELECT command for gen6 on the BCS batch:
 * encoding mode, AVC as the selected standard, post-deblocking output on.
 * NOTE(review): the embedded numbering skips lines here (return type, braces
 * and the openers of the multi-line dwords are not shown); confirm against
 * the complete file.
 */
43 gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
45 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
47 BEGIN_BCS_BATCH(batch, 4);
/* Command header; the low bits carry the total dword count minus 2. */
49 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
/* Mode flags: stream-out, deblocking outputs, encode/decode and codec select. */
51 (1 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
52 (1 << 9) | /* Post Deblocking Output */
53 (0 << 8) | /* Pre Deblocking Output */
54 (0 << 7) | /* disable TLB prefetch */
55 (0 << 5) | /* not in stitch mode */
56 (1 << 4) | /* encoding mode */
57 (2 << 0)); /* Standard Select: AVC */
/* Rounding, NOA and error-termination controls; only bit 6 is set. */
59 (0 << 20) | /* round flag in PB slice */
60 (0 << 19) | /* round flag in Intra8x8 */
61 (0 << 7) | /* expand NOA bus flag */
62 (1 << 6) | /* must be 1 */
63 (0 << 5) | /* disable clock gating for NOA */
64 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
65 (0 << 3) | /* terminate if AVC mbdata error occurs */
66 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
67 (0 << 1) | /* AVC long field motion vector */
68 (0 << 0)); /* always calculate AVC ILDB boundary strength */
69 OUT_BCS_BATCH(batch, 0);
71 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 5-dword MFX_PIPE_MODE_SELECT command for gen7 on the BCS batch,
 * in encoding mode with the codec given by standard_select.
 * standard_select must be MFX_FORMAT_MPEG2 or MFX_FORMAT_AVC (asserted).
 * NOTE(review): the parameter line declaring standard_select, the return
 * type, braces and some dword lines are not shown in this listing; confirm
 * against the complete file.
 */
75 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
77 struct gen6_encoder_context *gen6_encoder_context)
79 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
81 assert(standard_select == MFX_FORMAT_MPEG2 ||
82 standard_select == MFX_FORMAT_AVC);
84 BEGIN_BCS_BATCH(batch, 5);
/* Command header; the low bits carry the total dword count minus 2. */
85 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
/* Mode flags: long format, VLD mode, deblocking outputs, codec select. */
87 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
88 (MFD_MODE_VLD << 15) | /* VLD mode */
89 (0 << 10) | /* disable Stream-Out */
90 (1 << 9) | /* Post Deblocking Output */
91 (0 << 8) | /* Pre Deblocking Output */
92 (0 << 5) | /* not in stitch mode */
93 (1 << 4) | /* encoding mode */
94 (standard_select << 0)); /* standard select: avc or mpeg2 */
/* Clock-gating and error-termination controls, all left at 0 in the lines shown. */
96 (0 << 7) | /* expand NOA bus flag */
97 (0 << 6) | /* disable slice-level clock gating */
98 (0 << 5) | /* disable clock gating for NOA */
99 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
100 (0 << 3) | /* terminate if AVC mbdata error occurs */
101 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
104 OUT_BCS_BATCH(batch, 0);
105 OUT_BCS_BATCH(batch, 0);
107 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 6-dword MFX_SURFACE_STATE command for gen6, describing the
 * surface from mfc_context->surface_state (width, height, w_pitch, h_pitch)
 * as a Y-tiled planar 4:2:0 surface with interleaved U/V.
 * NOTE(review): return type, braces and the openers of the multi-line
 * dwords are not shown in this listing; confirm against the complete file.
 */
111 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
113 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
114 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
116 BEGIN_BCS_BATCH(batch, 6);
118 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119 OUT_BCS_BATCH(batch, 0);
/* Dimensions are programmed minus one: height at bit 19, width at bit 6. */
121 ((mfc_context->surface_state.height - 1) << 19) |
122 ((mfc_context->surface_state.width - 1) << 6));
/* Surface format, pitch (minus one) and tiling. */
124 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126 (0 << 22) | /* surface object control state, FIXME??? */
127 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128 (0 << 2) | /* must be 0 for interleave U/V */
129 (1 << 1) | /* must be y-tiled */
130 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* The chroma plane starts h_pitch rows below the luma plane. */
132 (0 << 16) | /* must be 0 for interleave U/V */
133 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
134 OUT_BCS_BATCH(batch, 0);
135 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 6-dword MFX_SURFACE_STATE command for gen7. It matches the gen6
 * variant except for where the dimensions sit: height - 1 at bit 18 and
 * width - 1 at bit 4.
 * NOTE(review): return type, braces and the openers of the multi-line
 * dwords are not shown in this listing; confirm against the complete file.
 */
139 gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
141 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
142 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
144 BEGIN_BCS_BATCH(batch, 6);
146 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
147 OUT_BCS_BATCH(batch, 0);
/* Dimensions are programmed minus one. */
149 ((mfc_context->surface_state.height - 1) << 18) |
150 ((mfc_context->surface_state.width - 1) << 4));
/* Surface format, pitch (minus one) and tiling. */
152 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
153 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
154 (0 << 22) | /* surface object control state, FIXME??? */
155 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
156 (0 << 2) | /* must be 0 for interleave U/V */
157 (1 << 1) | /* must be tiled */
158 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* The chroma plane starts h_pitch rows below the luma plane. */
160 (0 << 16) | /* must be 0 for interleave U/V */
161 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
162 OUT_BCS_BATCH(batch, 0);
163 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 24-dword MFX_PIPE_BUF_ADDR_STATE command: relocations for the
 * post-deblocking output, the uncompressed source picture, the stream-out /
 * macroblock status buffer, the two row-store scratch buffers and one slot
 * per entry of mfc_context->reference_surfaces. The pre-deblocking address
 * is written as 0.
 * NOTE(review): the declaration of `i`, the trailing arguments of some
 * relocations and the branch/brace lines of the loop are not shown in this
 * listing; confirm against the complete file.
 */
167 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
169 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
170 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
173 BEGIN_BCS_BATCH(batch, 24);
175 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
177 OUT_BCS_BATCH(batch, 0); /* pre output addr */
179 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
180 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
181 0); /* post output addr */
183 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
184 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185 0); /* uncompressed data */
/* The macroblock status buffer is also used as the stream-out target. */
186 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
187 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188 0); /* StreamOut data*/
189 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
190 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
192 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
193 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
195 /* 7..22 Reference pictures*/
/* One dword per reference slot: a relocation when a bo is set; the 0 emit
 * below is presumably the else branch for empty slots (TODO confirm). */
196 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
197 if ( mfc_context->reference_surfaces[i].bo != NULL) {
198 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
199 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202 OUT_BCS_BATCH(batch, 0);
205 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
206 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
207 0); /* Macroblock status buffer*/
209 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 11-dword MFX_IND_OBJ_BASE_ADDR_STATE command for gen6: the VME
 * output buffer is the indirect MV object base, and the PAK-BSE object
 * buffer is relocated twice (the second time at its end_offset).
 * NOTE(review): return type, braces, the OUT_BCS_RELOC openers of the two
 * PAK-BSE relocations and the first relocation's offset argument are not
 * shown in this listing; confirm against the complete file.
 */
213 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
215 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
216 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
217 struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
219 BEGIN_BCS_BATCH(batch, 11);
221 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
222 OUT_BCS_BATCH(batch, 0);
223 OUT_BCS_BATCH(batch, 0);
224 /* MFX Indirect MV Object Base Address */
225 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
226 OUT_BCS_BATCH(batch, 0);
227 OUT_BCS_BATCH(batch, 0);
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
231 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
233 mfc_context->mfc_indirect_pak_bse_object.bo,
234 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* Second relocation of the same bo, offset by end_offset. */
237 mfc_context->mfc_indirect_pak_bse_object.bo,
238 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239 mfc_context->mfc_indirect_pak_bse_object.end_offset);
241 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 11-dword MFX_IND_OBJ_BASE_ADDR_STATE command for gen7. Unlike
 * the gen6 variant, the dword after the indirect MV object base is written
 * as 0x80000000 instead of 0.
 * NOTE(review): return type, braces, the OUT_BCS_RELOC openers of the two
 * PAK-BSE relocations and the first relocation's offset argument are not
 * shown in this listing; confirm against the complete file.
 */
245 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
247 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
248 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
249 struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
251 BEGIN_BCS_BATCH(batch, 11);
253 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
254 OUT_BCS_BATCH(batch, 0);
255 OUT_BCS_BATCH(batch, 0);
256 /* MFX Indirect MV Object Base Address */
257 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
258 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
259 OUT_BCS_BATCH(batch, 0);
260 OUT_BCS_BATCH(batch, 0);
261 OUT_BCS_BATCH(batch, 0);
262 OUT_BCS_BATCH(batch, 0);
263 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
265 mfc_context->mfc_indirect_pak_bse_object.bo,
266 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* Second relocation of the same bo, offset by end_offset. */
269 mfc_context->mfc_indirect_pak_bse_object.bo,
270 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
271 mfc_context->mfc_indirect_pak_bse_object.end_offset);
273 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 4-dword MFX_BSP_BUF_BASE_ADDR_STATE command: a relocation for
 * the BSD/MPC row-store scratch buffer followed by two zero dwords.
 * NOTE(review): return type, braces and the relocation's offset argument
 * are not shown in this listing; confirm against the complete file.
 */
277 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
279 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
280 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
282 BEGIN_BCS_BATCH(batch, 4);
284 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
285 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
286 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
288 OUT_BCS_BATCH(batch, 0);
289 OUT_BCS_BATCH(batch, 0);
291 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 13-dword MFX_AVC_IMG_STATE command for gen6. The picture size
 * in macroblocks comes from mfc_context->surface_state; the entropy coding
 * and 8x8 transform flags come from the picture parameter buffer, and the
 * direct 8x8 inference flag from the sequence parameter buffer.
 * NOTE(review): return type, braces and the openers of the multi-line
 * dwords are not shown in this listing; confirm against the complete file.
 */
295 gen6_mfc_avc_img_state(VADriverContextP ctx,struct encode_state *encode_state,
296 struct gen6_encoder_context *gen6_encoder_context)
298 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
299 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
300 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
301 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* Round the pixel dimensions up to whole 16x16 macroblocks. */
302 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
303 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
305 BEGIN_BCS_BATCH(batch, 13);
306 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
/* Total macroblock count, truncated to 16 bits. */
308 ((width_in_mbs * height_in_mbs) & 0xFFFF));
/* Picture height and width in macroblocks (not minus one here). */
310 (height_in_mbs << 16) |
311 (width_in_mbs << 0));
313 (0 << 24) | /*Second Chroma QP Offset*/
314 (0 << 16) | /*Chroma QP Offset*/
315 (0 << 14) | /*Max-bit conformance Intra flag*/
316 (0 << 13) | /*Max Macroblock size conformance Inter flag*/
317 (1 << 12) | /*Should always be written as "1" */
318 (0 << 10) | /*QM Preset Flag */
319 (0 << 8) | /*Image Structure*/
320 (0 << 0) ); /*Current Decoded Image Frame Store ID, reserved in Encode mode*/
322 (400 << 16) | /*Minimum Frame size*/
323 (0 << 15) | /*Disable reading of Macroblock Status Buffer*/
324 (0 << 14) | /*Load BitStream Pointer only once, 1 slice 1 frame*/
325 (0 << 13) | /*CABAC 0 word insertion test enable*/
326 (1 << 12) | /*MVUnpackedEnable,compliant to DXVA*/
327 (1 << 10) | /*Chroma Format IDC, 4:2:0*/
328 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
329 (0 << 6) | /*Only valid for VLD decoding mode*/
330 (0 << 5) | /*Constrained Intra Prediction Flag, from PPS*/
331 (pSequenceParameter->direct_8x8_inference_flag << 4) | /*Direct 8x8 inference flag*/
332 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
333 (1 << 2) | /*Frame MB only flag*/
334 (0 << 1) | /*MBAFF mode is in active*/
335 (0 << 0) ); /*Field picture flag*/
/* Rate-control enables and report masks. */
337 (1<<16) | /*Frame Size Rate Control Flag*/
339 (1<<9) | /*MB level Rate Control Enabling Flag*/
340 (1 << 3) | /*FrameBitRateMinReportMask*/
341 (1 << 2) | /*FrameBitRateMaxReportMask*/
342 (1 << 1) | /*InterMBMaxSizeReportMask*/
343 (1 << 0) ); /*IntraMBMaxSizeReportMask*/
344 OUT_BCS_BATCH(batch, /*Inter and Intra Conformance Max size limit*/
345 (0x0600 << 16) | /*InterMbMaxSz 192 Byte*/
346 (0x0800) ); /*IntraMbMaxSz 256 Byte*/
347 OUT_BCS_BATCH(batch, 0x00000000); /*Reserved : MBZReserved*/
348 OUT_BCS_BATCH(batch, 0x01020304); /*Slice QP Delta for bitrate control*/
349 OUT_BCS_BATCH(batch, 0xFEFDFCFB);
350 OUT_BCS_BATCH(batch, 0x80601004); /*MAX = 128KB, MIN = 64KB*/
351 OUT_BCS_BATCH(batch, 0x00800001);
352 OUT_BCS_BATCH(batch, 0);
354 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 16-dword MFX_AVC_IMG_STATE command for gen7. Unlike the gen6
 * variant, the picture dimensions in macroblocks are programmed minus one,
 * and of the stream parameters only entropy_coding_mode_flag is read; the
 * direct 8x8 inference and 8x8 transform bits are hard-coded to 0.
 * NOTE(review): return type, braces and the openers of the multi-line
 * dwords are not shown in this listing; confirm against the complete file.
 */
358 gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
359 struct gen6_encoder_context *gen6_encoder_context)
361 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
362 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
363 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* Round the pixel dimensions up to whole 16x16 macroblocks. */
365 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
366 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
368 BEGIN_BCS_BATCH(batch, 16);
369 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
/* Total macroblock count, truncated to 16 bits. */
371 ((width_in_mbs * height_in_mbs) & 0xFFFF));
/* Picture height and width in macroblocks, minus one. */
373 ((height_in_mbs - 1) << 16) |
374 ((width_in_mbs - 1) << 0));
376 (0 << 24) | /* Second Chroma QP Offset */
377 (0 << 16) | /* Chroma QP Offset */
378 (0 << 14) | /* Max-bit conformance Intra flag */
379 (0 << 13) | /* Max Macroblock size conformance Inter flag */
380 (0 << 12) | /* FIXME: Weighted_Pred_Flag */
381 (0 << 10) | /* FIXME: Weighted_BiPred_Idc */
382 (0 << 8) | /* FIXME: Image Structure */
383 (0 << 0) ); /* Current Decoded Image Frame Store ID, reserved in Encode mode */
385 (0 << 16) | /* Minimum Frame size */
386 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
387 (0 << 14) | /* Load BitStream Pointer only once, 1 slice 1 frame */
388 (0 << 13) | /* CABAC 0 word insertion test enable */
389 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
390 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
391 (0 << 9) | /* FIXME: MbMvFormatFlag */
392 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
393 (0 << 6) | /* Only valid for VLD decoding mode */
394 (0 << 5) | /* Constrained Intra Prediction Flag, from PPS */
395 (0 << 4) | /* Direct 8x8 inference flag */
396 (0 << 3) | /* Only 8x8 IDCT Transform Mode Flag */
397 (1 << 2) | /* Frame MB only flag */
398 (0 << 1) | /* MBAFF mode is in active */
399 (0 << 0)); /* Field picture flag */
400 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
401 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
402 (0xBB8 << 16) | /* InterMbMaxSz */
403 (0xEE8) ); /* IntraMbMaxSz */
404 OUT_BCS_BATCH(batch, 0); /* Reserved */
405 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
406 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
407 OUT_BCS_BATCH(batch, 0x8C000000);
408 OUT_BCS_BATCH(batch, 0x00010000);
409 OUT_BCS_BATCH(batch, 0);
410 OUT_BCS_BATCH(batch, 0);
411 OUT_BCS_BATCH(batch, 0);
412 OUT_BCS_BATCH(batch, 0);
414 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 69-dword MFX_AVC_DIRECTMODE_STATE command: one dword per direct
 * MV buffer slot, then 32 dwords holding i/2 for i = 0..31, then two zero
 * dwords.
 * NOTE(review): the declaration of `i`, the relocation's trailing arguments
 * and the branch/brace lines of the loops are not shown in this listing;
 * confirm against the complete file.
 */
417 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
419 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
420 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
424 BEGIN_BCS_BATCH(batch, 69);
426 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
428 /* Reference frames and Current frames */
/* A relocation when the slot has a bo; the 0 emit below is presumably the
 * else branch for empty slots (TODO confirm). */
429 for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
430 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
431 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
432 I915_GEM_DOMAIN_INSTRUCTION, 0,
435 OUT_BCS_BATCH(batch, 0);
/* 32 entries, consecutive pairs sharing the same value i/2. */
440 for(i = 0; i < 32; i++) {
441 OUT_BCS_BATCH(batch, i/2);
443 OUT_BCS_BATCH(batch, 0);
444 OUT_BCS_BATCH(batch, 0);
446 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 11-dword MFX_AVC_SLICE_STATE command for the single slice
 * described by slice_params_ext[0]. The rate-control tuning values come
 * from mfc_context->bit_rate_control_context[], index 0 for I slices; the
 * assignment of 1 below is presumably the else branch for other slice
 * types (TODO confirm).
 * rate_control_enable is written to both the RateControlCounterEnable bit
 * and the RC Panic Enable bit.
 * NOTE(review): the parameter lines declaring slice_type and qp, the
 * declaration of `i`, and several dword lines (including the operands that
 * consume `grow`, `shrink` and correct[0..4]) are not shown in this
 * listing; confirm against the complete file.
 */
449 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
451 struct encode_state *encode_state,
452 struct gen6_encoder_context *gen6_encoder_context,
453 int rate_control_enable,
456 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
457 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
458 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; /* TODO: multi slices support */
459 int bit_rate_control_target;
460 if ( slice_type == SLICE_TYPE_I )
461 bit_rate_control_target = 0;
463 bit_rate_control_target = 1;
464 int maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
465 int maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
466 unsigned char correct[6];
469 for (i = 0; i < 6; i++)
470 correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
/* Pack each init value into the low nibble and its resistance into the high nibble. */
471 unsigned char grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit +
472 (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
473 unsigned char shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit +
474 (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
476 BEGIN_BCS_BATCH(batch, 11);;
478 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
480 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
482 if ( slice_type == SLICE_TYPE_I ) {
483 OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
485 OUT_BCS_BATCH(batch, 0x00010000); /*1 reference frame*/
489 (pSliceParameter->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
490 (0<<24) | /*Enable deblocking operation*/
491 (qp<<16) | /*Slice Quantization Parameter*/
493 OUT_BCS_BATCH(batch, 0); /*First MB X&Y , the position of current slice*/
/* Picture height in macroblocks, placed in the upper half of the dword. */
494 OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
497 (rate_control_enable<<31) | /*in CBR mode RateControlCounterEnable = enable*/
498 (1<<30) | /*ResetRateControlCounter*/
499 (0<<28) | /*RC Trigger Mode = Always Rate Control*/
500 (4<<24) | /*RC Stable Tolerance, middle level*/
501 (rate_control_enable<<23) | /*RC Panic Enable*/
502 (0<<22) | /*QP mode, don't modify CBP*/
503 (0<<21) | /*MB Type Direct Conversion Enabled*/
504 (0<<20) | /*MB Type Skip Conversion Enabled*/
505 (1<<19) | /*IsLastSlice*/
506 (0<<18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
507 (1<<17) | /*HeaderPresentFlag*/
508 (1<<16) | /*SliceData PresentFlag*/
509 (1<<15) | /*TailPresentFlag*/
510 (1<<13) | /*RBSP NAL TYPE*/
511 (0<<12) ); /*CabacZeroWordInsertionEnable*/
513 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
515 OUT_BCS_BATCH(batch, (maxQpN<<24) | /*Target QP - 24 is lowest QP*/
516 (maxQpP<<16) | /*Target QP + 20 is highest QP*/
519 OUT_BCS_BATCH(batch, (correct[5] << 20) |
525 OUT_BCS_BATCH(batch, 0);
527 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 58-dword MFX_AVC_QM_STATE command for gen6: header, a 0xFF
 * dword, then 56 dwords of 0x10101010 (every byte is 16, i.e. a flat
 * matrix).
 * NOTE(review): the declaration of `i` and the brace lines are not shown in
 * this listing; confirm against the complete file.
 */
529 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
531 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
534 BEGIN_BCS_BATCH(batch, 58);
/* 56 is the length field: 58 dwords in total, minus 2. */
536 OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
537 OUT_BCS_BATCH(batch, 0xFF ) ;
538 for( i = 0; i < 56; i++) {
539 OUT_BCS_BATCH(batch, 0x10101010);
542 ADVANCE_BCS_BATCH(batch);
/*
 * Emits the 113-dword MFC_AVC_FQM_STATE command for gen6: the header
 * followed by 112 dwords of 0x10001000 (each 16-bit half is 0x1000).
 * NOTE(review): the declaration of `i` and the brace lines are not shown in
 * this listing; confirm against the complete file.
 */
545 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
547 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
550 BEGIN_BCS_BATCH(batch, 113);
551 OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
553 for(i = 0; i < 112;i++) {
554 OUT_BCS_BATCH(batch, 0x10001000);
557 ADVANCE_BCS_BATCH(batch);
/*
 * Emits one 18-dword MFX_QM_STATE command for gen7: header, the matrix type
 * in qm_type, then a fixed 16-dword payload.
 * The first qm_length dwords of the payload are copied from qm; qm_length
 * must be <= 16 and the elements of qm must be 4 bytes wide (both
 * asserted). Any remaining payload dwords are sent as 0.
 * NOTE(review): the parameter lines declaring qm_type, qm and qm_length,
 * the return type and the braces are not shown in this listing; confirm
 * against the complete file.
 */
561 gen7_mfc_qm_state(VADriverContextP ctx,
565 struct gen6_encoder_context *gen6_encoder_context)
567 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
/* Zero-filled: all 16 dwords are emitted below even when qm_length < 16, so
 * the tail must not carry indeterminate stack contents into the batch. */
568 unsigned int qm_buffer[16] = { 0 };
570 assert(qm_length <= 16);
571 assert(sizeof(*qm) == 4);
572 memcpy(qm_buffer, qm, qm_length * 4);
574 BEGIN_BCS_BATCH(batch, 18);
575 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
576 OUT_BCS_BATCH(batch, qm_type << 0);
577 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
578 ADVANCE_BCS_BATCH(batch);
/*
 * Programs the four AVC quantiser matrices on gen7 by calling
 * gen7_mfc_qm_state() once per matrix type with a flat table (every byte
 * is 0x10). The 4x4 types pass 12 dwords, the 8x8 types pass 16.
 * NOTE(review): the closing line of the initialiser and the brace lines are
 * not shown in this listing; confirm against the complete file.
 */
581 static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
583 unsigned int qm[16] = {
584 0x10101010, 0x10101010, 0x10101010, 0x10101010,
585 0x10101010, 0x10101010, 0x10101010, 0x10101010,
586 0x10101010, 0x10101010, 0x10101010, 0x10101010,
587 0x10101010, 0x10101010, 0x10101010, 0x10101010
590 gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
591 gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
592 gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
593 gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
/*
 * Emits one 34-dword MFX_FQM_STATE command for gen7: header, the matrix
 * type in fqm_type, then a fixed 32-dword payload.
 * The first fqm_length dwords of the payload are copied from fqm;
 * fqm_length must be <= 32 and the elements of fqm must be 4 bytes wide
 * (both asserted). Any remaining payload dwords are sent as 0.
 * NOTE(review): the parameter lines declaring fqm_type, fqm and fqm_length,
 * the return type and the braces are not shown in this listing; confirm
 * against the complete file.
 */
597 gen7_mfc_fqm_state(VADriverContextP ctx,
601 struct gen6_encoder_context *gen6_encoder_context)
603 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
/* Zero-filled: all 32 dwords are emitted below even when fqm_length < 32, so
 * the tail must not carry indeterminate stack contents into the batch. */
604 unsigned int fqm_buffer[32] = { 0 };
606 assert(fqm_length <= 32);
607 assert(sizeof(*fqm) == 4);
608 memcpy(fqm_buffer, fqm, fqm_length * 4);
610 BEGIN_BCS_BATCH(batch, 34);
611 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
612 OUT_BCS_BATCH(batch, fqm_type << 0);
613 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
614 ADVANCE_BCS_BATCH(batch);
/*
 * Programs the four AVC forward quantiser matrices on gen7 by calling
 * gen7_mfc_fqm_state() once per matrix type with a flat table (each 16-bit
 * half is 0x1000). The 4x4 types pass 24 dwords, the 8x8 types pass 32.
 * NOTE(review): the closing line of the initialiser and the brace lines are
 * not shown in this listing; confirm against the complete file.
 */
617 static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
619 unsigned int qm[32] = {
620 0x10001000, 0x10001000, 0x10001000, 0x10001000,
621 0x10001000, 0x10001000, 0x10001000, 0x10001000,
622 0x10001000, 0x10001000, 0x10001000, 0x10001000,
623 0x10001000, 0x10001000, 0x10001000, 0x10001000,
624 0x10001000, 0x10001000, 0x10001000, 0x10001000,
625 0x10001000, 0x10001000, 0x10001000, 0x10001000,
626 0x10001000, 0x10001000, 0x10001000, 0x10001000,
627 0x10001000, 0x10001000, 0x10001000, 0x10001000
630 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
631 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
632 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
633 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
/*
 * Emits two 10-dword MFX_AVC_REF_IDX_STATE commands, one selecting list L0
 * and one selecting L1. Each carries eight entry dwords: the first is
 * 0x80808020 (L0) or 0x80808022 (L1) and the other seven are 0x80808080.
 * NOTE(review): the declaration of `i` and the brace lines are not shown in
 * this listing; confirm against the complete file.
 */
636 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
638 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
641 BEGIN_BCS_BATCH(batch, 10);
/* 8 is the length field: 10 dwords in total, minus 2. */
642 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
643 OUT_BCS_BATCH(batch, 0); //Select L0
644 OUT_BCS_BATCH(batch, 0x80808020); //Only 1 reference
645 for(i = 0; i < 7; i++) {
646 OUT_BCS_BATCH(batch, 0x80808080);
648 ADVANCE_BCS_BATCH(batch);
/* Same layout again for list L1. */
650 BEGIN_BCS_BATCH(batch, 10);
651 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
652 OUT_BCS_BATCH(batch, 1); //Select L1
653 OUT_BCS_BATCH(batch, 0x80808022); //Only 1 reference
654 for(i = 0; i < 7; i++) {
655 OUT_BCS_BATCH(batch, 0x80808080);
657 ADVANCE_BCS_BATCH(batch);
/*
 * Emits an MFC_AVC_INSERT_OBJECT command that carries lenght_in_dws dwords
 * of caller-supplied data (insert_data) inline in the batch.
 *
 * data_bits_in_last_dw and skip_emul_byte_count are shifted into the
 * control dword as given. emulation_flag, is_last_header and
 * is_end_of_slice are normalised to 0/1 with `!!` before being shifted in.
 * NOTE(review): the return type, braces and the opener of the control dword
 * are not shown in this listing; confirm against the complete file.
 */
661 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context,
662 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
663 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag)
665 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
/* Two command dwords (header + control) precede the payload. */
667 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
669 OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
671 (0 << 16) | /* always start at offset 0 */
672 (data_bits_in_last_dw << 8) |
673 (skip_emul_byte_count << 4) |
674 (!!emulation_flag << 3) |
675 ((!!is_last_header) << 2) |
676 ((!!is_end_of_slice) << 1) |
677 (0 << 0)); /* FIXME: ??? */
/* Payload is copied into the batch as raw bytes. */
679 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
680 ADVANCE_BCS_BATCH(batch);
/*
 * Emits an 11-dword MFC_AVC_PAK_OBJECT command for one intra macroblock at
 * (x, y) and returns the command length in dwords (11).
 *
 * end_mb is placed at bit 26 of the "last MB" dword together with qp.
 * msg[1], msg[2] and msg[3] (masked with 0xFC) are forwarded as the
 * intra-specific dwords; msg presumably points at the VME output for this
 * macroblock (TODO confirm against the caller).
 * target_mb_size and max_mb_size fill the final size dword.
 * NOTE(review): the return type, braces and part of the MB-info dword
 * (including its opener) are not shown in this listing; confirm against the
 * complete file.
 */
684 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
685 struct gen6_encoder_context *gen6_encoder_context,
686 unsigned char target_mb_size, unsigned char max_mb_size)
688 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
689 int len_in_dwords = 11;
691 BEGIN_BCS_BATCH(batch, len_in_dwords);
693 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
694 OUT_BCS_BATCH(batch, 0);
695 OUT_BCS_BATCH(batch, 0);
697 (0 << 24) | /* PackedMvNum, Debug*/
698 (0 << 20) | /* No motion vector */
699 (1 << 19) | /* CbpDcY */
700 (1 << 18) | /* CbpDcU */
701 (1 << 17) | /* CbpDcV */
/* Upper 16 bits all set; y at bits 8.., x in the low byte. */
704 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
705 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
706 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
708 /*Stuff for Intra MB*/
709 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
710 OUT_BCS_BATCH(batch, msg[2]);
711 OUT_BCS_BATCH(batch, msg[3]&0xFC);
713 /*MaxSizeInWord and TargetSizeInWord*/
714 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
715 (target_mb_size << 16) );
717 ADVANCE_BCS_BATCH(batch);
719 return len_in_dwords;
/*
 * Emits an 11-dword MFC_AVC_PAK_OBJECT command for one inter macroblock at
 * (x, y) and returns the command length in dwords (11).
 *
 * offset is written as the third dword, after a dword of 32; presumably it
 * is the position of this macroblock's motion vectors in the indirect MV
 * object (TODO confirm against the caller).
 * For B slices the "last MB" dword additionally sets 0xF in bits 28..31.
 * target_mb_size and max_mb_size fill the final size dword.
 * NOTE(review): the opener of the MB-info dword, some of its operands, and
 * the lines separating the three "last MB" emits are not shown in this
 * listing. The dword budget of 11 implies only one of the three executes,
 * presumably selected by else / preprocessor branches; confirm against the
 * complete file.
 */
722 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
723 struct gen6_encoder_context *gen6_encoder_context,
724 unsigned char target_mb_size,unsigned char max_mb_size, int slice_type)
726 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
727 int len_in_dwords = 11;
729 BEGIN_BCS_BATCH(batch, len_in_dwords);
731 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
733 OUT_BCS_BATCH(batch, 32); /* 32 MV*/
734 OUT_BCS_BATCH(batch, offset);
737 (1 << 24) | /* PackedMvNum, Debug*/
738 (4 << 20) | /* 8 MV, SNB don't use it*/
739 (1 << 19) | /* CbpDcY */
740 (1 << 18) | /* CbpDcU */
741 (1 << 17) | /* CbpDcV */
742 (0 << 15) | /* Transform8x8Flag = 0*/
743 (0 << 14) | /* Frame based*/
744 (0 << 13) | /* Inter MB */
745 (1 << 8) | /* MbType = P_L0_16x16 */
746 (0 << 7) | /* MBZ for frame */
748 (2 << 4) | /* MBZ for inter*/
750 (0 << 2) | /* SkipMbFlag */
751 (0 << 0)); /* InterMbMode */
/* Upper 16 bits all set; y at bits 8.., x in the low byte. */
753 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
754 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
756 if ( slice_type == SLICE_TYPE_B) {
757 OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
759 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
762 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
766 /*Stuff for Inter MB*/
767 OUT_BCS_BATCH(batch, 0x0);
768 OUT_BCS_BATCH(batch, 0x0);
769 OUT_BCS_BATCH(batch, 0x0);
771 /*MaxSizeInWord and TargetSizeInWord*/
772 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
773 (target_mb_size << 16) );
775 ADVANCE_BCS_BATCH(batch);
777 return len_in_dwords;
/*
 * Resets the MFC context before encoding.
 *
 * Drops the references held on the post/pre-deblocking outputs, the
 * uncompressed source picture, the indirect PAK-BSE object, every direct MV
 * buffer and every reference surface, and clears those pointers to NULL.
 * Then it replaces the intra row-store, macroblock status, deblocking
 * filter row-store and BSD/MPC row-store scratch buffers with fresh
 * dri_bo_alloc() allocations.
 * NOTE(review): the declarations of `i` and `bo`, most dri_bo_alloc()
 * arguments, and the brace lines are not shown in this listing; confirm
 * against the complete file.
 */
780 static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
782 struct i965_driver_data *i965 = i965_driver_data(ctx);
783 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
787 /*Encode common setup for MFC*/
788 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
789 mfc_context->post_deblocking_output.bo = NULL;
791 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
792 mfc_context->pre_deblocking_output.bo = NULL;
794 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
795 mfc_context->uncompressed_picture_source.bo = NULL;
797 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
798 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
800 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
/* No ';' after the condition: the unreference on the next line is the
 * guarded statement, matching the reference-surface loop below. */
801 if ( mfc_context->direct_mv_buffers[i].bo != NULL)
802 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
803 mfc_context->direct_mv_buffers[i].bo = NULL;
806 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
807 if (mfc_context->reference_surfaces[i].bo != NULL)
808 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
809 mfc_context->reference_surfaces[i].bo = NULL;
/* Scratch buffers: release the old bo, then store the new allocation. */
812 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
813 bo = dri_bo_alloc(i965->intel.bufmgr,
818 mfc_context->intra_row_store_scratch_buffer.bo = bo;
820 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
821 bo = dri_bo_alloc(i965->intel.bufmgr,
826 mfc_context->macroblock_status_buffer.bo = bo;
828 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
829 bo = dri_bo_alloc(i965->intel.bufmgr,
831 49152, /* 6 * 128 * 64 */
834 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
836 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
837 bo = dri_bo_alloc(i965->intel.bufmgr,
839 12288, /* 1.5 * 128 * 64 */
842 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
845 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
846 struct encode_state *encode_state,
847 struct gen6_encoder_context *gen6_encoder_context)
849 struct i965_driver_data *i965 = i965_driver_data(ctx);
850 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
851 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
852 struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
853 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
854 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
855 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; /* FIXME: multi slices */
856 VAEncH264DecRefPicMarkingBuffer *pDecRefPicMarking = NULL;
857 unsigned int *msg = NULL, offset = 0;
858 int emit_new_state = 1, object_len_in_bytes;
859 int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
860 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
861 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
863 int rate_control_mode = pSequenceParameter->rate_control_method;
864 unsigned char target_mb_size = mfc_context->bit_rate_control_context[1-is_intra].TargetSizeInWord;
865 unsigned char max_mb_size = mfc_context->bit_rate_control_context[1-is_intra].MaxSizeInWord;
866 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
867 unsigned char *slice_header = NULL;
868 int slice_header_length_in_bits = 0;
869 unsigned int tail_data[] = { 0x0 };
871 if (encode_state->dec_ref_pic_marking)
872 pDecRefPicMarking = (VAEncH264DecRefPicMarkingBuffer *)encode_state->dec_ref_pic_marking->buffer;
874 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, pDecRefPicMarking, &slice_header);
876 if ( rate_control_mode == 0) {
877 qp = mfc_context->bit_rate_control_context[1-is_intra].QpPrimeY;
880 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
883 dri_bo_map(vme_context->vme_output.bo , 1);
884 msg = (unsigned int *)vme_context->vme_output.bo->virtual;
887 for (y = 0; y < height_in_mbs; y++) {
888 for (x = 0; x < width_in_mbs; x++) {
889 int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
891 if (emit_new_state) {
892 intel_batchbuffer_emit_mi_flush(batch);
894 if (IS_GEN7(i965->intel.device_id)) {
895 gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
896 gen7_mfc_surface_state(ctx, gen6_encoder_context);
897 gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
899 gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
900 gen6_mfc_surface_state(ctx, gen6_encoder_context);
901 gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
904 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
905 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
907 if (IS_GEN7(i965->intel.device_id)) {
908 gen7_mfc_avc_img_state(ctx, encode_state, gen6_encoder_context);
909 gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
910 gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
912 gen6_mfc_avc_img_state(ctx, encode_state,gen6_encoder_context);
913 gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
914 gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
917 gen6_mfc_avc_directmode_state(ctx, gen6_encoder_context);
918 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
919 gen6_mfc_avc_slice_state(ctx, pSliceParameter->slice_type,
920 encode_state, gen6_encoder_context,
921 rate_control_mode == 0, pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta);
923 if (encode_state->packed_header_data[VAEncPackedHeaderSPS]) {
924 VAEncPackedHeaderParameterBuffer *param = NULL;
925 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderSPS]->buffer;
926 unsigned int length_in_bits;
928 assert(encode_state->packed_header_param[VAEncPackedHeaderSPS]);
929 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderSPS]->buffer;
930 length_in_bits = param->length_in_bits[0];
932 gen6_mfc_avc_insert_object(ctx,
933 gen6_encoder_context,
935 ALIGN(length_in_bits, 32) >> 5,
936 length_in_bits & 0x1f,
937 param->skip_emulation_check_count,
940 param->insert_emulation_bytes);
943 if (encode_state->packed_header_data[VAEncPackedHeaderPPS]) {
944 VAEncPackedHeaderParameterBuffer *param = NULL;
945 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderPPS]->buffer;
946 unsigned int length_in_bits;
948 assert(encode_state->packed_header_param[VAEncPackedHeaderPPS]);
949 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderPPS]->buffer;
950 length_in_bits = param->length_in_bits[0];
952 gen6_mfc_avc_insert_object(ctx,
953 gen6_encoder_context,
955 ALIGN(length_in_bits, 32) >> 5,
956 length_in_bits & 0x1f,
957 param->skip_emulation_check_count,
960 param->insert_emulation_bytes);
963 gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
964 (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
965 5, /* first 5 bytes are start code + nal unit type */
972 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context,target_mb_size, max_mb_size);
975 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, target_mb_size, max_mb_size, pSliceParameter->slice_type);
979 if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
981 intel_batchbuffer_end_atomic(batch);
982 intel_batchbuffer_flush(batch);
984 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
989 gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
990 tail_data, sizeof(tail_data) >> 2, 32,
991 sizeof(tail_data), 1, 1, 1);
994 dri_bo_unmap(vme_context->vme_output.bo);
998 intel_batchbuffer_end_atomic(batch);
1002 gen6_mfc_free_avc_surface(void **data)
1004 struct gen6_mfc_avc_surface_aux *avc_surface = *data;
1009 dri_bo_unreference(avc_surface->dmv_top);
1010 avc_surface->dmv_top = NULL;
1011 dri_bo_unreference(avc_surface->dmv_bottom);
1012 avc_surface->dmv_bottom = NULL;
1018 static void gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
1019 struct gen6_mfc_context *mfc_context)
1021 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1023 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1024 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1025 float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
1026 int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
1027 int intra_mb_size = inter_mb_size * 5.0;
1030 mfc_context->bit_rate_control_context[0].target_mb_size = intra_mb_size;
1031 mfc_context->bit_rate_control_context[0].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
1032 mfc_context->bit_rate_control_context[1].target_mb_size = inter_mb_size;
1033 mfc_context->bit_rate_control_context[1].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
1035 for(i = 0 ; i < 2; i++) {
1036 mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
1037 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
1038 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
1039 mfc_context->bit_rate_control_context[i].GrowInit = 6;
1040 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
1041 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
1042 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
1044 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
1045 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
1046 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
1047 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
1048 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
1049 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
1052 mfc_context->bit_rate_control_context[0].TargetSizeInWord = (intra_mb_size + 16)/ 16;
1053 mfc_context->bit_rate_control_context[1].TargetSizeInWord = (inter_mb_size + 16)/ 16;
1055 mfc_context->bit_rate_control_context[0].MaxSizeInWord = mfc_context->bit_rate_control_context[0].TargetSizeInWord * 1.5;
1056 mfc_context->bit_rate_control_context[1].MaxSizeInWord = mfc_context->bit_rate_control_context[1].TargetSizeInWord * 1.5;
1059 static int gen6_mfc_bit_rate_control_context_update(struct encode_state *encode_state,
1060 struct gen6_mfc_context *mfc_context,
1061 int current_frame_size)
1063 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1064 int control_index = 1 - (pSliceParameter->slice_type == SLICE_TYPE_I);
1065 int oldQp = mfc_context->bit_rate_control_context[control_index].QpPrimeY;
1068 printf("conrol_index = %d, start_qp = %d, result = %d, target = %d\n", control_index,
1069 mfc_context->bit_rate_control_context[control_index].QpPrimeY, current_frame_size,
1070 mfc_context->bit_rate_control_context[control_index].target_frame_size );
1073 if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 4.0 ) {
1074 mfc_context->bit_rate_control_context[control_index].QpPrimeY += 4;
1075 } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 2.0 ) {
1076 mfc_context->bit_rate_control_context[control_index].QpPrimeY += 3;
1077 } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.50 ) {
1078 mfc_context->bit_rate_control_context[control_index].QpPrimeY += 2;
1079 } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.20 ) {
1080 mfc_context->bit_rate_control_context[control_index].QpPrimeY ++;
1081 } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.30 ) {
1082 mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 3;
1083 } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.50 ) {
1084 mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 2;
1085 } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.80 ) {
1086 mfc_context->bit_rate_control_context[control_index].QpPrimeY --;
1089 if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY > 51)
1090 mfc_context->bit_rate_control_context[control_index].QpPrimeY = 51;
1091 if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY < 1)
1092 mfc_context->bit_rate_control_context[control_index].QpPrimeY = 1;
1094 if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY != oldQp)
/*
 * Bind the buffer objects used to encode the current picture into
 * mfc_context and then fill the batch buffer through
 * gen6_mfc_avc_pipeline_programing().
 *
 * ctx:                  driver context; used to look up surfaces/buffers.
 * encode_state:         supplies the picture parameters, the current
 *                       render target and the rate-control inputs.
 * gen6_encoder_context: owner of the mfc_context that is filled in.
 *
 * Every buffer object stored into mfc_context here gets an extra
 * reference via dri_bo_reference().
 *
 * NOTE(review): vaStatus is initialised to VA_STATUS_SUCCESS and never
 * reassigned in the lines shown; the return statement itself is not
 * visible here.
 */
1100 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx,
1101 struct encode_state *encode_state,
1102 struct gen6_encoder_context *gen6_encoder_context)
1104 struct i965_driver_data *i965 = i965_driver_data(ctx);
1105 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1106 struct object_surface *obj_surface;
1107 struct object_buffer *obj_buffer;
1108 struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
1110 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1111 VAStatus vaStatus = VA_STATUS_SUCCESS;
1114 /* Set up all the input and output objects */
1116 /* Set up the current frame and its direct MV buffers */
1117 obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
1118 assert(obj_surface);
/* Requests NV12 backing storage for the current picture's surface. */
1119 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
/* First use of this surface: attach the auxiliary struct holding its two DMV buffers. */
1120 if ( obj_surface->private_data == NULL) {
/* NOTE(review): the calloc() result is dereferenced without a NULL check. */
1121 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1122 gen6_avc_surface->dmv_top =
1123 dri_bo_alloc(i965->intel.bufmgr,
1127 gen6_avc_surface->dmv_bottom =
1128 dri_bo_alloc(i965->intel.bufmgr,
1132 assert(gen6_avc_surface->dmv_top);
1133 assert(gen6_avc_surface->dmv_bottom);
1134 obj_surface->private_data = (void *)gen6_avc_surface;
/* gen6_mfc_free_avc_surface is installed as the release callback for private_data. */
1135 obj_surface->free_private_data = (void *)gen6_mfc_free_avc_surface;
1137 gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
/* The current picture's DMV buffers occupy the last two direct_mv_buffers slots. */
1138 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
1139 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
1140 dri_bo_reference(gen6_avc_surface->dmv_top);
1141 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* The current picture's surface BO is also used as the post-deblocking output. */
1143 mfc_context->post_deblocking_output.bo = obj_surface->bo;
1144 dri_bo_reference(mfc_context->post_deblocking_output.bo);
/* Width/height come from the surface's orig_* fields, the pitches from its width/height fields. */
1146 mfc_context->surface_state.width = obj_surface->orig_width;
1147 mfc_context->surface_state.height = obj_surface->orig_height;
1148 mfc_context->surface_state.w_pitch = obj_surface->width;
1149 mfc_context->surface_state.h_pitch = obj_surface->height;
1151 /* Set up reference frames and their direct MV buffers */
1152 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
/* Entries whose picture_id is VA_INVALID_ID are not looked up. */
1153 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) {
1154 obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
1155 assert(obj_surface);
1156 if (obj_surface->bo != NULL) {
1157 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1158 dri_bo_reference(obj_surface->bo);
1160 /* Check DMV buffer */
1161 if ( obj_surface->private_data == NULL) {
/* NOTE(review): same unchecked calloc() as for the current picture above. */
1163 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1164 gen6_avc_surface->dmv_top =
1165 dri_bo_alloc(i965->intel.bufmgr,
1169 gen6_avc_surface->dmv_bottom =
1170 dri_bo_alloc(i965->intel.bufmgr,
1174 assert(gen6_avc_surface->dmv_top);
1175 assert(gen6_avc_surface->dmv_bottom);
1176 obj_surface->private_data = gen6_avc_surface;
1177 obj_surface->free_private_data = gen6_mfc_free_avc_surface;
1180 gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1181 /* Setup DMV buffer */
/* Reference i uses direct_mv_buffers slots 2*i (top) and 2*i+1 (bottom). */
1182 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
1183 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
1184 dri_bo_reference(gen6_avc_surface->dmv_top);
1185 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* The current render target supplies the uncompressed source picture. */
1191 obj_surface = SURFACE(encode_state->current_render_target);
1192 assert(obj_surface && obj_surface->bo);
1193 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
1194 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* NOTE(review): obj_buffer is dereferenced without a NULL check, unlike the surfaces above. */
1196 obj_buffer = BUFFER (pPicParameter->CodedBuf); /* FIXME: fix this later */
1197 bo = obj_buffer->buffer_store->bo;
/*
 * The coded buffer's BO is the indirect PAK-BSE object. Its offset is
 * sizeof(VACodedBufferSegment) rounded up to 64 bytes, and its end offset
 * is (size_element - 0x1000) rounded up to a 0x1000 boundary.
 */
1199 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
1200 mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
1201 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN (obj_buffer->size_element - 0x1000, 0x1000);
1202 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
1204 /* Programming bit rate control */
/* MaxSizeInWord == 0 in context 0 is treated as "rate control not initialised yet". */
1205 if ( mfc_context->bit_rate_control_context[0].MaxSizeInWord == 0 )
1206 gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context);
1208 /* Programming the BCS pipeline */
1209 gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context); //filling the pipeline
1214 static VAStatus gen6_mfc_run(VADriverContextP ctx,
1215 struct encode_state *encode_state,
1216 struct gen6_encoder_context *gen6_encoder_context)
1218 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
1220 intel_batchbuffer_flush(batch); //run the pipeline
1222 return VA_STATUS_SUCCESS;
/*
 * Read back the macroblock status buffer and report the size of the
 * picture that was just encoded.
 *
 * ctx, encode_state:    not used in the lines shown.
 * gen6_encoder_context: owner of the mfc_context whose
 *                       macroblock_status_buffer is read.
 * encoded_bits_size:    out parameter; receives the accumulated size.
 *
 * Always returns VA_STATUS_SUCCESS.
 */
1225 static VAStatus gen6_mfc_stop(VADriverContextP ctx,
1226 struct encode_state *encode_state,
1227 struct gen6_encoder_context *gen6_encoder_context,
1228 int *encoded_bits_size)
1230 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1231 unsigned int *status_mem;
1232 unsigned int buffer_size_bits = 0;
/* Picture size in 16x16 macroblocks, rounded up. */
1233 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1234 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* NOTE(review): the result of dri_bo_map() is not checked before ->virtual is used. */
1237 dri_bo_map(mfc_context->macroblock_status_buffer.bo, 1);
1238 status_mem = (unsigned int *)mfc_context->macroblock_status_buffer.bo->virtual;
1239 //Detecting encoder buffer size and bit rate control result
/* One iteration per macroblock of the picture. */
1240 for(i = 0; i < width_in_mbs * height_in_mbs; i++) {
/*
 * The upper 16 bits of the second status word are added to the total.
 * NOTE(review): no advance of status_mem is visible in this listing, so
 * as shown every iteration reads the same word - confirm against the
 * full file.
 */
1241 unsigned short current_mb = status_mem[1] >> 16;
1242 buffer_size_bits += current_mb;
1245 dri_bo_unmap(mfc_context->macroblock_status_buffer.bo);
/* The unsigned total is stored into an int for the caller. */
1247 *encoded_bits_size = buffer_size_bits;
1249 return VA_STATUS_SUCCESS;
1253 gen6_mfc_avc_encode_picture(VADriverContextP ctx,
1254 struct encode_state *encode_state,
1255 struct gen6_encoder_context *gen6_encoder_context)
1257 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1258 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1259 int rate_control_mode = pSequenceParameter->rate_control_method;
1260 int MAX_CBR_INTERATE = 4;
1261 int current_frame_bits_size;
1264 for(i = 0; i < MAX_CBR_INTERATE; i++) {
1265 gen6_mfc_init(ctx, gen6_encoder_context);
1266 gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1267 gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
1268 gen6_mfc_stop(ctx, encode_state, gen6_encoder_context, ¤t_frame_bits_size);
1269 if ( rate_control_mode == 0) {
1270 if ( gen6_mfc_bit_rate_control_context_update( encode_state, mfc_context, current_frame_bits_size) )
1277 return VA_STATUS_SUCCESS;
/*
 * Entry point of the MFC encoding stage: selects the per-codec encode
 * routine and yields its status.
 *
 * VAProfileH264Baseline is handed to gen6_mfc_avc_encode_picture().  The
 * lines shown also assign VA_STATUS_ERROR_UNSUPPORTED_PROFILE to the
 * status, presumably for every other profile - the switch header and the
 * default label are not visible in this listing.
 */
1281 gen6_mfc_pipeline(VADriverContextP ctx,
1283 struct encode_state *encode_state,
1284 struct gen6_encoder_context *gen6_encoder_context)
1289 case VAProfileH264Baseline:
1290 vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1293 /* FIXME: add support for other profiles */
1295 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1302 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
1307 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1311 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1312 mfc_context->post_deblocking_output.bo = NULL;
1314 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1315 mfc_context->pre_deblocking_output.bo = NULL;
1317 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1318 mfc_context->uncompressed_picture_source.bo = NULL;
1320 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
1321 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1323 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1324 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1325 mfc_context->direct_mv_buffers[i].bo = NULL;
1328 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1329 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1331 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1332 mfc_context->macroblock_status_buffer.bo = NULL;
1334 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1335 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1337 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1338 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1341 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1342 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1343 mfc_context->reference_surfaces[i].bo = NULL;