2 * Copyright © 2010-2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhou Chang <chang.zhou@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
40 #include "i965_encoder_utils.h"
/*
 * gen6_mfc_pipe_mode_select: emit the 4-dword MFX_PIPE_MODE_SELECT command
 * on the BCS batch owned by gen6_encoder_context, selecting AVC in encoding
 * mode.
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the embedded listing numbers are not contiguous (42, 44, 46,
 * 48, 50, 58, 70 are absent). The return-type line, the braces and the
 * statement openers of the two multi-line dwords below are presumably
 * missing from this copy -- confirm against the original file.
 */
43 gen6_mfc_pipe_mode_select(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
45 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
/* Reserve space for the whole command (4 dwords) before writing it. */
47 BEGIN_BCS_BATCH(batch, 4);
/* Command header; the low bits carry the dword length minus 2. */
49 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
/*
 * Second dword: mode flags OR-ed together.
 * NOTE(review): bit 10 is written as 1 here although the comment says
 * "disable Stream-Out"; gen7_mfc_pipe_mode_select writes 0 for the same
 * field. Confirm the intended polarity against the hardware documentation.
 */
51 (1 << 10) | /* disable Stream-Out , advanced QP/bitrate control need enable it*/
52 (1 << 9) | /* Post Deblocking Output */
53 (0 << 8) | /* Pre Deblocking Output */
54 (0 << 7) | /* disable TLB prefetch */
55 (0 << 5) | /* not in stitch mode */
56 (1 << 4) | /* encoding mode */
57 (2 << 0)); /* Standard Select: AVC */
/* Third dword: error-handling and debug flags; only bit 6 is set. */
59 (0 << 20) | /* round flag in PB slice */
60 (0 << 19) | /* round flag in Intra8x8 */
61 (0 << 7) | /* expand NOA bus flag */
62 (1 << 6) | /* must be 1 */
63 (0 << 5) | /* disable clock gating for NOA */
64 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
65 (0 << 3) | /* terminate if AVC mbdata error occurs */
66 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
67 (0 << 1) | /* AVC long field motion vector */
68 (0 << 0)); /* always calculate AVC ILDB boundary strength */
/* Fourth and last dword is written as zero. */
69 OUT_BCS_BATCH(batch, 0);
71 ADVANCE_BCS_BATCH(batch);
/*
 * gen7_mfc_pipe_mode_select: emit the 5-dword MFX_PIPE_MODE_SELECT command
 * in encoding mode for the codec given by standard_select.
 *
 * standard_select must be MFX_FORMAT_MPEG2 or MFX_FORMAT_AVC (asserted).
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the parameter line declaring standard_select (listing line
 * 76), the braces and the openers/closers of the multi-line dwords are not
 * present in this copy -- confirm against the original file.
 */
75 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
77 struct gen6_encoder_context *gen6_encoder_context)
79 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
/* Only the two codecs below are accepted by this helper. */
81 assert(standard_select == MFX_FORMAT_MPEG2 ||
82 standard_select == MFX_FORMAT_AVC);
84 BEGIN_BCS_BATCH(batch, 5);
/* Command header; the low bits carry the dword length minus 2. */
85 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
/* Second dword: mode flags; the codec goes into the low bits. */
87 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
88 (MFD_MODE_VLD << 15) | /* VLD mode */
89 (0 << 10) | /* disable Stream-Out */
90 (1 << 9) | /* Post Deblocking Output */
91 (0 << 8) | /* Pre Deblocking Output */
92 (0 << 5) | /* not in stitch mode */
93 (1 << 4) | /* encoding mode */
94 (standard_select << 0)); /* standard select: avc or mpeg2 */
/*
 * Third dword: every visible field is written as 0.
 * NOTE(review): the end of this expression (listing lines 102-103) is not
 * visible here.
 */
96 (0 << 7) | /* expand NOA bus flag */
97 (0 << 6) | /* disable slice-level clock gating */
98 (0 << 5) | /* disable clock gating for NOA */
99 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
100 (0 << 3) | /* terminate if AVC mbdata error occurs */
101 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
/* Remaining two dwords are written as zero. */
104 OUT_BCS_BATCH(batch, 0);
105 OUT_BCS_BATCH(batch, 0);
107 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_surface_state: emit the 6-dword MFX_SURFACE_STATE command using
 * the width, height, w_pitch and h_pitch stored in
 * mfc_context->surface_state.
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the return-type line, braces and the OUT_BCS_BATCH openers
 * of the three multi-line dwords are missing from this copy (gaps in the
 * embedded numbering at 120, 123, 131).
 */
111 gen6_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
113 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
114 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
116 BEGIN_BCS_BATCH(batch, 6);
/* Command header followed by one zero dword. */
118 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
119 OUT_BCS_BATCH(batch, 0);
/* Surface size, stored minus one: height at bit 19, width at bit 6. */
121 ((mfc_context->surface_state.height - 1) << 19) |
122 ((mfc_context->surface_state.width - 1) << 6));
/* Format, pitch (minus one) and tiling. */
124 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
125 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
126 (0 << 22) | /* surface object control state, FIXME??? */
127 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
128 (0 << 2) | /* must be 0 for interleave U/V */
129 (1 << 1) | /* must be y-tiled */
130 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* The chroma plane's Y offset is taken from h_pitch. */
132 (0 << 16) | /* must be 0 for interleave U/V */
133 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
/* Last dword is written as zero. */
134 OUT_BCS_BATCH(batch, 0);
135 ADVANCE_BCS_BATCH(batch);
/*
 * gen7_mfc_surface_state: gen7 variant of the 6-dword MFX_SURFACE_STATE
 * command. It differs from gen6_mfc_surface_state only in the bit positions
 * of the size fields (height at bit 18, width at bit 4).
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the return-type line, braces and the OUT_BCS_BATCH openers
 * of the three multi-line dwords are missing from this copy (gaps in the
 * embedded numbering at 148, 151, 159).
 */
139 gen7_mfc_surface_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
141 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
142 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
144 BEGIN_BCS_BATCH(batch, 6);
/* Command header followed by one zero dword. */
146 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
147 OUT_BCS_BATCH(batch, 0);
/* Surface size, stored minus one: height at bit 18, width at bit 4. */
149 ((mfc_context->surface_state.height - 1) << 18) |
150 ((mfc_context->surface_state.width - 1) << 4));
/* Format, pitch (minus one) and tiling. */
152 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
153 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
154 (0 << 22) | /* surface object control state, FIXME??? */
155 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
156 (0 << 2) | /* must be 0 for interleave U/V */
157 (1 << 1) | /* must be tiled */
158 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* The chroma plane's Y offset is taken from h_pitch. */
160 (0 << 16) | /* must be 0 for interleave U/V */
161 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
/* Last dword is written as zero. */
162 OUT_BCS_BATCH(batch, 0);
163 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_pipe_buf_addr_state: emit the 24-dword MFX_PIPE_BUF_ADDR_STATE
 * command, writing relocations for the buffer objects held in mfc_context:
 * post-deblocking output, uncompressed source picture, macroblock status
 * buffer (used twice), intra and deblocking row-store scratch buffers, and
 * one slot per entry of reference_surfaces.
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the declaration of the loop index i, the braces, the else
 * branch of the reference loop and the final argument of several
 * OUT_BCS_RELOC calls are missing from this copy (gaps in the embedded
 * numbering at 171, 191, 194, 200-201, 203-204).
 */
167 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
169 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
170 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
173 BEGIN_BCS_BATCH(batch, 24);
175 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
/* No pre-deblocking output buffer is supplied: the address is zero. */
177 OUT_BCS_BATCH(batch, 0); /* pre output addr */
179 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
180 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
181 0); /* post output addr */
183 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
184 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185 0); /* uncompressed data */
/* The stream-out slot points at the macroblock status buffer. */
186 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
187 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188 0); /* StreamOut data*/
/* Intra row-store scratch buffer (the closing argument line is not visible). */
189 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
190 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* Deblocking-filter row-store scratch buffer (closing argument line not visible). */
192 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
193 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/*
 * One dword per reference slot: a relocation when a buffer object is set,
 * otherwise zero. NOTE(review): the "7..22" range implies 16 entries in
 * reference_surfaces -- confirm against the struct definition.
 */
195 /* 7..22 Reference pictures*/
196 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
197 if ( mfc_context->reference_surfaces[i].bo != NULL) {
198 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
199 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202 OUT_BCS_BATCH(batch, 0);
/* The same macroblock status buffer is written again as the final dword. */
205 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
206 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
207 0); /* Macroblock status buffer*/
209 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_ind_obj_base_addr_state: emit the 11-dword
 * MFX_IND_OBJ_BASE_ADDR_STATE command. The indirect MV object base is the
 * VME output buffer; the PAK-BSE object base and its upper bound both refer
 * to mfc_indirect_pak_bse_object.bo.
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the return-type line, braces, the OUT_BCS_RELOC openers of
 * the last two relocations and the final argument of the first of them are
 * missing from this copy (gaps in the embedded numbering at 232, 235-236).
 */
213 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
215 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
216 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
217 struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
219 BEGIN_BCS_BATCH(batch, 11);
221 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
/* The two dwords before the MV object base are written as zero. */
222 OUT_BCS_BATCH(batch, 0);
223 OUT_BCS_BATCH(batch, 0);
224 /* MFX Indirect MV Object Base Address */
/* Read-only relocation: the write domain argument is 0. */
225 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
/* Five zero dwords follow before the PAK-BSE addresses. */
226 OUT_BCS_BATCH(batch, 0);
227 OUT_BCS_BATCH(batch, 0);
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
231 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
/* First relocation: base address (its offset argument is not visible here). */
233 mfc_context->mfc_indirect_pak_bse_object.bo,
234 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* Second relocation: same buffer at end_offset. */
237 mfc_context->mfc_indirect_pak_bse_object.bo,
238 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239 mfc_context->mfc_indirect_pak_bse_object.end_offset);
241 ADVANCE_BCS_BATCH(batch);
/*
 * gen7_mfc_ind_obj_base_addr_state: gen7 variant of the 11-dword
 * MFX_IND_OBJ_BASE_ADDR_STATE command. It matches the gen6 variant except
 * that the dword after the MV object base is 0x80000000 instead of 0.
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the return-type line, braces, the OUT_BCS_RELOC openers of
 * the last two relocations and the final argument of the first of them are
 * missing from this copy (gaps in the embedded numbering at 264, 267-268).
 */
245 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
247 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
248 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
249 struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
251 BEGIN_BCS_BATCH(batch, 11);
253 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
/* The two dwords before the MV object base are written as zero. */
254 OUT_BCS_BATCH(batch, 0);
255 OUT_BCS_BATCH(batch, 0);
256 /* MFX Indirect MV Object Base Address */
/* Read-only relocation: the write domain argument is 0. */
257 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
/* Presumably the upper bound for the MV object; see the original comment. */
258 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
259 OUT_BCS_BATCH(batch, 0);
260 OUT_BCS_BATCH(batch, 0);
261 OUT_BCS_BATCH(batch, 0);
262 OUT_BCS_BATCH(batch, 0);
263 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
/* First relocation: base address (its offset argument is not visible here). */
265 mfc_context->mfc_indirect_pak_bse_object.bo,
266 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* Second relocation: same buffer at end_offset. */
269 mfc_context->mfc_indirect_pak_bse_object.bo,
270 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
271 mfc_context->mfc_indirect_pak_bse_object.end_offset);
273 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_bsp_buf_base_addr_state: emit the 4-dword
 * MFX_BSP_BUF_BASE_ADDR_STATE command. Only the first address slot is
 * populated (BSD/MPC row-store scratch buffer); the other two are zero.
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the return-type line, braces and the final argument of the
 * OUT_BCS_RELOC call (listing line 287) are missing from this copy.
 */
277 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
279 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
280 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
282 BEGIN_BCS_BATCH(batch, 4);
284 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
285 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
286 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* The two remaining address slots are left as zero. */
288 OUT_BCS_BATCH(batch, 0);
289 OUT_BCS_BATCH(batch, 0);
291 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_avc_img_state: emit the 13-dword MFX_AVC_IMG_STATE command.
 *
 * The picture size in macroblocks is derived from
 * mfc_context->surface_state (rounded up to a multiple of 16). The entropy
 * coding mode and the 8x8 transform flag come from the picture parameter
 * buffer, and the direct 8x8 inference flag from the sequence parameter
 * buffer. All other fields are constants.
 *
 * encode_state->seq_param_ext and encode_state->pic_param_ext are
 * dereferenced without a NULL check. ctx is not referenced by the visible
 * statements.
 *
 * NOTE(review): the return-type line, braces and the OUT_BCS_BATCH openers
 * of several multi-line dwords are missing from this copy (gaps in the
 * embedded numbering at 307, 309, 312, 321, 336, 338).
 */
295 gen6_mfc_avc_img_state(VADriverContextP ctx,struct encode_state *encode_state,
296 struct gen6_encoder_context *gen6_encoder_context)
298 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
299 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
300 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
301 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* Round the pixel dimensions up to whole 16x16 macroblocks. */
302 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
303 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
305 BEGIN_BCS_BATCH(batch, 13);
306 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
/* Total macroblock count, truncated to 16 bits. */
308 ((width_in_mbs * height_in_mbs) & 0xFFFF));
/* Height and width in macroblocks (not minus one, unlike the gen7 variant). */
310 (height_in_mbs << 16) |
311 (width_in_mbs << 0));
313 (0 << 24) | /*Second Chroma QP Offset*/
314 (0 << 16) | /*Chroma QP Offset*/
315 (0 << 14) | /*Max-bit conformance Intra flag*/
316 (0 << 13) | /*Max Macroblock size conformance Inter flag*/
317 (1 << 12) | /*Should always be written as "1" */
318 (0 << 10) | /*QM Preset Flag */
319 (0 << 8) | /*Image Structure*/
320 (0 << 0) ); /*Current Decoded Image Frame Store ID, reserved in Encode mode*/
322 (400 << 16) | /*Minimum Frame size*/
323 (0 << 15) | /*Disable reading of Macroblock Status Buffer*/
324 (0 << 14) | /*Load BitStream Pointer only once, 1 slice 1 frame*/
325 (0 << 13) | /*CABAC 0 word insertion test enable*/
326 (1 << 12) | /*MVUnpackedEnable,compliant to DXVA*/
327 (1 << 10) | /*Chroma Format IDC, 4:2:0*/
328 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
329 (0 << 6) | /*Only valid for VLD decoding mode*/
330 (0 << 5) | /*Constrained Intra Prediction Flag, from PPS*/
331 (pSequenceParameter->direct_8x8_inference_flag << 4) | /*Direct 8x8 inference flag*/
332 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
333 (1 << 2) | /*Frame MB only flag*/
334 (0 << 1) | /*MBAFF mode is in active*/
335 (0 << 0) ); /*Field picture flag*/
/* Rate-control and report-mask flags (one operand line, 338, is not visible). */
337 (1<<16) | /*Frame Size Rate Control Flag*/
339 (1<<9) | /*MB level Rate Control Enabling Flag*/
340 (1 << 3) | /*FrameBitRateMinReportMask*/
341 (1 << 2) | /*FrameBitRateMaxReportMask*/
342 (1 << 1) | /*InterMBMaxSizeReportMask*/
343 (1 << 0) ); /*IntraMBMaxSizeReportMask*/
344 OUT_BCS_BATCH(batch, /*Inter and Intra Conformance Max size limit*/
345 (0x0600 << 16) | /*InterMbMaxSz 192 Byte*/
346 (0x0800) ); /*IntraMbMaxSz 256 Byte*/
347 OUT_BCS_BATCH(batch, 0x00000000); /*Reserved : MBZReserved*/
/* The remaining dwords are fixed constants. */
348 OUT_BCS_BATCH(batch, 0x01020304); /*Slice QP Delta for bitrate control*/
349 OUT_BCS_BATCH(batch, 0xFEFDFCFB);
350 OUT_BCS_BATCH(batch, 0x80601004); /*MAX = 128KB, MIN = 64KB*/
351 OUT_BCS_BATCH(batch, 0x00800001);
352 OUT_BCS_BATCH(batch, 0);
354 ADVANCE_BCS_BATCH(batch);
/*
 * gen7_mfc_avc_img_state: emit the 16-dword gen7 MFX_AVC_IMG_STATE command.
 *
 * The picture size in macroblocks is derived from
 * mfc_context->surface_state and written minus one. Only the entropy coding
 * mode is taken from the picture parameter buffer. The direct 8x8 inference
 * and 8x8 transform fields are hard-coded to 0 here, whereas
 * gen6_mfc_avc_img_state takes them from the parameter buffers.
 *
 * encode_state->pic_param_ext is dereferenced without a NULL check. ctx is
 * not referenced by the visible statements.
 *
 * NOTE(review): the return-type line, braces and the OUT_BCS_BATCH openers
 * of several multi-line dwords are missing from this copy (gaps in the
 * embedded numbering at 370, 372, 375, 384).
 */
358 gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
359 struct gen6_encoder_context *gen6_encoder_context)
361 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
362 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
363 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* Round the pixel dimensions up to whole 16x16 macroblocks. */
365 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
366 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
368 BEGIN_BCS_BATCH(batch, 16);
369 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
/* Total macroblock count, truncated to 16 bits. */
371 ((width_in_mbs * height_in_mbs) & 0xFFFF));
/* Height and width in macroblocks, each minus one. */
373 ((height_in_mbs - 1) << 16) |
374 ((width_in_mbs - 1) << 0));
376 (0 << 24) | /* Second Chroma QP Offset */
377 (0 << 16) | /* Chroma QP Offset */
378 (0 << 14) | /* Max-bit conformance Intra flag */
379 (0 << 13) | /* Max Macroblock size conformance Inter flag */
380 (0 << 12) | /* FIXME: Weighted_Pred_Flag */
381 (0 << 10) | /* FIXME: Weighted_BiPred_Idc */
382 (0 << 8) | /* FIXME: Image Structure */
383 (0 << 0) ); /* Current Decoded Image Frame Store ID, reserved in Encode mode */
385 (0 << 16) | /* Minimum Frame size */
386 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
387 (0 << 14) | /* Load BitStream Pointer only once, 1 slice 1 frame */
388 (0 << 13) | /* CABAC 0 word insertion test enable */
389 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
390 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
391 (0 << 9) | /* FIXME: MbMvFormatFlag */
392 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
393 (0 << 6) | /* Only valid for VLD decoding mode */
394 (0 << 5) | /* Constrained Intra Prediction Flag, from PPS */
395 (0 << 4) | /* Direct 8x8 inference flag */
396 (0 << 3) | /* Only 8x8 IDCT Transform Mode Flag */
397 (1 << 2) | /* Frame MB only flag */
398 (0 << 1) | /* MBAFF mode is in active */
399 (0 << 0)); /* Field picture flag */
400 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
401 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
402 (0xBB8 << 16) | /* InterMbMaxSz */
403 (0xEE8) ); /* IntraMbMaxSz */
404 OUT_BCS_BATCH(batch, 0); /* Reserved */
405 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
406 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
/* The remaining dwords are fixed constants; their field meanings are not documented here. */
407 OUT_BCS_BATCH(batch, 0x8C000000);
408 OUT_BCS_BATCH(batch, 0x00010000);
409 OUT_BCS_BATCH(batch, 0);
410 OUT_BCS_BATCH(batch, 0);
411 OUT_BCS_BATCH(batch, 0);
412 OUT_BCS_BATCH(batch, 0);
414 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_avc_directmode_state: emit the 69-dword MFX_AVC_DIRECTMODE_STATE
 * command. It writes one dword per direct-MV buffer slot (a relocation when
 * the buffer object is set, otherwise zero), then 32 dwords holding i/2,
 * then two zero dwords.
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): 1 + NUM_MFC_DMV_BUFFERS + 32 + 2 must equal 69, so
 * NUM_MFC_DMV_BUFFERS is presumably 34 -- confirm against its definition.
 * The declaration of i, the braces, the else keyword and the tail of the
 * OUT_BCS_RELOC call are missing from this copy.
 */
417 static void gen6_mfc_avc_directmode_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
419 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
420 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
424 BEGIN_BCS_BATCH(batch, 69);
426 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
428 /* Reference frames and Current frames */
429 for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
430 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
/* Read-only relocation: the write domain argument is 0. */
431 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
432 I915_GEM_DOMAIN_INSTRUCTION, 0,
/* Presumably the else branch: an empty slot is written as zero. */
435 OUT_BCS_BATCH(batch, 0);
/* 32 dwords holding 0,0,1,1,...,15,15 (integer division by two). */
440 for(i = 0; i < 32; i++) {
441 OUT_BCS_BATCH(batch, i/2);
/* Two trailing zero dwords complete the command. */
443 OUT_BCS_BATCH(batch, 0);
444 OUT_BCS_BATCH(batch, 0);
446 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_avc_slice_state: emit the 11-dword MFX_AVC_SLICE_STATE command
 * for a single slice covering the whole picture.
 *
 * The rate-control tuning values (MaxQpNegModifier, MaxQpPosModifier,
 * Correct[], Grow and Shrink init/resistance) come from
 * mfc_context->bit_rate_control_context[], indexed by
 * bit_rate_control_target. rate_control_enable is placed in bits 31 and 23
 * of the control dword. Only slice_params_ext[0] is consulted.
 *
 * NOTE(review): the parameter lines declaring slice_type and qp (listing
 * lines 450 and 454), the else between the two bit_rate_control_target
 * assignments (462), the declaration of i, and several operand lines (492,
 * 517-518, 520-524) are missing from this copy. grow and shrink are
 * computed, but the lines that consume them are not visible.
 */
449 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
451 struct encode_state *encode_state,
452 struct gen6_encoder_context *gen6_encoder_context,
453 int rate_control_enable,
456 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
457 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
458 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; /* TODO: multi slices support */
/* Index 0 for I slices; presumably index 1 otherwise (the else line is not visible). */
459 int bit_rate_control_target;
460 if ( slice_type == SLICE_TYPE_I )
461 bit_rate_control_target = 0;
463 bit_rate_control_target = 1;
464 int maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
465 int maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
/* Local copy of the six correction values. */
466 unsigned char correct[6];
469 for (i = 0; i < 6; i++)
470 correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
/* Pack init into the low nibble and resistance into the high nibble. */
471 unsigned char grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit +
472 (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
473 unsigned char shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit +
474 (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
/* NOTE(review): the doubled semicolon below is a harmless empty statement. */
476 BEGIN_BCS_BATCH(batch, 11);;
478 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
480 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
/* Reference count dword: zero for I slices; the other value is presumably the else branch. */
482 if ( slice_type == SLICE_TYPE_I ) {
483 OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
485 OUT_BCS_BATCH(batch, 0x00010000); /*1 reference frame*/
/* Slice QP and prediction flags (the opener and the last operand are not visible). */
489 (pSliceParameter->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
490 (0<<24) | /*Enable deblocking operation*/
491 (qp<<16) | /*Slice Quantization Parameter*/
/* The slice starts at macroblock (0,0)... */
493 OUT_BCS_BATCH(batch, 0); /*First MB X&Y , the position of current slice*/
/* ...and the next dword carries the picture height in macroblocks at bit 16. */
494 OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
497 (rate_control_enable<<31) | /*in CBR mode RateControlCounterEnable = enable*/
498 (1<<30) | /*ResetRateControlCounter*/
499 (0<<28) | /*RC Trigger Mode = Always Rate Control*/
500 (4<<24) | /*RC Stable Tolerance, middle level*/
501 (rate_control_enable<<23) | /*RC Panic Enable*/
502 (0<<22) | /*QP mode, don't modify CBP*/
503 (0<<21) | /*MB Type Direct Conversion Enabled*/
504 (0<<20) | /*MB Type Skip Conversion Enabled*/
505 (1<<19) | /*IsLastSlice*/
506 (0<<18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
507 (1<<17) | /*HeaderPresentFlag*/
508 (1<<16) | /*SliceData PresentFlag*/
509 (1<<15) | /*TailPresentFlag*/
510 (1<<13) | /*RBSP NAL TYPE*/
511 (0<<12) ); /*CabacZeroWordInsertionEnable*/
/* Offset into the indirect PAK-BSE object, taken from mfc_context. */
513 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
/* QP modifier dword (its lower operands are not visible in this copy). */
515 OUT_BCS_BATCH(batch, (maxQpN<<24) | /*Target QP - 24 is lowest QP*/
516 (maxQpP<<16) | /*Target QP + 20 is highest QP*/
/* Correction values dword (only the correct[5] operand is visible here). */
519 OUT_BCS_BATCH(batch, (correct[5] << 20) |
525 OUT_BCS_BATCH(batch, 0);
527 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_avc_qm_state: emit the 58-dword MFX_AVC_QM_STATE command: the
 * header, one dword 0xFF, then 56 dwords of 0x10101010 (every byte is 16).
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the declaration of i and the braces are missing from this
 * copy (gaps in the embedded numbering).
 */
529 static void gen6_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
531 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
534 BEGIN_BCS_BATCH(batch, 58);
/* The length field 56 equals 58 - 2. */
536 OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
537 OUT_BCS_BATCH(batch, 0xFF ) ;
/* Fill all 56 payload dwords with the same byte value 0x10. */
538 for( i = 0; i < 56; i++) {
539 OUT_BCS_BATCH(batch, 0x10101010);
542 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_avc_fqm_state: emit the 113-dword MFC_AVC_FQM_STATE command: the
 * header followed by 112 dwords of 0x10001000 (two 16-bit values of 0x1000
 * per dword).
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the declaration of i and the braces are missing from this
 * copy (gaps in the embedded numbering).
 */
545 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
547 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
550 BEGIN_BCS_BATCH(batch, 113);
551 OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
/* Fill all 112 payload dwords with the same constant. */
553 for(i = 0; i < 112;i++) {
554 OUT_BCS_BATCH(batch, 0x10001000);
557 ADVANCE_BCS_BATCH(batch);
/*
 * gen7_mfc_qm_state: emit one 18-dword MFX_QM_STATE command carrying the
 * quantiser matrix qm for the matrix selected by qm_type.
 *
 * qm_length is the number of valid dwords in qm and must be <= 16. The
 * command always carries 16 payload dwords; dwords beyond qm_length are
 * emitted as zero.
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the return-type line, the parameter lines declaring qm_type,
 * qm and qm_length (listing lines 562-564) and the braces are missing from
 * this copy.
 */
561 gen7_mfc_qm_state(VADriverContextP ctx,
565 struct gen6_encoder_context *gen6_encoder_context)
567 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
/*
 * Zero-initialised so that when qm_length < 16 (the 4x4 callers pass 12)
 * the trailing dwords sent to the hardware are deterministic, not leftover
 * stack contents.
 */
568 unsigned int qm_buffer[16] = { 0 };
570 assert(qm_length <= 16);
571 assert(sizeof(*qm) == 4);
/* Copy only the valid part of the caller's matrix. */
572 memcpy(qm_buffer, qm, qm_length * 4);
574 BEGIN_BCS_BATCH(batch, 18);
575 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
576 OUT_BCS_BATCH(batch, qm_type << 0);
/* The payload is always the full 16 dwords (64 bytes). */
577 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
578 ADVANCE_BCS_BATCH(batch);
/*
 * gen7_mfc_avc_qm_state: program the four AVC quantiser matrices (4x4
 * intra, 4x4 inter, 8x8 intra, 8x8 inter) with a flat table in which every
 * byte is 0x10, via four calls to gen7_mfc_qm_state.
 *
 * The 4x4 calls pass a length of 12 dwords and the 8x8 calls pass 16.
 *
 * NOTE(review): the braces and the closing of the initializer are missing
 * from this copy (gaps in the embedded numbering at 582, 588-589).
 */
581 static void gen7_mfc_avc_qm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
/* Flat matrix: 16 dwords, every byte 0x10. */
583 unsigned int qm[16] = {
584 0x10101010, 0x10101010, 0x10101010, 0x10101010,
585 0x10101010, 0x10101010, 0x10101010, 0x10101010,
586 0x10101010, 0x10101010, 0x10101010, 0x10101010,
587 0x10101010, 0x10101010, 0x10101010, 0x10101010
/* One command per matrix type, all sharing the same table. */
590 gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context);
591 gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context);
592 gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context);
593 gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context);
/*
 * gen7_mfc_fqm_state: emit one 34-dword MFX_FQM_STATE command carrying the
 * forward quantiser matrix fqm for the matrix selected by fqm_type.
 *
 * fqm_length is the number of valid dwords in fqm and must be <= 32. The
 * command always carries 32 payload dwords; dwords beyond fqm_length are
 * emitted as zero.
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the return-type line, the parameter lines declaring
 * fqm_type, fqm and fqm_length (listing lines 598-600) and the braces are
 * missing from this copy.
 */
597 gen7_mfc_fqm_state(VADriverContextP ctx,
601 struct gen6_encoder_context *gen6_encoder_context)
603 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
/*
 * Zero-initialised so that when fqm_length < 32 (the 4x4 callers pass 24)
 * the trailing dwords sent to the hardware are deterministic, not leftover
 * stack contents.
 */
604 unsigned int fqm_buffer[32] = { 0 };
606 assert(fqm_length <= 32);
607 assert(sizeof(*fqm) == 4);
/* Copy only the valid part of the caller's matrix. */
608 memcpy(fqm_buffer, fqm, fqm_length * 4);
610 BEGIN_BCS_BATCH(batch, 34);
611 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
612 OUT_BCS_BATCH(batch, fqm_type << 0);
/* The payload is always the full 32 dwords (128 bytes). */
613 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
614 ADVANCE_BCS_BATCH(batch);
/*
 * gen7_mfc_avc_fqm_state: program the four AVC forward quantiser matrices
 * (4x4 intra, 4x4 inter, 8x8 intra, 8x8 inter) with a flat table of
 * 0x10001000 dwords, via four calls to gen7_mfc_fqm_state.
 *
 * The 4x4 calls pass a length of 24 dwords and the 8x8 calls pass 32. The
 * matrix selectors are the MFX_QM_AVC_* constants, the same ones used for
 * the QM commands.
 *
 * NOTE(review): the braces and the closing of the initializer are missing
 * from this copy (gaps in the embedded numbering at 618, 628-629).
 */
617 static void gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
/* Flat table: 32 dwords, each holding two 16-bit values of 0x1000. */
619 unsigned int qm[32] = {
620 0x10001000, 0x10001000, 0x10001000, 0x10001000,
621 0x10001000, 0x10001000, 0x10001000, 0x10001000,
622 0x10001000, 0x10001000, 0x10001000, 0x10001000,
623 0x10001000, 0x10001000, 0x10001000, 0x10001000,
624 0x10001000, 0x10001000, 0x10001000, 0x10001000,
625 0x10001000, 0x10001000, 0x10001000, 0x10001000,
626 0x10001000, 0x10001000, 0x10001000, 0x10001000,
627 0x10001000, 0x10001000, 0x10001000, 0x10001000
/* One command per matrix type, all sharing the same table. */
630 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context);
631 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context);
632 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context);
633 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context);
/*
 * gen6_mfc_avc_ref_idx_state: emit two 10-dword MFX_AVC_REF_IDX_STATE
 * commands, one selecting list L0 and one selecting list L1. In each, the
 * first entry dword carries one reference and the remaining seven dwords
 * are 0x80808080.
 *
 * ctx is not referenced by the visible statements.
 *
 * NOTE(review): the declaration of i and the braces are missing from this
 * copy. The low byte differs between the lists (0x20 for L0, 0x22 for L1);
 * the encoding of that byte is not documented here.
 */
636 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
638 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
/* First command: list L0. The length field 8 equals 10 - 2. */
641 BEGIN_BCS_BATCH(batch, 10);
642 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
643 OUT_BCS_BATCH(batch, 0); //Select L0
644 OUT_BCS_BATCH(batch, 0x80808020); //Only 1 reference
/* Remaining seven entry dwords. */
645 for(i = 0; i < 7; i++) {
646 OUT_BCS_BATCH(batch, 0x80808080);
648 ADVANCE_BCS_BATCH(batch);
/* Second command: list L1, same layout. */
650 BEGIN_BCS_BATCH(batch, 10);
651 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
652 OUT_BCS_BATCH(batch, 1); //Select L1
653 OUT_BCS_BATCH(batch, 0x80808022); //Only 1 reference
654 for(i = 0; i < 7; i++) {
655 OUT_BCS_BATCH(batch, 0x80808080);
657 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_avc_insert_object: emit an insert-object command that places
 * lenght_in_dws dwords of caller-supplied data (insert_data) into the batch
 * after a two-dword header.
 *
 * The opcode is MFX_INSERT_OBJECT on gen7 devices and MFC_AVC_INSERT_OBJECT
 * otherwise. The second header dword packs data_bits_in_last_dw (bit 8),
 * skip_emul_byte_count (bit 4) and the emulation_flag, is_last_header and
 * is_end_of_slice flags (bits 3, 2, 1). Each flag is normalised to 0/1
 * with !!.
 *
 * NOTE(review): the return-type line, braces, the else keyword between the
 * two opcode variants and the OUT_BCS_BATCH opener of the flags dword are
 * missing from this copy (gaps in the embedded numbering at 672, 675).
 * "lenght" is the original spelling of the parameter name.
 */
661 gen6_mfc_avc_insert_object(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context,
662 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
663 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag)
665 struct i965_driver_data *i965 = i965_driver_data(ctx);
666 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
/* Two header dwords plus the payload. */
668 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
/* The opcode depends on the hardware generation; the length field is total - 2. */
670 if (IS_GEN7(i965->intel.device_id))
671 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
673 OUT_BCS_BATCH(batch, MFC_AVC_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
676 (0 << 16) | /* always start at offset 0 */
677 (data_bits_in_last_dw << 8) |
678 (skip_emul_byte_count << 4) |
679 (!!emulation_flag << 3) |
680 ((!!is_last_header) << 2) |
681 ((!!is_end_of_slice) << 1) |
682 (0 << 0)); /* FIXME: ??? */
/* Copy the payload bytes straight into the batch. */
684 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
685 ADVANCE_BCS_BATCH(batch);
/*
 * gen6_mfc_avc_pak_object_intra: emit an 11-dword MFC_AVC_PAK_OBJECT
 * command for one intra macroblock at position (x, y).
 *
 * end_mb is placed at bit 26 of the "last MB" dword together with qp.
 * msg[1], msg[2] and msg[3] (masked with 0xFC) are forwarded to the command.
 * max_mb_size and target_mb_size fill the final dword.
 *
 * Returns the command length in dwords (11). ctx is not referenced by the
 * visible statements.
 *
 * NOTE(review): the return-type line, braces, the OUT_BCS_BATCH opener of
 * the flags dword and its final operand(s) (listing lines 701, 707-708) are
 * missing from this copy. That operand presumably uses msg[0] -- confirm.
 */
689 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
690 struct gen6_encoder_context *gen6_encoder_context,
691 unsigned char target_mb_size, unsigned char max_mb_size)
693 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
694 int len_in_dwords = 11;
696 BEGIN_BCS_BATCH(batch, len_in_dwords);
698 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
/* Two zero dwords follow the header for intra macroblocks. */
699 OUT_BCS_BATCH(batch, 0);
700 OUT_BCS_BATCH(batch, 0);
702 (0 << 24) | /* PackedMvNum, Debug*/
703 (0 << 20) | /* No motion vector */
704 (1 << 19) | /* CbpDcY */
705 (1 << 18) | /* CbpDcU */
706 (1 << 17) | /* CbpDcV */
/* Macroblock position in the low 16 bits, all-ones luma CBP in the high 16. */
709 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
710 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
711 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
713 /*Stuff for Intra MB*/
714 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
715 OUT_BCS_BATCH(batch, msg[2]);
/* Only bits 2..7 of msg[3] are kept. */
716 OUT_BCS_BATCH(batch, msg[3]&0xFC);
718 /*MaxSizeInWord and TargetSizeInWord*/
719 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
720 (target_mb_size << 16) );
722 ADVANCE_BCS_BATCH(batch);
/* Callers receive the number of dwords emitted. */
724 return len_in_dwords;
/*
 * gen6_mfc_avc_pak_object_inter: emit an 11-dword MFC_AVC_PAK_OBJECT
 * command for one inter macroblock at position (x, y).
 *
 * offset is written as the third dword, after a constant 32; presumably it
 * locates this macroblock's motion vectors in the indirect MV object --
 * confirm. end_mb and qp go into the "last MB" dword. max_mb_size and
 * target_mb_size fill the final dword.
 *
 * Returns the command length in dwords (11). ctx is not referenced by the
 * visible statements.
 *
 * NOTE(review): three "Last MB" writes are visible, but the 11-dword budget
 * leaves room for only one. The lines between them (listing lines 760, 763,
 * 765-766, 768-770) are missing from this copy. They presumably hold
 * preprocessor conditionals and an else that select a single write --
 * confirm against the original file. The OUT_BCS_BATCH opener of the flags
 * dword (741) and two of its operands (752, 754) are also missing.
 */
727 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
728 struct gen6_encoder_context *gen6_encoder_context,
729 unsigned char target_mb_size,unsigned char max_mb_size, int slice_type)
731 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
732 int len_in_dwords = 11;
734 BEGIN_BCS_BATCH(batch, len_in_dwords);
736 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
738 OUT_BCS_BATCH(batch, 32); /* 32 MV*/
739 OUT_BCS_BATCH(batch, offset);
742 (1 << 24) | /* PackedMvNum, Debug*/
743 (4 << 20) | /* 8 MV, SNB don't use it*/
744 (1 << 19) | /* CbpDcY */
745 (1 << 18) | /* CbpDcU */
746 (1 << 17) | /* CbpDcV */
747 (0 << 15) | /* Transform8x8Flag = 0*/
748 (0 << 14) | /* Frame based*/
749 (0 << 13) | /* Inter MB */
750 (1 << 8) | /* MbType = P_L0_16x16 */
751 (0 << 7) | /* MBZ for frame */
753 (2 << 4) | /* MBZ for inter*/
755 (0 << 2) | /* SkipMbFlag */
756 (0 << 0)); /* InterMbMode */
/* Macroblock position in the low 16 bits, all-ones luma CBP in the high 16. */
758 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
759 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
/* B slices additionally set the top four bits in this variant. */
761 if ( slice_type == SLICE_TYPE_B) {
762 OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
764 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
767 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
771 /*Stuff for Inter MB*/
772 OUT_BCS_BATCH(batch, 0x0);
773 OUT_BCS_BATCH(batch, 0x0);
774 OUT_BCS_BATCH(batch, 0x0);
776 /*MaxSizeInWord and TargetSizeInWord*/
777 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
778 (target_mb_size << 16) );
780 ADVANCE_BCS_BATCH(batch);
/* Callers receive the number of dwords emitted. */
782 return len_in_dwords;
/*
 * gen6_mfc_init: reset the MFC context for a new encode.
 *
 * Drops the references held on the per-frame buffer objects (post/pre
 * deblocking output, uncompressed source, indirect PAK-BSE object, direct
 * MV buffers, reference surfaces) and clears the pointers. It then replaces
 * the four scratch buffers (intra row store, macroblock status, deblocking
 * filter row store, BSD/MPC row store) with freshly allocated ones from
 * i965->intel.bufmgr.
 *
 * NOTE(review): the declarations of bo and i, the braces and most
 * dri_bo_alloc arguments are missing from this copy (gaps in the embedded
 * numbering at 789-791, 819-822, 827-830, 835, 837-838, 843, 845-846). Only
 * the 49152- and 12288-byte sizes are visible.
 */
785 static void gen6_mfc_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
787 struct i965_driver_data *i965 = i965_driver_data(ctx);
788 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
792 /*Encode common setup for MFC*/
793 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
794 mfc_context->post_deblocking_output.bo = NULL;
796 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
797 mfc_context->pre_deblocking_output.bo = NULL;
799 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
800 mfc_context->uncompressed_picture_source.bo = NULL;
802 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
803 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
/*
 * Release every direct-MV buffer that is set. The guard used to end in a
 * stray ';' that turned it into an empty statement; it now controls the
 * unreference call, matching the reference_surfaces loop below.
 */
805 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
806 if ( mfc_context->direct_mv_buffers[i].bo != NULL)
807 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
808 mfc_context->direct_mv_buffers[i].bo = NULL;
/* Release every reference surface that is set. */
811 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
812 if (mfc_context->reference_surfaces[i].bo != NULL)
813 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
814 mfc_context->reference_surfaces[i].bo = NULL;
/* Scratch buffers: drop the old object, allocate a new one, store it. */
817 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
818 bo = dri_bo_alloc(i965->intel.bufmgr,
823 mfc_context->intra_row_store_scratch_buffer.bo = bo;
825 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
826 bo = dri_bo_alloc(i965->intel.bufmgr,
831 mfc_context->macroblock_status_buffer.bo = bo;
833 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
834 bo = dri_bo_alloc(i965->intel.bufmgr,
836 49152, /* 6 * 128 * 64 */
839 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
841 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
842 bo = dri_bo_alloc(i965->intel.bufmgr,
844 12288, /* 1.5 * 128 * 64 */
847 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
850 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
851 struct encode_state *encode_state,
852 struct gen6_encoder_context *gen6_encoder_context)
854 struct i965_driver_data *i965 = i965_driver_data(ctx);
855 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
856 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
857 struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
858 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
859 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
860 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer; /* FIXME: multi slices */
861 VAEncH264DecRefPicMarkingBuffer *pDecRefPicMarking = NULL;
862 unsigned int *msg = NULL, offset = 0;
863 int emit_new_state = 1, object_len_in_bytes;
864 int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
865 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
866 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
868 int rate_control_mode = pSequenceParameter->rate_control_method;
869 unsigned char target_mb_size = mfc_context->bit_rate_control_context[1-is_intra].TargetSizeInWord;
870 unsigned char max_mb_size = mfc_context->bit_rate_control_context[1-is_intra].MaxSizeInWord;
871 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
872 unsigned char *slice_header = NULL;
873 int slice_header_length_in_bits = 0;
874 unsigned int tail_data[] = { 0x0 };
876 if (encode_state->dec_ref_pic_marking)
877 pDecRefPicMarking = (VAEncH264DecRefPicMarkingBuffer *)encode_state->dec_ref_pic_marking->buffer;
879 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, pDecRefPicMarking, &slice_header);
881 if ( rate_control_mode == 0) {
882 qp = mfc_context->bit_rate_control_context[1-is_intra].QpPrimeY;
885 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
888 dri_bo_map(vme_context->vme_output.bo , 1);
889 msg = (unsigned int *)vme_context->vme_output.bo->virtual;
892 for (y = 0; y < height_in_mbs; y++) {
893 for (x = 0; x < width_in_mbs; x++) {
894 int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
896 if (emit_new_state) {
897 intel_batchbuffer_emit_mi_flush(batch);
899 if (IS_GEN7(i965->intel.device_id)) {
900 gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context);
901 gen7_mfc_surface_state(ctx, gen6_encoder_context);
902 gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
904 gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context);
905 gen6_mfc_surface_state(ctx, gen6_encoder_context);
906 gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context);
909 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context);
910 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context);
912 if (IS_GEN7(i965->intel.device_id)) {
913 gen7_mfc_avc_img_state(ctx, encode_state, gen6_encoder_context);
914 gen7_mfc_avc_qm_state(ctx, gen6_encoder_context);
915 gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context);
917 gen6_mfc_avc_img_state(ctx, encode_state,gen6_encoder_context);
918 gen6_mfc_avc_qm_state(ctx, gen6_encoder_context);
919 gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context);
922 gen6_mfc_avc_directmode_state(ctx, gen6_encoder_context);
923 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context);
924 gen6_mfc_avc_slice_state(ctx, pSliceParameter->slice_type,
925 encode_state, gen6_encoder_context,
926 rate_control_mode == 0, pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta);
928 if (encode_state->packed_header_data[VAEncPackedHeaderSPS]) {
929 VAEncPackedHeaderParameterBuffer *param = NULL;
930 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderSPS]->buffer;
931 unsigned int length_in_bits;
933 assert(encode_state->packed_header_param[VAEncPackedHeaderSPS]);
934 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderSPS]->buffer;
935 length_in_bits = param->length_in_bits[0];
937 gen6_mfc_avc_insert_object(ctx,
938 gen6_encoder_context,
940 ALIGN(length_in_bits, 32) >> 5,
941 length_in_bits & 0x1f,
942 param->skip_emulation_check_count,
945 param->insert_emulation_bytes);
948 if (encode_state->packed_header_data[VAEncPackedHeaderPPS]) {
949 VAEncPackedHeaderParameterBuffer *param = NULL;
950 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[VAEncPackedHeaderPPS]->buffer;
951 unsigned int length_in_bits;
953 assert(encode_state->packed_header_param[VAEncPackedHeaderPPS]);
954 param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[VAEncPackedHeaderPPS]->buffer;
955 length_in_bits = param->length_in_bits[0];
957 gen6_mfc_avc_insert_object(ctx,
958 gen6_encoder_context,
960 ALIGN(length_in_bits, 32) >> 5,
961 length_in_bits & 0x1f,
962 param->skip_emulation_check_count,
965 param->insert_emulation_bytes);
968 gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
969 (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
970 5, /* first 5 bytes are start code + nal unit type */
977 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context,target_mb_size, max_mb_size);
980 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, target_mb_size, max_mb_size, pSliceParameter->slice_type);
984 if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
986 intel_batchbuffer_end_atomic(batch);
987 intel_batchbuffer_flush(batch);
989 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
994 gen6_mfc_avc_insert_object(ctx, gen6_encoder_context,
995 tail_data, sizeof(tail_data) >> 2, 32,
996 sizeof(tail_data), 1, 1, 1);
999 dri_bo_unmap(vme_context->vme_output.bo);
1003 intel_batchbuffer_end_atomic(batch);
/*
 * Release callback for the per-surface AVC auxiliary data; it is installed
 * as obj_surface->free_private_data by gen6_mfc_avc_prepare().
 *
 * data: address of the surface's private_data pointer; *data is read as a
 *       struct gen6_mfc_avc_surface_aux.
 *
 * The visible statements drop this structure's references on the dmv_top
 * and dmv_bottom buffer objects and clear both fields.
 *
 * NOTE(review): parts of the body are not visible in this excerpt, so any
 * NULL guard on *data or release of the structure itself cannot be
 * confirmed from here.
 */
1007 gen6_mfc_free_avc_surface(void **data)
1009 struct gen6_mfc_avc_surface_aux *avc_surface = *data;
/* Drop the reference on each direct-MV buffer and clear the stale pointer. */
1014 dri_bo_unreference(avc_surface->dmv_top);
1015 avc_surface->dmv_top = NULL;
1016 dri_bo_unreference(avc_surface->dmv_bottom);
1017 avc_surface->dmv_bottom = NULL;
/*
 * Fill both bit_rate_control_context entries with their starting values.
 *
 * encode_state: supplies the H.264 sequence parameters (bits_per_second,
 *               time_scale, num_units_in_tick).
 * mfc_context:  receives the result; surface_state.width/height must
 *               already be set because the macroblock counts come from them.
 *
 * Entry [0] receives the intra figures and entry [1] the inter figures;
 * elsewhere in this file the entry is selected with
 * "1 - (slice_type == SLICE_TYPE_I)".
 *
 * NOTE(review): the visible lines contain no guard for
 * num_units_in_tick == 0 or for a zero-sized surface, both of which are
 * used as divisors below.
 */
1023 static void gen6_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
1024 struct gen6_mfc_context *mfc_context)
1026 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
/* Picture size in 16x16 macroblocks, rounded up. */
1028 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1029 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* Frame rate is taken as time_scale / (2 * num_units_in_tick). */
1030 float fps = pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
/*
 * Per-macroblock budget: the bit rate is spread over (fps + 4) frames and
 * then over every macroblock; intra macroblocks are given five times the
 * inter budget.  Presumably expressed in bits -- TODO confirm the unit.
 */
1031 int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
1032 int intra_mb_size = inter_mb_size * 5.0;
1035 mfc_context->bit_rate_control_context[0].target_mb_size = intra_mb_size;
1036 mfc_context->bit_rate_control_context[0].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
1037 mfc_context->bit_rate_control_context[1].target_mb_size = inter_mb_size;
1038 mfc_context->bit_rate_control_context[1].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
/* Settings shared by both entries: starting QP and the QP modifier / grow / shrink / correction values. */
1040 for(i = 0 ; i < 2; i++) {
1041 mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
1042 mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
1043 mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
1044 mfc_context->bit_rate_control_context[i].GrowInit = 6;
1045 mfc_context->bit_rate_control_context[i].GrowResistance = 4;
1046 mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
1047 mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
1049 mfc_context->bit_rate_control_context[i].Correct[0] = 8;
1050 mfc_context->bit_rate_control_context[i].Correct[1] = 4;
1051 mfc_context->bit_rate_control_context[i].Correct[2] = 2;
1052 mfc_context->bit_rate_control_context[i].Correct[3] = 2;
1053 mfc_context->bit_rate_control_context[i].Correct[4] = 4;
1054 mfc_context->bit_rate_control_context[i].Correct[5] = 8;
/* Target size is the per-MB budget divided by 16 (computed as (size + 16) / 16). */
1057 mfc_context->bit_rate_control_context[0].TargetSizeInWord = (intra_mb_size + 16)/ 16;
1058 mfc_context->bit_rate_control_context[1].TargetSizeInWord = (inter_mb_size + 16)/ 16;
/* The maximum size is 1.5 times the target size. */
1060 mfc_context->bit_rate_control_context[0].MaxSizeInWord = mfc_context->bit_rate_control_context[0].TargetSizeInWord * 1.5;
1061 mfc_context->bit_rate_control_context[1].MaxSizeInWord = mfc_context->bit_rate_control_context[1].TargetSizeInWord * 1.5;
/*
 * Adjust the QP of the active rate-control entry after a frame has been
 * encoded, based on how far the produced size is from the target.
 *
 * encode_state:       the first slice's slice_type selects the entry
 *                     (0 for SLICE_TYPE_I, 1 otherwise).
 * mfc_context:        holds bit_rate_control_context[], updated in place.
 * current_frame_size: measured size of the frame just encoded, compared
 *                     against target_frame_size of the selected entry.
 *
 * QpPrimeY is changed as follows and then clamped to the range [1, 51]:
 *   size > 4.0 x target : +4      size < 0.3 x target : -3
 *   size > 2.0 x target : +3      size < 0.5 x target : -2
 *   size > 1.5 x target : +2      size < 0.8 x target : -1
 *   size > 1.2 x target : +1      otherwise           : unchanged
 *
 * NOTE(review): the return statements are not visible in this excerpt; the
 * final visible test compares the new QP with the old one, and the caller
 * uses the result as a boolean.
 */
1064 static int gen6_mfc_bit_rate_control_context_update(struct encode_state *encode_state,
1065 struct gen6_mfc_context *mfc_context,
1066 int current_frame_size)
1068 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1069 int control_index = 1 - (pSliceParameter->slice_type == SLICE_TYPE_I);
/* Remember the incoming QP so a change can be detected at the end. */
1070 int oldQp = mfc_context->bit_rate_control_context[control_index].QpPrimeY;
/* Debug trace. NOTE(review): possibly wrapped in a preprocessor guard that is not visible here. */
1073 printf("conrol_index = %d, start_qp = %d, result = %d, target = %d\n", control_index,
1074 mfc_context->bit_rate_control_context[control_index].QpPrimeY, current_frame_size,
1075 mfc_context->bit_rate_control_context[control_index].target_frame_size );
/* Frame too large: raise the QP, with a bigger step for a bigger overshoot. */
1078 if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 4.0 ) {
1079 mfc_context->bit_rate_control_context[control_index].QpPrimeY += 4;
1080 } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 2.0 ) {
1081 mfc_context->bit_rate_control_context[control_index].QpPrimeY += 3;
1082 } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.50 ) {
1083 mfc_context->bit_rate_control_context[control_index].QpPrimeY += 2;
1084 } else if ( current_frame_size > mfc_context->bit_rate_control_context[control_index].target_frame_size * 1.20 ) {
1085 mfc_context->bit_rate_control_context[control_index].QpPrimeY ++;
/* Frame too small: lower the QP, with a bigger step for a bigger undershoot. */
1086 } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.30 ) {
1087 mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 3;
1088 } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.50 ) {
1089 mfc_context->bit_rate_control_context[control_index].QpPrimeY -= 2;
1090 } else if (current_frame_size < mfc_context->bit_rate_control_context[control_index].target_frame_size * 0.80 ) {
1091 mfc_context->bit_rate_control_context[control_index].QpPrimeY --;
/* Clamp the QP to the range [1, 51]. */
1094 if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY > 51)
1095 mfc_context->bit_rate_control_context[control_index].QpPrimeY = 51;
1096 if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY < 1)
1097 mfc_context->bit_rate_control_context[control_index].QpPrimeY = 1;
/* Did the QP actually change? (The statements acting on this are not visible.) */
1099 if ( mfc_context->bit_rate_control_context[control_index].QpPrimeY != oldQp)
/*
 * Bind every input and output buffer needed to encode the current picture
 * into mfc_context, then emit the BCS command stream.
 *
 * ctx:                  driver context, used for surface/buffer lookup and
 *                       buffer allocation.
 * encode_state:         supplies the picture parameters (CurrPic,
 *                       ReferenceFrames, CodedBuf) and current_render_target.
 * gen6_encoder_context: owns the mfc_context that is filled in here.
 *
 * Visible steps, in order:
 *   1. Reconstructed picture: look up CurrPic, make sure it has an NV12
 *      buffer, create its direct-MV aux data on first use, and bind it as
 *      the post-deblocking output and as the last two direct_mv_buffers.
 *   2. Reference pictures: bind each valid ReferenceFrames[i] and its
 *      direct-MV buffers to slots i*2 and i*2+1.
 *   3. Source picture: bind current_render_target as the uncompressed
 *      input.
 *   4. Coded buffer: bind CodedBuf as the indirect PAK-BSE object.
 *   5. Initialise rate control once, then program the pipeline.
 *
 * Every buffer object stored in mfc_context takes an extra reference here.
 * vaStatus is initialised to VA_STATUS_SUCCESS; the return statement is not
 * visible in this excerpt.
 */
1105 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx,
1106 struct encode_state *encode_state,
1107 struct gen6_encoder_context *gen6_encoder_context)
1109 struct i965_driver_data *i965 = i965_driver_data(ctx);
1110 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1111 struct object_surface *obj_surface;
1112 struct object_buffer *obj_buffer;
1113 struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
1115 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1116 VAStatus vaStatus = VA_STATUS_SUCCESS;
1119 /* Set up all the input and output objects */
1121 /* Set up the current frame and its direct MV buffers */
1122 obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
1123 assert(obj_surface);
1124 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
/* First use of this surface: create its aux data with top/bottom direct-MV buffers. */
1125 if ( obj_surface->private_data == NULL) {
/* NOTE(review): the calloc result is dereferenced on the next line without a NULL check. */
1126 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1127 gen6_avc_surface->dmv_top =
1128 dri_bo_alloc(i965->intel.bufmgr,
1132 gen6_avc_surface->dmv_bottom =
1133 dri_bo_alloc(i965->intel.bufmgr,
1137 assert(gen6_avc_surface->dmv_top);
1138 assert(gen6_avc_surface->dmv_bottom);
/* Hand the aux data to the surface together with its release callback. */
1139 obj_surface->private_data = (void *)gen6_avc_surface;
1140 obj_surface->free_private_data = (void *)gen6_mfc_free_avc_surface;
/* The current picture's direct-MV buffers occupy the last two slots. */
1142 gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1143 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
1144 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
1145 dri_bo_reference(gen6_avc_surface->dmv_top);
1146 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* The reconstructed picture is written to the post-deblocking output. */
1148 mfc_context->post_deblocking_output.bo = obj_surface->bo;
1149 dri_bo_reference(mfc_context->post_deblocking_output.bo);
/* width/height take the surface's orig_* fields; the pitches take its width/height fields. */
1151 mfc_context->surface_state.width = obj_surface->orig_width;
1152 mfc_context->surface_state.height = obj_surface->orig_height;
1153 mfc_context->surface_state.w_pitch = obj_surface->width;
1154 mfc_context->surface_state.h_pitch = obj_surface->height;
1156 /* Set up the reference frames and their direct MV buffers */
1157 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
1158 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) {
1159 obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
1160 assert(obj_surface);
1161 if (obj_surface->bo != NULL) {
1162 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
1163 dri_bo_reference(obj_surface->bo);
1165 /* Check DMV buffer */
1166 if ( obj_surface->private_data == NULL) {
/* NOTE(review): same unchecked calloc pattern as for the current picture above. */
1168 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
1169 gen6_avc_surface->dmv_top =
1170 dri_bo_alloc(i965->intel.bufmgr,
1174 gen6_avc_surface->dmv_bottom =
1175 dri_bo_alloc(i965->intel.bufmgr,
1179 assert(gen6_avc_surface->dmv_top);
1180 assert(gen6_avc_surface->dmv_bottom);
1181 obj_surface->private_data = gen6_avc_surface;
1182 obj_surface->free_private_data = gen6_mfc_free_avc_surface;
1185 gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
1186 /* Setup DMV buffer */
1187 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
1188 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
1189 dri_bo_reference(gen6_avc_surface->dmv_top);
1190 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* The render target holds the raw picture to be encoded. */
1196 obj_surface = SURFACE(encode_state->current_render_target);
1197 assert(obj_surface && obj_surface->bo);
1198 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
1199 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* NOTE(review): unlike the surface lookups, obj_buffer is used on the next line without an assert. */
1201 obj_buffer = BUFFER (pPicParameter->CodedBuf); /* FIXME: fix this later */
1202 bo = obj_buffer->buffer_store->bo;
/*
 * Output starts after a 64-byte-aligned VACodedBufferSegment header; the
 * end offset is size_element minus 0x1000, aligned to 0x1000.
 */
1204 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
1205 mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
1206 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN (obj_buffer->size_element - 0x1000, 0x1000);
1207 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
1209 /* Program bit rate control: initialised only while entry [0] still has MaxSizeInWord == 0 */
1210 if ( mfc_context->bit_rate_control_context[0].MaxSizeInWord == 0 )
1211 gen6_mfc_bit_rate_control_context_init(encode_state, mfc_context);
1213 /* Program the BCS pipeline */
1214 gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context); //filling the pipeline
/*
 * Submit the command stream that was built for this picture by flushing
 * the encoder's batch buffer.
 *
 * ctx and encode_state are not used by the visible statements.
 * Returns VA_STATUS_SUCCESS.
 */
1219 static VAStatus gen6_mfc_run(VADriverContextP ctx,
1220 struct encode_state *encode_state,
1221 struct gen6_encoder_context *gen6_encoder_context)
1223 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
1225 intel_batchbuffer_flush(batch); //run the pipeline
1227 return VA_STATUS_SUCCESS;
/*
 * Read back the macroblock status buffer and report the size of the frame
 * that was just encoded.
 *
 * ctx, encode_state:    not used by the visible statements.
 * gen6_encoder_context: owns the mfc_context whose macroblock_status_buffer
 *                       is mapped and read.
 * encoded_bits_size:    out parameter; receives the accumulated size.
 *
 * For each macroblock of the picture (width_in_mbs * height_in_mbs
 * iterations) the upper 16 bits of status word 1 are added to the total.
 * Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): the loop body always reads status_mem[1]; the statement
 * that advances status_mem to the next record is presumably on a line that
 * is not visible in this excerpt -- confirm against the full file.
 */
1230 static VAStatus gen6_mfc_stop(VADriverContextP ctx,
1231 struct encode_state *encode_state,
1232 struct gen6_encoder_context *gen6_encoder_context,
1233 int *encoded_bits_size)
1235 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1236 unsigned int *status_mem;
1237 unsigned int buffer_size_bits = 0;
1238 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1239 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* Map the status buffer so the CPU can read the per-macroblock results. */
1242 dri_bo_map(mfc_context->macroblock_status_buffer.bo, 1);
1243 status_mem = (unsigned int *)mfc_context->macroblock_status_buffer.bo->virtual;
1244 /* Detect the encoded buffer size and the bit rate control result */
1245 for(i = 0; i < width_in_mbs * height_in_mbs; i++) {
1246 unsigned short current_mb = status_mem[1] >> 16;
1247 buffer_size_bits += current_mb;
1250 dri_bo_unmap(mfc_context->macroblock_status_buffer.bo);
1252 *encoded_bits_size = buffer_size_bits;
1254 return VA_STATUS_SUCCESS;
/*
 * Encode one AVC picture, re-encoding it when rate control asks for a
 * different QP.
 *
 * ctx:                  driver context, forwarded to every stage.
 * encode_state:         parameters of the picture being encoded.
 * gen6_encoder_context: encoder state; its mfc_context carries the
 *                       rate-control entries between passes.
 *
 * Each pass runs gen6_mfc_init -> gen6_mfc_avc_prepare -> gen6_mfc_run ->
 * gen6_mfc_stop, for at most MAX_CBR_INTERATE (4) passes.  When
 * rate_control_method is 0 (presumably CBR -- TODO confirm) the measured
 * frame size is passed to gen6_mfc_bit_rate_control_context_update().
 * Returns VA_STATUS_SUCCESS.
 *
 * NOTE(review): the statements that leave the loop early are not visible
 * in this excerpt.
 */
1258 gen6_mfc_avc_encode_picture(VADriverContextP ctx,
1259 struct encode_state *encode_state,
1260 struct gen6_encoder_context *gen6_encoder_context)
1262 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1263 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
1264 int rate_control_mode = pSequenceParameter->rate_control_method;
1265 int MAX_CBR_INTERATE = 4;
1266 int current_frame_bits_size;
1269 for(i = 0; i < MAX_CBR_INTERATE; i++) {
1270 gen6_mfc_init(ctx, gen6_encoder_context);
1271 gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
1272 gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
/* gen6_mfc_stop() writes the measured frame size through the pointer argument. */
1273 gen6_mfc_stop(ctx, encode_state, gen6_encoder_context, &current_frame_bits_size);
1274 if ( rate_control_mode == 0) {
1275 if ( gen6_mfc_bit_rate_control_context_update( encode_state, mfc_context, current_frame_bits_size) )
1282 return VA_STATUS_SUCCESS;
/*
 * Entry point of the MFC encoding stage: choose the codec-specific encode
 * routine for the requested profile.
 *
 * ctx, encode_state, gen6_encoder_context: forwarded unchanged to the
 * selected routine.
 *
 * Visible behaviour: VAProfileH264Baseline is handled by
 * gen6_mfc_avc_encode_picture(); the other visible assignment sets
 * vaStatus to VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 *
 * NOTE(review): part of the parameter list, the switch expression and the
 * return statement are not visible in this excerpt.
 */
1286 gen6_mfc_pipeline(VADriverContextP ctx,
1288 struct encode_state *encode_state,
1289 struct gen6_encoder_context *gen6_encoder_context)
1294 case VAProfileH264Baseline:
1295 vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
1298 /* FIXME: add support for other profiles */
1300 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/*
 * Initialisation hook for a gen6_mfc_context; returns a Bool.
 * NOTE(review): the body is not visible in this excerpt, so what it sets
 * up (if anything) cannot be stated from here.
 */
1307 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
/*
 * Release every buffer object held by an MFC context.
 *
 * mfc_context: context whose buffer references are dropped.
 *
 * Each .bo field is unreferenced and then set to NULL, so no stale pointer
 * is left behind in the context.
 *
 * NOTE(review): the end of the function, including its return value, is
 * not visible in this excerpt.
 */
1312 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
/* Picture outputs, source picture and coded-data buffer. */
1316 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1317 mfc_context->post_deblocking_output.bo = NULL;
1319 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1320 mfc_context->pre_deblocking_output.bo = NULL;
1322 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1323 mfc_context->uncompressed_picture_source.bo = NULL;
1325 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
1326 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
/* Direct-MV buffers. */
1328 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1329 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1330 mfc_context->direct_mv_buffers[i].bo = NULL;
/* Scratch and status buffers. */
1333 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1334 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1336 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1337 mfc_context->macroblock_status_buffer.bo = NULL;
1339 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1340 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1342 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1343 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
/* Reference picture surfaces. */
1346 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1347 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1348 mfc_context->reference_surfaces[i].bo = NULL;