2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao Yakui <yakui.zhao@intel.com>
26 * Xiang Haihao <haihao.xiang@intel.com>
30 #ifndef HAVE_GEN_AVC_SURFACE
31 #define HAVE_GEN_AVC_SURFACE 1
40 #include "intel_batchbuffer.h"
41 #include "i965_defines.h"
42 #include "i965_structs.h"
43 #include "i965_drv_video.h"
44 #include "i965_encoder.h"
45 #include "i965_encoder_utils.h"
48 #include "intel_media.h"
/* Build-time switch: use the GPU-kernel (software) path to build MFC
 * batch buffers on Haswell. */
50 #define MFC_SOFTWARE_HASWELL 1
/* True when the Haswell stepping is B0 or newer; several MFX commands
 * switch to a longer DW layout on B+ steppings. */
53 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Gen7.5 media-kernel binary that emits MFC batch-buffer commands for
 * INTRA macroblocks (compiled shader blob, 4 DWORDs per instruction). */
55 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
/* Gen7.5 media-kernel binary that emits MFC batch-buffer commands for
 * INTER macroblocks (compiled shader blob, 4 DWORDs per instruction). */
59 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
60 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
/* Kernel descriptor table handed to the GPE context; entries are indexed
 * by the MFC_BATCHBUFFER_AVC_* enum values (name, id, binary, size). */
63 static struct i965_kernel gen75_mfc_kernels[] = {
65 "MFC AVC INTRA BATCHBUFFER ",
66 MFC_BATCHBUFFER_AVC_INTRA,
67 gen75_mfc_batchbuffer_avc_intra,
68 sizeof(gen75_mfc_batchbuffer_avc_intra),
73 "MFC AVC INTER BATCHBUFFER ",
74 MFC_BATCHBUFFER_AVC_INTER,
75 gen75_mfc_batchbuffer_avc_inter,
76 sizeof(gen75_mfc_batchbuffer_avc_inter),
/* Masks for decoding the VME-output macroblock mode word. */
81 #define INTER_MODE_MASK 0x03
82 #define INTER_8X8 0x03
83 #define SUBMB_SHAPE_MASK 0x00FF00
/* Motion-vector count encodings placed in bits 22:20 of the MFC AVC
 * object command (8 MVs vs. 32 MVs per MB). */
85 #define INTER_MV8 (4 << 20)
86 #define INTER_MV32 (6 << 20)
/* Program MFX_PIPE_MODE_SELECT: put the MFX (BSD/BCS) engine into encode
 * mode for the selected standard (AVC or MPEG-2) and enable stream-out.
 * NOTE(review): some lines of this function are not visible in this
 * view; comments describe only what is shown. */
90 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
92 struct intel_encoder_context *encoder_context)
94 struct intel_batchbuffer *batch = encoder_context->base.batch;
95 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Only MPEG-2 and AVC are supported by this Gen7.5 encode path. */
97 assert(standard_select == MFX_FORMAT_MPEG2 ||
98 standard_select == MFX_FORMAT_AVC);
100 BEGIN_BCS_BATCH(batch, 5);
102 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
104 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
105 (MFD_MODE_VLD << 15) | /* VLD mode */
106 (1 << 10) | /* Stream-Out Enable */
/* Exactly one of post/pre deblocking output is bound by avc_prepare(),
 * depending on whether the deblocking filter is enabled. */
107 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
108 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
/* NOTE(review): bit 8 is OR'ed again with 0 here -- redundant (OR with 0
 * is a no-op) but harmless; kept for byte-identity. */
109 (0 << 8) | /* Pre Deblocking Output */
110 (0 << 5) | /* not in stitch mode */
111 (1 << 4) | /* encoding mode */
112 (standard_select << 0)); /* standard select: avc or mpeg2 */
/* DW2: error handling / clock gating controls, all left at defaults. */
114 (0 << 7) | /* expand NOA bus flag */
115 (0 << 6) | /* disable slice-level clock gating */
116 (0 << 5) | /* disable clock gating for NOA */
117 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
118 (0 << 3) | /* terminate if AVC mbdata error occurs */
119 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
122 OUT_BCS_BATCH(batch, 0);
123 OUT_BCS_BATCH(batch, 0);
125 ADVANCE_BCS_BATCH(batch);
/* Program MFX_SURFACE_STATE for the reconstructed/source picture:
 * NV12 (planar 4:2:0, interleaved U/V), Y-tiled, with geometry taken
 * from mfc_context->surface_state (filled in by avc_prepare()). */
129 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
131 struct intel_batchbuffer *batch = encoder_context->base.batch;
132 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
134 BEGIN_BCS_BATCH(batch, 6);
136 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
137 OUT_BCS_BATCH(batch, 0);
/* DW2: surface extent, stored as (height-1)/(width-1) per the PRM. */
139 ((mfc_context->surface_state.height - 1) << 18) |
140 ((mfc_context->surface_state.width - 1) << 4));
142 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
143 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
144 (0 << 22) | /* surface object control state, FIXME??? */
145 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
146 (0 << 2) | /* must be 0 for interleave U/V */
147 (1 << 1) | /* must be tiled */
148 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* DW4: chroma (Cb) plane starts h_pitch rows below the luma plane. */
150 (0 << 16) | /* must be 0 for interleave U/V */
151 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
152 OUT_BCS_BATCH(batch, 0);
154 ADVANCE_BCS_BATCH(batch);
/* B+-stepping variant of MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs):
 * binds the VME output buffer as the indirect MV object source and the
 * coded buffer as the PAK-BSE destination. */
158 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
159 struct intel_encoder_context *encoder_context)
161 struct intel_batchbuffer *batch = encoder_context->base.batch;
162 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
163 struct gen6_vme_context *vme_context = encoder_context->vme_context;
165 BEGIN_BCS_BATCH(batch, 26);
167 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
168 /* the DW1-3 is for the MFX indirect bistream offset */
169 OUT_BCS_BATCH(batch, 0);
170 OUT_BCS_BATCH(batch, 0);
171 OUT_BCS_BATCH(batch, 0);
172 /* the DW4-5 is the MFX upper bound */
173 OUT_BCS_BATCH(batch, 0);
174 OUT_BCS_BATCH(batch, 0);
176 /* the DW6-10 is for MFX Indirect MV Object Base Address */
177 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
178 OUT_BCS_BATCH(batch, 0);
179 OUT_BCS_BATCH(batch, 0);
180 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
181 OUT_BCS_BATCH(batch, 0);
183 /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
184 OUT_BCS_BATCH(batch, 0);
185 OUT_BCS_BATCH(batch, 0);
186 OUT_BCS_BATCH(batch, 0);
187 OUT_BCS_BATCH(batch, 0);
188 OUT_BCS_BATCH(batch, 0);
190 /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
191 OUT_BCS_BATCH(batch, 0);
192 OUT_BCS_BATCH(batch, 0);
193 OUT_BCS_BATCH(batch, 0);
194 OUT_BCS_BATCH(batch, 0);
195 OUT_BCS_BATCH(batch, 0);
197 /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
/* Base address of the coded-data buffer (written by the PAK engine). */
199 mfc_context->mfc_indirect_pak_bse_object.bo,
200 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202 OUT_BCS_BATCH(batch, 0);
203 OUT_BCS_BATCH(batch, 0);
/* Upper bound of the coded-data buffer (same bo, end_offset). */
206 mfc_context->mfc_indirect_pak_bse_object.bo,
207 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
208 mfc_context->mfc_indirect_pak_bse_object.end_offset);
209 OUT_BCS_BATCH(batch, 0);
211 ADVANCE_BCS_BATCH(batch);
/* Stepping dispatcher for MFX_IND_OBJ_BASE_ADDR_STATE: B+ steppings use
 * the 26-DW layout above; A stepping uses the short 11-DW layout here. */
215 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
217 struct intel_batchbuffer *batch = encoder_context->base.batch;
218 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
219 struct gen6_vme_context *vme_context = encoder_context->vme_context;
220 struct i965_driver_data *i965 = i965_driver_data(ctx);
222 if (IS_STEPPING_BPLUS(i965)) {
223 gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
226 BEGIN_BCS_BATCH(batch, 11);
228 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
231 /* MFX Indirect MV Object Base Address */
232 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
233 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
234 OUT_BCS_BATCH(batch, 0);
235 OUT_BCS_BATCH(batch, 0);
236 OUT_BCS_BATCH(batch, 0);
237 OUT_BCS_BATCH(batch, 0);
238 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
/* Coded-buffer base address, then its upper bound (end_offset). */
240 mfc_context->mfc_indirect_pak_bse_object.bo,
241 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
244 mfc_context->mfc_indirect_pak_bse_object.bo,
245 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
246 mfc_context->mfc_indirect_pak_bse_object.end_offset);
248 ADVANCE_BCS_BATCH(batch);
/* Program MFX_AVC_IMG_STATE (16 DWs): per-picture AVC encode parameters
 * -- frame geometry in MBs, entropy mode, transform flags, and the
 * inter/intra MB conformance size limits. */
252 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
253 struct intel_encoder_context *encoder_context)
255 struct intel_batchbuffer *batch = encoder_context->base.batch;
256 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
257 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
/* Picture size rounded up to whole macroblocks. */
259 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
260 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
262 BEGIN_BCS_BATCH(batch, 16);
264 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
266 ((width_in_mbs * height_in_mbs) & 0xFFFF));
268 ((height_in_mbs - 1) << 16) |
269 ((width_in_mbs - 1) << 0));
271 (0 << 24) | /* Second Chroma QP Offset */
272 (0 << 16) | /* Chroma QP Offset */
273 (0 << 14) | /* Max-bit conformance Intra flag */
274 (0 << 13) | /* Max Macroblock size conformance Inter flag */
275 (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
276 (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
277 (0 << 8) | /* FIXME: Image Structure */
278 (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */
280 (0 << 16) | /* Mininum Frame size */
281 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
282 (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */
283 (0 << 13) | /* CABAC 0 word insertion test enable */
284 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
285 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
286 (0 << 8) | /* FIXME: MbMvFormatFlag */
287 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
288 (0 << 6) | /* Only valid for VLD decoding mode */
289 (0 << 5) | /* Constrained Intra Predition Flag, from PPS */
290 (0 << 4) | /* Direct 8x8 inference flag */
291 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
292 (1 << 2) | /* Frame MB only flag */
293 (0 << 1) | /* MBAFF mode is in active */
294 (0 << 0)); /* Field picture flag */
295 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
296 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
297 (0xBB8 << 16) | /* InterMbMaxSz */
298 (0xEE8) ); /* IntraMbMaxSz */
299 OUT_BCS_BATCH(batch, 0); /* Reserved */
300 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
301 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
/* NOTE(review): 0x8C000000 / 0x00010000 are the fixed DW10/DW11 values
 * used by this driver for encode; meaning not derivable from this file
 * -- see the MFX_AVC_IMG_STATE PRM description. */
302 OUT_BCS_BATCH(batch, 0x8C000000);
303 OUT_BCS_BATCH(batch, 0x00010000);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
306 OUT_BCS_BATCH(batch, 0);
307 OUT_BCS_BATCH(batch, 0);
309 ADVANCE_BCS_BATCH(batch);
/*
 * Load one quantizer matrix set into the MFX engine via MFX_QM_STATE.
 * qm points to qm_length DWORDs of matrix data; the command always
 * carries a fixed 16-DWORD payload.
 *
 * BUG FIX: qm_buffer was previously uninitialized, so when
 * qm_length < 16 (the AVC 4x4 callers pass 12) the tail of the payload
 * was uninitialized stack data emitted into the command stream.  The
 * buffer is now zero-initialized, making the padding deterministic.
 */
313 gen75_mfc_qm_state(VADriverContextP ctx,
317 struct intel_encoder_context *encoder_context)
319 struct intel_batchbuffer *batch = encoder_context->base.batch;
320 unsigned int qm_buffer[16] = {0};
322 assert(qm_length <= 16);
323 assert(sizeof(*qm) == 4); /* payload is expressed in 32-bit words */
324 memcpy(qm_buffer, qm, qm_length * 4);
326 BEGIN_BCS_BATCH(batch, 18);
327 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
328 OUT_BCS_BATCH(batch, qm_type << 0); /* which matrix set is being loaded */
329 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
330 ADVANCE_BCS_BATCH(batch);
/* Load flat (all-16) AVC quantizer matrices: 4x4 intra/inter (12 DWs
 * each) and 8x8 intra/inter (16 DWs each).  0x10101010 packs four
 * bytes of value 16, i.e. the default flat scaling list. */
336 unsigned int qm[16] = {
337 0x10101010, 0x10101010, 0x10101010, 0x10101010,
338 0x10101010, 0x10101010, 0x10101010, 0x10101010,
339 0x10101010, 0x10101010, 0x10101010, 0x10101010,
340 0x10101010, 0x10101010, 0x10101010, 0x10101010
343 gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
344 gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
345 gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
346 gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
/*
 * Load one forward-quantizer matrix set via MFX_FQM_STATE.  fqm points
 * to fqm_length DWORDs; the command always carries a fixed 32-DWORD
 * payload.
 *
 * BUG FIX: fqm_buffer was previously uninitialized, so when
 * fqm_length < 32 (the AVC 4x4 callers pass 24) uninitialized stack
 * bytes were emitted into the command stream.  Zero-initializing makes
 * the padding deterministic.
 */
350 gen75_mfc_fqm_state(VADriverContextP ctx,
354 struct intel_encoder_context *encoder_context)
356 struct intel_batchbuffer *batch = encoder_context->base.batch;
357 unsigned int fqm_buffer[32] = {0};
359 assert(fqm_length <= 32);
360 assert(sizeof(*fqm) == 4); /* payload is expressed in 32-bit words */
361 memcpy(fqm_buffer, fqm, fqm_length * 4);
363 BEGIN_BCS_BATCH(batch, 34);
364 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
365 OUT_BCS_BATCH(batch, fqm_type << 0); /* which matrix set is being loaded */
366 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
367 ADVANCE_BCS_BATCH(batch);
/* Load flat forward-quantizer matrices.  0x10001000 packs two 16-bit
 * values of 0x1000, the FQM fixed-point equivalent of a flat scale. */
373 unsigned int qm[32] = {
374 0x10001000, 0x10001000, 0x10001000, 0x10001000,
375 0x10001000, 0x10001000, 0x10001000, 0x10001000,
376 0x10001000, 0x10001000, 0x10001000, 0x10001000,
377 0x10001000, 0x10001000, 0x10001000, 0x10001000,
378 0x10001000, 0x10001000, 0x10001000, 0x10001000,
379 0x10001000, 0x10001000, 0x10001000, 0x10001000,
380 0x10001000, 0x10001000, 0x10001000, 0x10001000,
381 0x10001000, 0x10001000, 0x10001000, 0x10001000
384 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
385 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
386 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
387 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
/* Emit MFX_INSERT_OBJECT: splice raw bitstream data (SPS/PPS/slice
 * headers, etc.) directly into the PAK output, with optional emulation-
 * prevention byte insertion.  ("lenght_in_dws" typo is part of the
 * existing interface and is preserved.) */
391 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
392 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
393 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
394 struct intel_batchbuffer *batch)
/* NOTE(review): as shown, the caller-supplied batch is unconditionally
 * replaced with the default BCS batch; presumably this assignment was
 * guarded by a NULL check -- confirm against upstream gen75_mfc.c. */
397 batch = encoder_context->base.batch;
399 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
401 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
403 (0 << 16) | /* always start at offset 0 */
404 (data_bits_in_last_dw << 8) |
405 (skip_emul_byte_count << 4) |
406 (!!emulation_flag << 3) |
407 ((!!is_last_header) << 2) |
408 ((!!is_end_of_slice) << 1) |
409 (0 << 0)); /* FIXME: ??? */
410 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
412 ADVANCE_BCS_BATCH(batch);
/*
 * (Re)initialize the MFC context for a new frame: release every buffer
 * reference held from the previous frame, then allocate the scratch
 * buffers the PAK engine requires (intra row store, MB status,
 * deblocking row store, BSD/MPC row store) and a fresh aux batchbuffer
 * exposed as a 16-byte-block surface for the batchbuffer kernels.
 */
416 static void gen75_mfc_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
418 struct i965_driver_data *i965 = i965_driver_data(ctx);
419 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
423 /*Encode common setup for MFC*/
424 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
425 mfc_context->post_deblocking_output.bo = NULL;
427 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
428 mfc_context->pre_deblocking_output.bo = NULL;
430 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
431 mfc_context->uncompressed_picture_source.bo = NULL;
433 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
434 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
436 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
/* BUG FIX: this read "if (...);" -- the stray trailing semicolon made
 * the NULL check a no-op statement and the unreference unconditional.
 * dri_bo_unreference() tolerates NULL so no crash resulted, but the
 * guard now actually guards the call as intended. */
437 if ( mfc_context->direct_mv_buffers[i].bo != NULL)
438 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
439 mfc_context->direct_mv_buffers[i].bo = NULL;
442 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
443 if (mfc_context->reference_surfaces[i].bo != NULL)
444 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
445 mfc_context->reference_surfaces[i].bo = NULL;
/* Per-frame scratch allocations follow; sizes are fixed worst-case
 * values for 4K-wide frames (see the byte counts in the comments). */
448 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
449 bo = dri_bo_alloc(i965->intel.bufmgr,
454 mfc_context->intra_row_store_scratch_buffer.bo = bo;
456 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
457 bo = dri_bo_alloc(i965->intel.bufmgr,
462 mfc_context->macroblock_status_buffer.bo = bo;
464 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
465 bo = dri_bo_alloc(i965->intel.bufmgr,
467 49152, /* 6 * 128 * 64 */
470 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
472 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
473 bo = dri_bo_alloc(i965->intel.bufmgr,
475 12288, /* 1.5 * 128 * 64 */
478 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
480 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
481 mfc_context->mfc_batchbuffer_surface.bo = NULL;
483 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
484 mfc_context->aux_batchbuffer_surface.bo = NULL;
486 if (mfc_context->aux_batchbuffer)
487 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
/* The aux batchbuffer's bo doubles as a surface (pitch 16) so the
 * MFC batchbuffer kernels can write commands into it. */
489 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
490 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
491 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
492 mfc_context->aux_batchbuffer_surface.pitch = 16;
493 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
494 mfc_context->aux_batchbuffer_surface.size_block = 16;
496 i965_gpe_context_init(ctx, &mfc_context->gpe_context);
/* B+-stepping variant of MFX_PIPE_BUF_ADDR_STATE (61 DWs): binds all
 * pipeline buffers -- pre/post deblocking outputs, source picture,
 * stream-out/MB-status, row-store scratch buffers and the 16 reference
 * picture slots (2 DWs of padding follow each address on B+). */
500 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
501 struct intel_encoder_context *encoder_context)
503 struct intel_batchbuffer *batch = encoder_context->base.batch;
504 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
507 BEGIN_BCS_BATCH(batch, 61);
509 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
511 /* the DW1-3 is for pre_deblocking */
512 if (mfc_context->pre_deblocking_output.bo)
513 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
514 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
517 OUT_BCS_BATCH(batch, 0); /* pre output addr */
519 OUT_BCS_BATCH(batch, 0);
520 OUT_BCS_BATCH(batch, 0);
521 /* the DW4-6 is for the post_deblocking */
523 if (mfc_context->post_deblocking_output.bo)
524 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
525 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
526 0); /* post output addr */
528 OUT_BCS_BATCH(batch, 0);
529 OUT_BCS_BATCH(batch, 0);
530 OUT_BCS_BATCH(batch, 0);
532 /* the DW7-9 is for the uncompressed_picture */
533 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
534 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
535 0); /* uncompressed data */
537 OUT_BCS_BATCH(batch, 0);
538 OUT_BCS_BATCH(batch, 0);
540 /* the DW10-12 is for the mb status */
541 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
542 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
543 0); /* StreamOut data*/
544 OUT_BCS_BATCH(batch, 0);
545 OUT_BCS_BATCH(batch, 0);
547 /* the DW13-15 is for the intra_row_store_scratch */
548 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
549 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
551 OUT_BCS_BATCH(batch, 0);
552 OUT_BCS_BATCH(batch, 0);
554 /* the DW16-18 is for the deblocking filter */
555 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
556 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
558 OUT_BCS_BATCH(batch, 0);
559 OUT_BCS_BATCH(batch, 0);
561 /* the DW 19-50 is for Reference pictures*/
562 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
563 if ( mfc_context->reference_surfaces[i].bo != NULL) {
564 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
565 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* Empty reference slots are programmed as NULL addresses. */
568 OUT_BCS_BATCH(batch, 0);
570 OUT_BCS_BATCH(batch, 0);
572 OUT_BCS_BATCH(batch, 0);
574 /* The DW 52-54 is for the MB status buffer */
575 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
576 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
577 0); /* Macroblock status buffer*/
579 OUT_BCS_BATCH(batch, 0);
580 OUT_BCS_BATCH(batch, 0);
582 /* the DW 55-57 is the ILDB buffer */
583 OUT_BCS_BATCH(batch, 0);
584 OUT_BCS_BATCH(batch, 0);
585 OUT_BCS_BATCH(batch, 0);
587 /* the DW 58-60 is the second ILDB buffer */
588 OUT_BCS_BATCH(batch, 0);
589 OUT_BCS_BATCH(batch, 0);
590 OUT_BCS_BATCH(batch, 0);
591 ADVANCE_BCS_BATCH(batch);
/* Stepping dispatcher for MFX_PIPE_BUF_ADDR_STATE: B+ steppings use the
 * 61-DW layout above; A stepping uses the short 25-DW layout here
 * (one DW per address, no padding). */
595 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
597 struct intel_batchbuffer *batch = encoder_context->base.batch;
598 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
599 struct i965_driver_data *i965 = i965_driver_data(ctx);
602 if (IS_STEPPING_BPLUS(i965)) {
603 gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
607 BEGIN_BCS_BATCH(batch, 25);
609 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
611 if (mfc_context->pre_deblocking_output.bo)
612 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
613 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
616 OUT_BCS_BATCH(batch, 0); /* pre output addr */
618 if (mfc_context->post_deblocking_output.bo)
619 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
620 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
621 0); /* post output addr */
623 OUT_BCS_BATCH(batch, 0);
625 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
626 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
627 0); /* uncompressed data */
628 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
629 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
630 0); /* StreamOut data*/
631 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
632 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
634 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
635 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
637 /* 7..22 Reference pictures*/
638 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
639 if ( mfc_context->reference_surfaces[i].bo != NULL) {
640 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
641 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* Empty reference slots are programmed as NULL addresses. */
644 OUT_BCS_BATCH(batch, 0);
647 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
648 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
649 0); /* Macroblock status buffer*/
651 OUT_BCS_BATCH(batch, 0);
653 ADVANCE_BCS_BATCH(batch);
/* B+-stepping MFX_AVC_DIRECTMODE_STATE (71 DWs): bind the direct-MV
 * buffers of the reference frames (pairs of top/bottom bos) and the
 * current frame's MV write buffer, then the POC list. */
657 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
658 struct intel_encoder_context *encoder_context)
660 struct intel_batchbuffer *batch = encoder_context->base.batch;
661 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
665 BEGIN_BCS_BATCH(batch, 71);
667 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
669 /* Reference frames and Current frames */
670 /* the DW1-32 is for the direct MV for reference */
/* Step by 2: each reference owns a top/bottom DMV buffer pair; the last
 * two slots are reserved for the current frame and skipped here. */
671 for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
672 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
673 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
674 I915_GEM_DOMAIN_INSTRUCTION, 0,
676 OUT_BCS_BATCH(batch, 0);
678 OUT_BCS_BATCH(batch, 0);
679 OUT_BCS_BATCH(batch, 0);
682 OUT_BCS_BATCH(batch, 0);
684 /* the DW34-36 is the MV for the current reference */
685 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
686 I915_GEM_DOMAIN_INSTRUCTION, 0,
689 OUT_BCS_BATCH(batch, 0);
690 OUT_BCS_BATCH(batch, 0);
/* POC list: synthetic ascending values (frame i gets POC i/2 twice). */
693 for(i = 0; i < 32; i++) {
694 OUT_BCS_BATCH(batch, i/2);
696 OUT_BCS_BATCH(batch, 0);
697 OUT_BCS_BATCH(batch, 0);
699 ADVANCE_BCS_BATCH(batch);
/* Stepping dispatcher for MFX_AVC_DIRECTMODE_STATE: B+ steppings use
 * the 71-DW layout above; A stepping uses the 69-DW layout here with
 * one DW per DMV buffer slot. */
703 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
705 struct intel_batchbuffer *batch = encoder_context->base.batch;
706 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
707 struct i965_driver_data *i965 = i965_driver_data(ctx);
710 if (IS_STEPPING_BPLUS(i965)) {
711 gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
715 BEGIN_BCS_BATCH(batch, 69);
717 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
719 /* Reference frames and Current frames */
720 for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
721 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
722 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
723 I915_GEM_DOMAIN_INSTRUCTION, 0,
/* Unused DMV slots are programmed as NULL addresses. */
726 OUT_BCS_BATCH(batch, 0);
/* POC list: synthetic ascending values (frame i gets POC i/2 twice). */
731 for(i = 0; i < 32; i++) {
732 OUT_BCS_BATCH(batch, i/2);
734 OUT_BCS_BATCH(batch, 0);
735 OUT_BCS_BATCH(batch, 0);
737 ADVANCE_BCS_BATCH(batch);
/* Program MFX_AVC_REF_IDX_STATE twice -- once for reference list L0,
 * once for L1 -- with a single active reference each; the remaining
 * slots are marked invalid (0x80 per byte). */
741 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
743 struct intel_batchbuffer *batch = encoder_context->base.batch;
746 BEGIN_BCS_BATCH(batch, 10);
747 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
748 OUT_BCS_BATCH(batch, 0); //Select L0
749 OUT_BCS_BATCH(batch, 0x80808020); //Only 1 reference
750 for(i = 0; i < 7; i++) {
751 OUT_BCS_BATCH(batch, 0x80808080);
753 ADVANCE_BCS_BATCH(batch);
755 BEGIN_BCS_BATCH(batch, 10);
756 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
757 OUT_BCS_BATCH(batch, 1); //Select L1
758 OUT_BCS_BATCH(batch, 0x80808022); //Only 1 reference
759 for(i = 0; i < 7; i++) {
760 OUT_BCS_BATCH(batch, 0x80808080);
762 ADVANCE_BCS_BATCH(batch);
/* B+-stepping MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs): bind the BSD/MPC
 * row-store scratch buffer; the MPR row store and bitplane buffers are
 * unused on the encode path. */
767 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
768 struct intel_encoder_context *encoder_context)
770 struct intel_batchbuffer *batch = encoder_context->base.batch;
771 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
773 BEGIN_BCS_BATCH(batch, 10);
775 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
776 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
777 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
779 OUT_BCS_BATCH(batch, 0);
780 OUT_BCS_BATCH(batch, 0);
782 /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
783 OUT_BCS_BATCH(batch, 0);
784 OUT_BCS_BATCH(batch, 0);
785 OUT_BCS_BATCH(batch, 0);
787 /* the DW7-9 is for Bitplane Read Buffer Base Address */
788 OUT_BCS_BATCH(batch, 0);
789 OUT_BCS_BATCH(batch, 0);
790 OUT_BCS_BATCH(batch, 0);
792 ADVANCE_BCS_BATCH(batch);
/* Stepping dispatcher for MFX_BSP_BUF_BASE_ADDR_STATE: B+ steppings use
 * the 10-DW layout above; A stepping uses the short 4-DW layout here. */
796 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
798 struct intel_batchbuffer *batch = encoder_context->base.batch;
799 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
800 struct i965_driver_data *i965 = i965_driver_data(ctx);
802 if (IS_STEPPING_BPLUS(i965)) {
803 gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
807 BEGIN_BCS_BATCH(batch, 4);
809 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
810 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
811 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
813 OUT_BCS_BATCH(batch, 0);
814 OUT_BCS_BATCH(batch, 0);
816 ADVANCE_BCS_BATCH(batch);
/* Emit all picture-level MFX state for one AVC frame, in the order the
 * hardware expects: pipe mode, surface, indirect objects, pipeline
 * buffers, image state, (F)QM matrices, direct-mode and ref-idx state.
 * Hook-style entries go through mfc_context function pointers so other
 * gens can substitute their own implementations. */
820 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
821 struct encode_state *encode_state,
822 struct intel_encoder_context *encoder_context)
824 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
826 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
827 mfc_context->set_surface_state(ctx, encoder_context);
828 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
829 gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
830 gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
831 mfc_context->avc_img_state(ctx, encode_state, encoder_context);
832 mfc_context->avc_qm_state(ctx, encoder_context);
833 mfc_context->avc_fqm_state(ctx, encoder_context);
834 gen75_mfc_avc_directmode_state(ctx, encoder_context);
835 gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
/* Per-frame setup for AVC encode: decide whether in-loop deblocking is
 * enabled, bind the current/reference surfaces and their direct-MV
 * buffers, record surface geometry, and attach the coded (output)
 * buffer to the PAK-BSE object. */
839 static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx,
840 struct encode_state *encode_state,
841 struct intel_encoder_context *encoder_context)
843 struct i965_driver_data *i965 = i965_driver_data(ctx);
844 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
845 struct object_surface *obj_surface;
846 struct object_buffer *obj_buffer;
847 GenAvcSurface *gen6_avc_surface;
849 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
850 VAStatus vaStatus = VA_STATUS_SUCCESS;
851 int i, j, enable_avc_ildb = 0;
852 VAEncSliceParameterBufferH264 *slice_param;
853 VACodedBufferSegment *coded_buffer_segment;
854 unsigned char *flag = NULL;
/* Scan every slice: deblocking is enabled for the frame unless all
 * slices disable it (disable_deblocking_filter_idc == 1); the loop
 * stops early once one deblocking slice is found. */
856 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
857 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
858 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
860 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
861 assert((slice_param->slice_type == SLICE_TYPE_I) ||
862 (slice_param->slice_type == SLICE_TYPE_SI) ||
863 (slice_param->slice_type == SLICE_TYPE_P) ||
864 (slice_param->slice_type == SLICE_TYPE_SP) ||
865 (slice_param->slice_type == SLICE_TYPE_B));
867 if (slice_param->disable_deblocking_filter_idc != 1) {
876 /*Setup all the input&output object*/
878 /* Setup current frame and current direct mv buffer*/
879 obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
881 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* Lazily attach per-surface DMV (direct motion vector) buffers. */
883 if ( obj_surface->private_data == NULL) {
/* NOTE(review): calloc's (nmemb, size) arguments are swapped relative
 * to convention; the allocated size is the same.  The result is also
 * not checked before use -- confirm desired OOM policy upstream. */
884 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
885 gen6_avc_surface->dmv_top =
886 dri_bo_alloc(i965->intel.bufmgr,
890 gen6_avc_surface->dmv_bottom =
891 dri_bo_alloc(i965->intel.bufmgr,
895 assert(gen6_avc_surface->dmv_top);
896 assert(gen6_avc_surface->dmv_bottom);
897 obj_surface->private_data = (void *)gen6_avc_surface;
/* NOTE(review): casting a function pointer to void * is not strictly
 * portable C; kept as-is since the driver relies on it elsewhere. */
898 obj_surface->free_private_data = (void *)gen_free_avc_surface;
900 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
/* The last two DMV slots always hold the current frame's top/bottom. */
901 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
902 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
903 dri_bo_reference(gen6_avc_surface->dmv_top);
904 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Reconstructed output goes to post-deblocking when the in-loop filter
 * is on, otherwise to pre-deblocking (see pipe_mode_select DW1). */
906 if (enable_avc_ildb) {
907 mfc_context->post_deblocking_output.bo = obj_surface->bo;
908 dri_bo_reference(mfc_context->post_deblocking_output.bo);
910 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
911 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
914 mfc_context->surface_state.width = obj_surface->orig_width;
915 mfc_context->surface_state.height = obj_surface->orig_height;
916 mfc_context->surface_state.w_pitch = obj_surface->width;
917 mfc_context->surface_state.h_pitch = obj_surface->height;
919 /* Setup reference frames and direct mv buffers*/
920 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
921 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) {
922 obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
924 if (obj_surface->bo != NULL) {
925 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
926 dri_bo_reference(obj_surface->bo);
928 /* Check DMV buffer */
929 if ( obj_surface->private_data == NULL) {
931 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
932 gen6_avc_surface->dmv_top =
933 dri_bo_alloc(i965->intel.bufmgr,
937 gen6_avc_surface->dmv_bottom =
938 dri_bo_alloc(i965->intel.bufmgr,
942 assert(gen6_avc_surface->dmv_top);
943 assert(gen6_avc_surface->dmv_bottom);
944 obj_surface->private_data = gen6_avc_surface;
945 obj_surface->free_private_data = gen_free_avc_surface;
948 gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
949 /* Setup DMV buffer */
950 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
951 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
952 dri_bo_reference(gen6_avc_surface->dmv_top);
953 dri_bo_reference(gen6_avc_surface->dmv_bottom);
959 obj_surface = SURFACE(encoder_context->input_yuv_surface);
960 assert(obj_surface && obj_surface->bo);
961 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
962 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Attach the VA coded buffer as the PAK bitstream destination.
 * NOTE(review): offset is set to I965_CODEDBUFFER_SIZE -- presumably
 * the size of the driver's coded-buffer header region; confirm. */
964 obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
965 bo = obj_buffer->buffer_store->bo;
967 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
968 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_SIZE;
969 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
970 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
/* The status flag byte lives immediately after the first coded-buffer
 * segment header (requires the bo to be mapped at this point). */
973 coded_buffer_segment = (VACodedBufferSegment *)bo->virtual;
974 flag = (unsigned char *)(coded_buffer_segment + 1);
/* Kick off execution: flush the BCS batchbuffer holding all queued MFC/PAK
 * commands to the kernel for submission.  Always reports success; submission
 * errors are not checked here. */
982 static VAStatus gen75_mfc_run(VADriverContextP ctx,
983 struct encode_state *encode_state,
984 struct intel_encoder_context *encoder_context)
986 struct intel_batchbuffer *batch = encoder_context->base.batch;
988 intel_batchbuffer_flush(batch); //run the pipeline
990 return VA_STATUS_SUCCESS;
/* Read back the encode result: maps the coded buffer, converts the byte
 * count written by the hardware into bits for the bit-rate controller,
 * then unmaps.
 * NOTE(review): the map status is only asserted, not propagated; a mapping
 * failure in a release build would go unnoticed. */
995 gen75_mfc_stop(VADriverContextP ctx,
996 struct encode_state *encode_state,
997 struct intel_encoder_context *encoder_context,
998 int *encoded_bits_size)
1000 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1001 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1002 VACodedBufferSegment *coded_buffer_segment;
1004 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1005 assert(vaStatus == VA_STATUS_SUCCESS);
/* segment size is in bytes; callers expect bits */
1006 *encoded_bits_size = coded_buffer_segment->size * 8;
1007 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1009 return VA_STATUS_SUCCESS;
/* Emit the MFX_AVC_SLICE_STATE command (11 DWords) for one slice.
 *
 * Derives slice geometry (begin/next MB x,y), maps SP/SI slice types onto
 * the P/I bit-rate-control targets, resolves the weighted-prediction log2
 * denominators (forced to 5 for implicit B weighting per H.264 8.4.3), and
 * pulls the per-slice-type BRC thresholds (max QP deltas, correction values,
 * grow/shrink) out of mfc_context before writing the command.
 *
 * pic_param/slice_param: VAAPI H.264 picture/slice parameters.
 * rate_control_enable:   CBR flag (note the RC bits below are hard-wired 0).
 * batch:                 overwritten with encoder_context->base.batch before use.
 *
 * Fix: removed a stray empty statement (";;") after BEGIN_BCS_BATCH. */
1014 gen75_mfc_avc_slice_state(VADriverContextP ctx,
1015 VAEncPictureParameterBufferH264 *pic_param,
1016 VAEncSliceParameterBufferH264 *slice_param,
1017 struct encode_state *encode_state,
1018 struct intel_encoder_context *encoder_context,
1019 int rate_control_enable,
1021 struct intel_batchbuffer *batch)
1023 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1024 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1025 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1026 int beginmb = slice_param->macroblock_address;
1027 int endmb = beginmb + slice_param->num_macroblocks;
1028 int beginx = beginmb % width_in_mbs;
1029 int beginy = beginmb / width_in_mbs;
1030 int nextx = endmb % width_in_mbs;
1031 int nexty = endmb / width_in_mbs;
1032 int slice_type = slice_param->slice_type;
1033 int last_slice = (endmb == (width_in_mbs * height_in_mbs));
1034 int bit_rate_control_target, maxQpN, maxQpP;
1035 unsigned char correct[6], grow, shrink;
1037 int weighted_pred_idc = 0;
1038 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
1039 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
1042 batch = encoder_context->base.batch;
/* SP slices use the P BRC context, SI slices the I BRC context */
1044 bit_rate_control_target = slice_type;
1045 if (slice_type == SLICE_TYPE_SP)
1046 bit_rate_control_target = SLICE_TYPE_P;
1047 else if (slice_type == SLICE_TYPE_SI)
1048 bit_rate_control_target = SLICE_TYPE_I;
1050 if (slice_type == SLICE_TYPE_P) {
1051 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
1052 } else if (slice_type == SLICE_TYPE_B) {
1053 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1055 if (weighted_pred_idc == 2) {
1056 /* 8.4.3 - Derivation process for prediction weights (8-279) */
1057 luma_log2_weight_denom = 5;
1058 chroma_log2_weight_denom = 5;
1062 maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
1063 maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
1065 for (i = 0; i < 6; i++)
1066 correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
1068 grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit +
1069 (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
1070 shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit +
1071 (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
1073 BEGIN_BCS_BATCH(batch, 11);
1075 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
1076 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
1078 if (slice_type == SLICE_TYPE_I) {
1079 OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
1081 OUT_BCS_BATCH(batch,
1082 (1 << 16) | /*1 reference frame*/
1083 (chroma_log2_weight_denom << 8) |
1084 (luma_log2_weight_denom << 0));
1087 OUT_BCS_BATCH(batch,
1088 (weighted_pred_idc << 30) |
1089 (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
1090 (slice_param->disable_deblocking_filter_idc << 27) |
1091 (slice_param->cabac_init_idc << 24) |
1092 (qp<<16) | /*Slice Quantization Parameter*/
1093 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
1094 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
1095 OUT_BCS_BATCH(batch,
1096 (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
1098 slice_param->macroblock_address );
1099 OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
1100 OUT_BCS_BATCH(batch,
1101 (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
1102 (1 << 30) | /*ResetRateControlCounter*/
1103 (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
1104 (4 << 24) | /*RC Stable Tolerance, middle level*/
1105 (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
1106 (0 << 22) | /*QP mode, don't modfiy CBP*/
1107 (0 << 21) | /*MB Type Direct Conversion Enabled*/
1108 (0 << 20) | /*MB Type Skip Conversion Enabled*/
1109 (last_slice << 19) | /*IsLastSlice*/
1110 (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
1111 (1 << 17) | /*HeaderPresentFlag*/
1112 (1 << 16) | /*SliceData PresentFlag*/
1113 (1 << 15) | /*TailPresentFlag*/
1114 (1 << 13) | /*RBSP NAL TYPE*/
1115 (0 << 12) ); /*CabacZeroWordInsertionEnable*/
1116 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1117 OUT_BCS_BATCH(batch,
1118 (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
1119 (maxQpP << 16) | /*Target QP + 20 is highest QP*/
1122 OUT_BCS_BATCH(batch,
1123 (correct[5] << 20) |
1124 (correct[4] << 16) |
1125 (correct[3] << 12) |
1129 OUT_BCS_BATCH(batch, 0);
1131 ADVANCE_BCS_BATCH(batch);
1135 #ifdef MFC_SOFTWARE_HASWELL
/* Emit one MFC_AVC_PAK_OBJECT command (12 DWords) for an intra macroblock.
 *
 * x,y:          macroblock coordinates within the picture.
 * end_mb:       non-zero for the last MB of the slice.
 * msg:          per-MB VME output record; msg[0] carries the MB mode bits,
 *               msg[1..3] carry the intra prediction modes.
 * target/max_mb_size: per-MB size hints for the PAK (0 = unused here).
 *
 * Returns the command length in DWords so callers can track batch usage. */
1138 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1139 int qp,unsigned int *msg,
1140 struct intel_encoder_context *encoder_context,
1141 unsigned char target_mb_size, unsigned char max_mb_size,
1142 struct intel_batchbuffer *batch)
1144 int len_in_dwords = 12;
1145 unsigned int intra_msg;
1146 #define INTRA_MSG_FLAG (1 << 13)
1147 #define INTRA_MBTYPE_MASK (0x1F0000)
1149 batch = encoder_context->base.batch;
1151 BEGIN_BCS_BATCH(batch, len_in_dwords);
/* Repack the VME message: keep the low mode bits, mark as intra, and
 * shift the MB type field down into the PAK object layout. */
1153 intra_msg = msg[0] & 0xC0FF;
1154 intra_msg |= INTRA_MSG_FLAG;
1155 intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1156 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1157 OUT_BCS_BATCH(batch, 0);
1158 OUT_BCS_BATCH(batch, 0);
1159 OUT_BCS_BATCH(batch,
1160 (0 << 24) | /* PackedMvNum, Debug*/
1161 (0 << 20) | /* No motion vector */
1162 (1 << 19) | /* CbpDcY */
1163 (1 << 18) | /* CbpDcU */
1164 (1 << 17) | /* CbpDcV */
1167 OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/
1168 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
1169 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
1171 /*Stuff for Intra MB*/
1172 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
1173 OUT_BCS_BATCH(batch, msg[2]);
1174 OUT_BCS_BATCH(batch, msg[3]&0xFC);
1176 /*MaxSizeInWord and TargetSzieInWord*/
1177 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1178 (target_mb_size << 16) );
1180 OUT_BCS_BATCH(batch, 0);
1182 ADVANCE_BCS_BATCH(batch);
1184 return len_in_dwords;
/* Emit one MFC_AVC_PAK_OBJECT command (12 DWords) for an inter macroblock.
 *
 * msg:     per-MB VME output; msg[0] holds the inter mode, msg[1] the
 *          sub-MB shapes.  8x8 partitions with sub-shapes use 32 MVs.
 * offset:  byte offset of this MB's motion vectors in the VME output buffer.
 * slice_type: B slices additionally set the 0xF reference-select nibble in
 *          the "Last MB" DWord.
 *
 * Returns the command length in DWords. */
1188 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1189 unsigned int *msg, unsigned int offset,
1190 struct intel_encoder_context *encoder_context,
1191 unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1192 struct intel_batchbuffer *batch)
1194 int len_in_dwords = 12;
1195 unsigned int inter_msg = 0;
1197 batch = encoder_context->base.batch;
1199 BEGIN_BCS_BATCH(batch, len_in_dwords);
1201 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1205 if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1206 if (msg[1] & SUBMB_SHAPE_MASK)
1209 OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
1210 OUT_BCS_BATCH(batch, offset);
/* Rebuild the message word: keep mode/CBP bits, flag 8 MVs (32 for 8x8
 * partitions that carry sub-MB shapes). */
1211 inter_msg = msg[0] & (0x1F00FFFF);
1212 inter_msg |= INTER_MV8;
1213 if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1214 (msg[1] & SUBMB_SHAPE_MASK)) {
1215 inter_msg |= INTER_MV32;
1218 OUT_BCS_BATCH(batch, inter_msg);
1220 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
1221 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
1223 if ( slice_type == SLICE_TYPE_B) {
1224 OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
1226 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1229 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
/* Inter MB payload: sub-MB shape/prediction bits from the VME record */
1232 inter_msg = msg[1] >> 8;
1233 /*Stuff for Inter MB*/
1234 OUT_BCS_BATCH(batch, inter_msg);
1235 OUT_BCS_BATCH(batch, 0x0);
1236 OUT_BCS_BATCH(batch, 0x0);
1238 /*MaxSizeInWord and TargetSzieInWord*/
1239 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1240 (target_mb_size << 16) );
1242 OUT_BCS_BATCH(batch, 0x0);
1244 ADVANCE_BCS_BATCH(batch);
1246 return len_in_dwords;
1249 #define INTRA_RDO_OFFSET 4
1250 #define INTER_RDO_OFFSET 54
1251 #define INTER_MSG_OFFSET 52
1252 #define INTER_MV_OFFSET 224
1253 #define RDO_MASK 0xFFFF
/* Program one slice into the software (CPU-built) slice batchbuffer:
 * slice state, packed headers (SPS/PPS on the first slice), the slice
 * header, then one PAK object per macroblock chosen from the VME output
 * (intra vs. inter decided by comparing the per-MB RDO costs), and
 * finally the tail/padding insert_object calls.
 *
 * Under CBR the slice QP is taken from the BRC context and the slice's
 * qp_delta is rewritten to match. */
1256 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1257 struct encode_state *encode_state,
1258 struct intel_encoder_context *encoder_context,
1260 struct intel_batchbuffer *slice_batch)
1262 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1263 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1264 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1265 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1266 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1267 unsigned int *msg = NULL, offset = 0;
1268 unsigned char *msg_ptr = NULL;
1269 int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1270 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1271 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1272 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1274 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1275 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1276 unsigned char *slice_header = NULL;
1277 int slice_header_length_in_bits = 0;
1278 unsigned int tail_data[] = { 0x0, 0x0 };
1279 int slice_type = pSliceParameter->slice_type;
/* CBR: BRC owns the QP; keep the bitstream's qp_delta consistent */
1282 if (rate_control_mode == VA_RC_CBR) {
1283 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1284 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1287 /* only support for 8-bit pixel bit-depth */
1288 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1289 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1290 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1291 assert(qp >= 0 && qp < 52);
1293 gen75_mfc_avc_slice_state(ctx,
1296 encode_state, encoder_context,
1297 (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1299 if ( slice_index == 0)
1300 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
/* build_avc_slice_header allocates slice_header; ownership note:
 * presumably freed after insertion — TODO confirm (free not visible here) */
1302 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1305 mfc_context->insert_object(ctx, encoder_context,
1306 (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1307 5, /* first 5 bytes are start code + nal unit type */
1308 1, 0, 1, slice_batch);
1310 dri_bo_map(vme_context->vme_output.bo , 1);
1311 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1314 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1316 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
/* Per-MB loop: pick intra or inter PAK object based on the RDO costs
 * recorded in the VME output block for this macroblock. */
1319 for (i = pSliceParameter->macroblock_address;
1320 i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1321 int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1322 x = i % width_in_mbs;
1323 y = i / width_in_mbs;
1324 msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1328 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1330 int inter_rdo, intra_rdo;
1331 inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
1332 intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
1333 offset = i * vme_context->vme_output.size_block + INTER_MV_OFFSET;
1334 if (intra_rdo < inter_rdo) {
1335 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1337 msg += INTER_MSG_OFFSET;
1338 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1343 dri_bo_unmap(vme_context->vme_output.bo);
/* Tail data: trailing bits / padding after the slice payload */
1346 mfc_context->insert_object(ctx, encoder_context,
1348 2, 1, 1, 0, slice_batch);
1350 mfc_context->insert_object(ctx, encoder_context,
1352 1, 1, 1, 0, slice_batch);
/* Build the slice-level command buffer entirely on the CPU: one fresh BSD
 * batchbuffer is filled with every slice's commands, terminated with
 * MI_BATCH_BUFFER_END, and its bo returned (referenced) for the caller to
 * chain from the main batch via MI_BATCH_BUFFER_START.
 * Caller owns the returned reference and must unreference it. */
1360 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1361 struct encode_state *encode_state,
1362 struct intel_encoder_context *encoder_context)
1364 struct i965_driver_data *i965 = i965_driver_data(ctx);
1365 struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
1366 dri_bo *batch_bo = batch->buffer;
1369 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1370 gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1373 intel_batchbuffer_align(batch, 8);
1375 BEGIN_BCS_BATCH(batch, 2);
1376 OUT_BCS_BATCH(batch, 0);
1377 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1378 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive past intel_batchbuffer_free(); returned to caller */
1380 dri_bo_reference(batch_bo);
1381 intel_batchbuffer_free(batch);
/* Bind the GPU-batchbuffer kernel's input surfaces: the VME output (per-MB
 * records) and the aux batchbuffer holding the pre-built slice headers. */
1389 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1390 struct encode_state *encode_state,
1391 struct intel_encoder_context *encoder_context)
1394 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1395 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1397 assert(vme_context->vme_output.bo);
1398 mfc_context->buffer_suface_setup(ctx,
1399 &mfc_context->gpe_context,
1400 &vme_context->vme_output,
1401 BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1402 SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1403 assert(mfc_context->aux_batchbuffer_surface.bo);
1404 mfc_context->buffer_suface_setup(ctx,
1405 &mfc_context->gpe_context,
1406 &mfc_context->aux_batchbuffer_surface,
1407 BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1408 SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
/* Allocate and bind the output surface that the media kernel writes the
 * generated MFC batchbuffer into.  Sized as one 16-byte-pitch block per
 * macroblock plus per-slice overhead (8 blocks each) plus a terminator. */
1412 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1413 struct encode_state *encode_state,
1414 struct intel_encoder_context *encoder_context)
1417 struct i965_driver_data *i965 = i965_driver_data(ctx);
1418 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1419 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1420 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1421 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1422 mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1423 mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1424 mfc_context->mfc_batchbuffer_surface.pitch = 16;
/* NOTE(review): allocation result is not checked before use below */
1425 mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
1427 mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1429 mfc_context->buffer_suface_setup(ctx,
1430 &mfc_context->gpe_context,
1431 &mfc_context->mfc_batchbuffer_surface,
1432 BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1433 SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
/* Convenience wrapper: set up both the input (VME output, slice headers)
 * and output (generated batchbuffer) surfaces for the media kernel. */
1437 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1438 struct encode_state *encode_state,
1439 struct intel_encoder_context *encoder_context)
1441 gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1442 gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/* Fill the interface descriptor remap table (IDRT): one 32-byte descriptor
 * per loaded media kernel, pointing at the kernel start address and the
 * shared binding table, with a relocation emitted for the kernel bo. */
1446 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1447 struct encode_state *encode_state,
1448 struct intel_encoder_context *encoder_context)
1450 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1451 struct gen6_interface_descriptor_data *desc;
1455 bo = mfc_context->gpe_context.idrt.bo;
1457 assert(bo->virtual);
1460 for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1461 struct i965_kernel *kernel;
1463 kernel = &mfc_context->gpe_context.kernels[i];
1464 assert(sizeof(*desc) == 32);
1466 /*Setup the descritor table*/
1467 memset(desc, 0, sizeof(*desc));
/* kernel start pointer is in 64-byte (cacheline) units */
1468 desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1469 desc->desc2.sampler_count = 0;
1470 desc->desc2.sampler_state_pointer = 0;
1471 desc->desc3.binding_table_entry_count = 2;
1472 desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1473 desc->desc4.constant_urb_entry_read_offset = 0;
1474 desc->desc4.constant_urb_entry_read_length = 4;
/* Patch desc0 with the kernel bo's real GPU address at exec time */
1477 dri_bo_emit_reloc(bo,
1478 I915_GEM_DOMAIN_INSTRUCTION, 0,
1480 i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
/* Set up the constant (CURBE) data for the batchbuffer media kernels.
 * NOTE(review): body largely not visible in this view — the visible part
 * only fetches the MFC context. */
1489 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1490 struct encode_state *encode_state,
1491 struct intel_encoder_context *encoder_context)
1493 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/* Emit one MEDIA_OBJECT command (12 DWords) dispatching the selected
 * batchbuffer kernel (index: intra or inter variant).  The inline payload
 * carries the read offset into the slice-header buffer (head_offset), the
 * write offset into the generated batchbuffer (batchbuffer_offset), and a
 * packed DWord with the number of MB commands to generate. */
1499 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1502 int batchbuffer_offset,
1514 BEGIN_BATCH(batch, 12);
1516 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1517 OUT_BATCH(batch, index);
1518 OUT_BATCH(batch, 0);
1519 OUT_BATCH(batch, 0);
1520 OUT_BATCH(batch, 0);
1521 OUT_BATCH(batch, 0);
/* inline data consumed by the media kernel */
1524 OUT_BATCH(batch, head_offset);
1525 OUT_BATCH(batch, batchbuffer_offset);
1530 number_mb_cmds << 16 |
1541 ADVANCE_BATCH(batch);
/* Dispatch the media kernel over one slice in chunks of up to 128 MBs.
 * Each chunk becomes one MEDIA_OBJECT; the first chunk also consumes the
 * slice head (headers), the last chunk appends the tail.  head_offset /
 * batchbuffer_offset are advanced accordingly between chunks, and a final
 * partial chunk handles total_mbs % 128. */
1545 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1546 struct intel_encoder_context *encoder_context,
1547 VAEncSliceParameterBufferH264 *slice_param,
1549 unsigned short head_size,
1550 unsigned short tail_size,
1551 int batchbuffer_offset,
1555 struct intel_batchbuffer *batch = encoder_context->base.batch;
1556 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1557 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1558 int total_mbs = slice_param->num_macroblocks;
1559 int number_mb_cmds = 128;
1560 int starting_mb = 0;
1561 int last_object = 0;
1562 int first_object = 1;
/* choose the intra or inter kernel per slice type */
1565 int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1567 for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1568 last_object = (total_mbs - starting_mb) == number_mb_cmds;
1569 mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1570 mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1571 assert(mb_x <= 255 && mb_y <= 255);
1573 starting_mb += number_mb_cmds;
1575 gen75_mfc_batchbuffer_emit_object_command(batch,
/* head is consumed once, by the first chunk */
1591 head_offset += head_size;
1592 batchbuffer_offset += head_size;
1596 head_offset += tail_size;
1597 batchbuffer_offset += tail_size;
1600 batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
/* remaining MBs that did not fill a whole 128-MB chunk */
1607 number_mb_cmds = total_mbs % number_mb_cmds;
1608 mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1609 mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1610 assert(mb_x <= 255 && mb_y <= 255);
1611 starting_mb += number_mb_cmds;
1613 gen75_mfc_batchbuffer_emit_object_command(batch,
/* Build one slice for the hardware (GPU-generated) batchbuffer path:
 * write slice state, packed headers and the slice header into the aux
 * batchbuffer (the "head"), the tail data after it, measure both in
 * OWords, then dispatch the media kernel via the slice command above.
 * return size in Owords (16bytes)
 */
1634 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1635 struct encode_state *encode_state,
1636 struct intel_encoder_context *encoder_context,
1638 int batchbuffer_offset)
1640 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1641 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1642 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1643 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1644 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1645 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1646 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1647 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1648 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1649 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1650 unsigned char *slice_header = NULL;
1651 int slice_header_length_in_bits = 0;
1652 unsigned int tail_data[] = { 0x0, 0x0 };
1654 int old_used = intel_batchbuffer_used_size(slice_batch), used;
1655 unsigned short head_size, tail_size;
1656 int slice_type = pSliceParameter->slice_type;
/* CBR: BRC owns the QP; keep the bitstream's qp_delta consistent */
1658 if (rate_control_mode == VA_RC_CBR) {
1659 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1660 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1663 /* only support for 8-bit pixel bit-depth */
1664 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1665 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1666 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1667 assert(qp >= 0 && qp < 52);
/* offsets into the aux batchbuffer are in OWords (16 bytes) */
1669 head_offset = old_used / 16;
1670 gen75_mfc_avc_slice_state(ctx,
1675 (rate_control_mode == VA_RC_CBR),
1679 if (slice_index == 0)
1680 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1682 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1685 mfc_context->insert_object(ctx,
1687 (unsigned int *)slice_header,
1688 ALIGN(slice_header_length_in_bits, 32) >> 5,
1689 slice_header_length_in_bits & 0x1f,
1690 5, /* first 5 bytes are start code + nal unit type */
1697 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1698 used = intel_batchbuffer_used_size(slice_batch);
1699 head_size = (used - old_used) / 16;
1704 mfc_context->insert_object(ctx,
1715 mfc_context->insert_object(ctx,
1727 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1728 used = intel_batchbuffer_used_size(slice_batch);
1729 tail_size = (used - old_used) / 16;
1732 gen75_mfc_avc_batchbuffer_slice_command(ctx,
/* total OWords this slice will occupy in the generated batchbuffer */
1742 return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
/* Run the GPE media pipeline that generates the MFC batchbuffer on the GPU:
 * set up the pipeline, emit one batchbuffer-slice dispatch per slice
 * (accumulating the OWord offset), then flush. */
1746 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1747 struct encode_state *encode_state,
1748 struct intel_encoder_context *encoder_context)
1750 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1751 struct intel_batchbuffer *batch = encoder_context->base.batch;
1752 int i, size, offset = 0;
1753 intel_batchbuffer_start_atomic(batch, 0x4000);
1754 gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1756 for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1757 size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1761 intel_batchbuffer_end_atomic(batch);
1762 intel_batchbuffer_flush(batch);
/* Hardware batchbuffer-generation entry: surfaces, IDRT, constants, then
 * the media pipeline that writes the MFC batchbuffer. */
1766 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1767 struct encode_state *encode_state,
1768 struct intel_encoder_context *encoder_context)
1770 gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1771 gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1772 gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1773 gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
/* GPU-generated counterpart of the software batchbuffer: build the MFC
 * batchbuffer with media kernels and return its bo (referenced; caller
 * must unreference). */
1777 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1778 struct encode_state *encode_state,
1779 struct intel_encoder_context *encoder_context)
1781 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1783 gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1784 dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1786 return mfc_context->mfc_batchbuffer_surface.bo;
/* Program the full BCS pipeline for one frame: reject interlaced content,
 * build the slice-level batchbuffer (CPU path when MFC_SOFTWARE_HASWELL is
 * defined, GPU path otherwise), then emit picture-level state and chain to
 * the slice batchbuffer with MI_BATCH_BUFFER_START. */
1792 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1793 struct encode_state *encode_state,
1794 struct intel_encoder_context *encoder_context)
1796 struct intel_batchbuffer *batch = encoder_context->base.batch;
1797 dri_bo *slice_batch_bo;
1799 if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1800 fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1805 #ifdef MFC_SOFTWARE_HASWELL
1806 slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1808 slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1812 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1813 intel_batchbuffer_emit_mi_flush(batch);
1815 // picture level programing
1816 gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain into the second-level (slice) batchbuffer */
1818 BEGIN_BCS_BATCH(batch, 2);
1819 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1820 OUT_BCS_RELOC(batch,
1822 I915_GEM_DOMAIN_COMMAND, 0,
1824 ADVANCE_BCS_BATCH(batch);
1827 intel_batchbuffer_end_atomic(batch);
/* drop the reference taken by the builder above */
1829 dri_bo_unreference(slice_batch_bo);
/* Top-level per-picture encode: init/prepare the MFC state, program and run
 * the BCS pipeline, and — under CBR — loop with the bit-rate controller,
 * re-encoding until the HRD is satisfied or the violation is unrepairable
 * (min/max QP reached, in which case it is logged once and accepted).
 *
 * Fix: "¤t_frame_bits_size" was a mis-encoded "&current_frame_bits_size"
 * (the HTML entity "&curren;" swallowed from "&current..."), which did not
 * compile; restored the address-of argument for gen75_mfc_stop(). */
1834 gen75_mfc_avc_encode_picture(VADriverContextP ctx,
1835 struct encode_state *encode_state,
1836 struct intel_encoder_context *encoder_context)
1838 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1839 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1840 int current_frame_bits_size;
1844 gen75_mfc_init(ctx, encoder_context);
1845 gen75_mfc_avc_prepare(ctx, encode_state, encoder_context);
1846 /*Programing bcs pipeline*/
1847 gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1848 gen75_mfc_run(ctx, encode_state, encoder_context);
1849 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1850 gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1851 sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1852 if (sts == BRC_NO_HRD_VIOLATION) {
1853 intel_mfc_hrd_context_update(encode_state, mfc_context);
1856 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* warn only once per stream about an unrepairable HRD violation */
1857 if (!mfc_context->hrd.violation_noted) {
1858 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1859 mfc_context->hrd.violation_noted = 1;
1861 return VA_STATUS_SUCCESS;
1868 return VA_STATUS_SUCCESS;
/* Tear down the MFC context: drop every bo reference (deblocking outputs,
 * source, coded buffer, DMV buffers, scratch buffers, reference surfaces,
 * batchbuffer surfaces), destroy the GPE context, free the aux batchbuffer.
 * Pointers are NULLed after unreference to guard against reuse. */
1873 gen75_mfc_context_destroy(void *context)
1875 struct gen6_mfc_context *mfc_context = context;
1878 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1879 mfc_context->post_deblocking_output.bo = NULL;
1881 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1882 mfc_context->pre_deblocking_output.bo = NULL;
1884 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1885 mfc_context->uncompressed_picture_source.bo = NULL;
1887 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
1888 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1890 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1891 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1892 mfc_context->direct_mv_buffers[i].bo = NULL;
1895 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1896 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1898 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1899 mfc_context->macroblock_status_buffer.bo = NULL;
1901 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1902 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1904 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1905 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1908 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1909 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1910 mfc_context->reference_surfaces[i].bo = NULL;
1913 i965_gpe_context_destroy(&mfc_context->gpe_context);
1915 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
1916 mfc_context->mfc_batchbuffer_surface.bo = NULL;
1918 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1919 mfc_context->aux_batchbuffer_surface.bo = NULL;
1921 if (mfc_context->aux_batchbuffer)
1922 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1924 mfc_context->aux_batchbuffer = NULL;
/* Encoder pipeline dispatch by VA profile: only H.264 (Baseline/Main/High)
 * is wired up; everything else returns UNSUPPORTED_PROFILE. */
1929 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
1931 struct encode_state *encode_state,
1932 struct intel_encoder_context *encoder_context)
1937 case VAProfileH264Baseline:
1938 case VAProfileH264Main:
1939 case VAProfileH264High:
1940 vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1943 /* FIXME: add for other profile */
1945 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1952 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1954 struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
1956 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1958 mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
1959 mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1961 mfc_context->gpe_context.curbe.length = 32 * 4;
1963 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1964 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
1965 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
1966 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1967 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
1969 i965_gpe_load_kernels(ctx,
1970 &mfc_context->gpe_context,
1974 mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
1975 mfc_context->set_surface_state = gen75_mfc_surface_state;
1976 mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
1977 mfc_context->avc_img_state = gen75_mfc_avc_img_state;
1978 mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
1979 mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
1980 mfc_context->insert_object = gen75_mfc_avc_insert_object;
1981 mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1983 encoder_context->mfc_context = mfc_context;
1984 encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
1985 encoder_context->mfc_pipeline = gen75_mfc_pipeline;
1986 encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;