2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao Yakui <yakui.zhao@intel.com>
26 * Xiang Haihao <haihao.xiang@intel.com>
#include <string.h>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
44 #include "intel_media.h"
46 #define MFC_SOFTWARE_HASWELL 1
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/*
 * Precompiled EU media kernels used to build MFC PAK batch buffers in
 * software (see MFC_SOFTWARE_HASWELL).  The .g7b files are raw Gen7
 * shader binaries included as arrays of dword quadruples.
 */
51 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
52 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
55 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
56 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
/* Kernel descriptor table: {name, kernel id, binary, binary size}. */
59 static struct i965_kernel gen75_mfc_kernels[] = {
61 "MFC AVC INTRA BATCHBUFFER ",
62 MFC_BATCHBUFFER_AVC_INTRA,
63 gen75_mfc_batchbuffer_avc_intra,
64 sizeof(gen75_mfc_batchbuffer_avc_intra),
69 "MFC AVC INTER BATCHBUFFER ",
70 MFC_BATCHBUFFER_AVC_INTER,
71 gen75_mfc_batchbuffer_avc_inter,
72 sizeof(gen75_mfc_batchbuffer_avc_inter),
77 #define INTER_MODE_MASK 0x03
78 #define INTER_8X8 0x03
79 #define INTER_16X8 0x01
80 #define INTER_8X16 0x02
81 #define SUBMB_SHAPE_MASK 0x00FF00
83 #define INTER_MV8 (4 << 20)
84 #define INTER_MV32 (6 << 20)
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords) on the BCS ring: selects encode
 * mode (bit 4), long-format batch, stream-out, and enables the pre-
 * and/or post-deblocking output paths depending on which BO is set in
 * the mfc context.  standard_select must be MFX_FORMAT_AVC or _MPEG2.
 */
88 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
90 struct intel_encoder_context *encoder_context)
92 struct intel_batchbuffer *batch = encoder_context->base.batch;
93 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
95 assert(standard_select == MFX_FORMAT_MPEG2 ||
96 standard_select == MFX_FORMAT_AVC);
/* DW0 carries the opcode and (command length - 2), per MFX convention. */
98 BEGIN_BCS_BATCH(batch, 5);
100 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
102 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
103 (MFD_MODE_VLD << 15) | /* VLD mode */
104 (1 << 10) | /* Stream-Out Enable */
105 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
106 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
/* NOTE(review): bit 8 is OR'ed again with a literal 0 below - a no-op,
 * so one of the two "Pre Deblocking Output" terms is redundant. */
107 (0 << 8) | /* Pre Deblocking Output */
108 (0 << 5) | /* not in stitch mode */
109 (1 << 4) | /* encoding mode */
110 (standard_select << 0)); /* standard select: avc or mpeg2 */
/* Next DW: error-termination and clock-gating controls, all defaults. */
112 (0 << 7) | /* expand NOA bus flag */
113 (0 << 6) | /* disable slice-level clock gating */
114 (0 << 5) | /* disable clock gating for NOA */
115 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
116 (0 << 3) | /* terminate if AVC mbdata error occurs */
117 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
120 OUT_BCS_BATCH(batch, 0);
121 OUT_BCS_BATCH(batch, 0);
123 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (6 dwords) describing the reconstructed/raw
 * picture surface: planar 4:2:0 8-bit, interleaved U/V, Y-major tiled.
 * Dimensions and pitches are taken from mfc_context->surface_state.
 */
127 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
129 struct intel_batchbuffer *batch = encoder_context->base.batch;
130 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
132 BEGIN_BCS_BATCH(batch, 6);
134 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
135 OUT_BCS_BATCH(batch, 0);
/* Height/width are programmed minus one, per the command layout. */
137 ((mfc_context->surface_state.height - 1) << 18) |
138 ((mfc_context->surface_state.width - 1) << 4));
140 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
141 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
142 (0 << 22) | /* surface object control state, FIXME??? */
143 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
144 (0 << 2) | /* must be 0 for interleave U/V */
145 (1 << 1) | /* must be tiled */
146 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
/* h_pitch gives the Y offset (in rows) where the chroma plane starts. */
148 (0 << 16) | /* must be 0 for interleave U/V */
149 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
150 OUT_BCS_BATCH(batch, 0);
152 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE, 26-dword B+-stepping layout.
 * Points the MFX engine at the VME output BO (indirect MV objects) and
 * at the PAK-BSE output BO (compressed bitstream destination).  The
 * IT-COFF and DBLK sections are decoder-only and left zero.
 */
156 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
157 struct intel_encoder_context *encoder_context)
159 struct intel_batchbuffer *batch = encoder_context->base.batch;
160 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
161 struct gen6_vme_context *vme_context = encoder_context->vme_context;
163 BEGIN_BCS_BATCH(batch, 26);
165 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
166 /* the DW1-3 is for the MFX indirect bistream offset */
167 OUT_BCS_BATCH(batch, 0);
168 OUT_BCS_BATCH(batch, 0);
169 OUT_BCS_BATCH(batch, 0);
170 /* the DW4-5 is the MFX upper bound */
171 OUT_BCS_BATCH(batch, 0);
172 OUT_BCS_BATCH(batch, 0);
174 /* the DW6-10 is for MFX Indirect MV Object Base Address */
175 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
176 OUT_BCS_BATCH(batch, 0);
177 OUT_BCS_BATCH(batch, 0);
178 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
179 OUT_BCS_BATCH(batch, 0);
181 /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
182 OUT_BCS_BATCH(batch, 0);
183 OUT_BCS_BATCH(batch, 0);
184 OUT_BCS_BATCH(batch, 0);
185 OUT_BCS_BATCH(batch, 0);
186 OUT_BCS_BATCH(batch, 0);
188 /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
189 OUT_BCS_BATCH(batch, 0);
190 OUT_BCS_BATCH(batch, 0);
191 OUT_BCS_BATCH(batch, 0);
192 OUT_BCS_BATCH(batch, 0);
193 OUT_BCS_BATCH(batch, 0);
195 /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
197 mfc_context->mfc_indirect_pak_bse_object.bo,
198 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
200 OUT_BCS_BATCH(batch, 0);
201 OUT_BCS_BATCH(batch, 0);
/* Second relocation gives the upper bound (end_offset) of the BSE BO. */
204 mfc_context->mfc_indirect_pak_bse_object.bo,
205 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
206 mfc_context->mfc_indirect_pak_bse_object.end_offset);
207 OUT_BCS_BATCH(batch, 0);
209 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE dispatcher: B+ steppings use the 26-dword
 * layout above; earlier steppings use the legacy 11-dword layout below.
 */
213 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
215 struct intel_batchbuffer *batch = encoder_context->base.batch;
216 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
217 struct gen6_vme_context *vme_context = encoder_context->vme_context;
218 struct i965_driver_data *i965 = i965_driver_data(ctx);
220 if (IS_STEPPING_BPLUS(i965)) {
221 gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
224 BEGIN_BCS_BATCH(batch, 11);
226 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
227 OUT_BCS_BATCH(batch, 0);
228 OUT_BCS_BATCH(batch, 0);
229 /* MFX Indirect MV Object Base Address */
230 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
231 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
232 OUT_BCS_BATCH(batch, 0);
233 OUT_BCS_BATCH(batch, 0);
234 OUT_BCS_BATCH(batch, 0);
235 OUT_BCS_BATCH(batch, 0);
236 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
238 mfc_context->mfc_indirect_pak_bse_object.bo,
239 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
/* Second relocation: upper bound (end_offset) of the PAK-BSE BO. */
242 mfc_context->mfc_indirect_pak_bse_object.bo,
243 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
244 mfc_context->mfc_indirect_pak_bse_object.end_offset);
246 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_AVC_IMG_STATE (16 dwords): per-picture AVC encode parameters.
 * Picture dimensions are derived from the surface state (rounded up to
 * whole macroblocks); weighted-prediction, entropy mode and 8x8
 * transform flags come from the VA picture parameter buffer.
 */
250 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
251 struct intel_encoder_context *encoder_context)
253 struct intel_batchbuffer *batch = encoder_context->base.batch;
254 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
255 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
257 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
258 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
260 BEGIN_BCS_BATCH(batch, 16);
262 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
/* Total MB count is truncated to 16 bits; frames beyond 65535 MBs would
 * wrap - presumably bounded elsewhere by level limits (TODO confirm). */
264 ((width_in_mbs * height_in_mbs) & 0xFFFF));
266 ((height_in_mbs - 1) << 16) |
267 ((width_in_mbs - 1) << 0));
269 (0 << 24) | /* Second Chroma QP Offset */
270 (0 << 16) | /* Chroma QP Offset */
271 (0 << 14) | /* Max-bit conformance Intra flag */
272 (0 << 13) | /* Max Macroblock size conformance Inter flag */
273 (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
274 (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
275 (0 << 8) | /* FIXME: Image Structure */
276 (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */
278 (0 << 16) | /* Mininum Frame size */
279 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
280 (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */
281 (0 << 13) | /* CABAC 0 word insertion test enable */
282 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
283 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
284 (0 << 8) | /* FIXME: MbMvFormatFlag */
285 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
286 (0 << 6) | /* Only valid for VLD decoding mode */
287 (0 << 5) | /* Constrained Intra Predition Flag, from PPS */
288 (0 << 4) | /* Direct 8x8 inference flag */
289 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
290 (1 << 2) | /* Frame MB only flag */
291 (0 << 1) | /* MBAFF mode is in active */
292 (0 << 0)); /* Field picture flag */
293 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
294 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
295 (0xBB8 << 16) | /* InterMbMaxSz */
296 (0xEE8) ); /* IntraMbMaxSz */
297 OUT_BCS_BATCH(batch, 0); /* Reserved */
298 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
299 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
/* Magic trellis/RC constants; meaning not derivable from this file. */
300 OUT_BCS_BATCH(batch, 0x8C000000);
301 OUT_BCS_BATCH(batch, 0x00010000);
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
307 ADVANCE_BCS_BATCH(batch);
311 gen75_mfc_qm_state(VADriverContextP ctx,
315 struct intel_encoder_context *encoder_context)
317 struct intel_batchbuffer *batch = encoder_context->base.batch;
318 unsigned int qm_buffer[16];
320 assert(qm_length <= 16);
321 assert(sizeof(*qm) == 4);
322 memcpy(qm_buffer, qm, qm_length * 4);
324 BEGIN_BCS_BATCH(batch, 18);
325 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
326 OUT_BCS_BATCH(batch, qm_type << 0);
327 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
328 ADVANCE_BCS_BATCH(batch);
/*
 * Program the four AVC quantization matrices with the flat (all-16)
 * default: 4x4 intra/inter use 12 dwords, 8x8 intra/inter use 16.
 */
332 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
334 unsigned int qm[16] = {
335 0x10101010, 0x10101010, 0x10101010, 0x10101010,
336 0x10101010, 0x10101010, 0x10101010, 0x10101010,
337 0x10101010, 0x10101010, 0x10101010, 0x10101010,
338 0x10101010, 0x10101010, 0x10101010, 0x10101010
341 gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
342 gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
343 gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
344 gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
348 gen75_mfc_fqm_state(VADriverContextP ctx,
352 struct intel_encoder_context *encoder_context)
354 struct intel_batchbuffer *batch = encoder_context->base.batch;
355 unsigned int fqm_buffer[32];
357 assert(fqm_length <= 32);
358 assert(sizeof(*fqm) == 4);
359 memcpy(fqm_buffer, fqm, fqm_length * 4);
361 BEGIN_BCS_BATCH(batch, 34);
362 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
363 OUT_BCS_BATCH(batch, fqm_type << 0);
364 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
365 ADVANCE_BCS_BATCH(batch);
/*
 * Program the four AVC forward quantization matrices with the flat
 * (0x1000 per entry) default: 4x4 matrices use 24 dwords, 8x8 use 32.
 */
369 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
371 unsigned int qm[32] = {
372 0x10001000, 0x10001000, 0x10001000, 0x10001000,
373 0x10001000, 0x10001000, 0x10001000, 0x10001000,
374 0x10001000, 0x10001000, 0x10001000, 0x10001000,
375 0x10001000, 0x10001000, 0x10001000, 0x10001000,
376 0x10001000, 0x10001000, 0x10001000, 0x10001000,
377 0x10001000, 0x10001000, 0x10001000, 0x10001000,
378 0x10001000, 0x10001000, 0x10001000, 0x10001000,
379 0x10001000, 0x10001000, 0x10001000, 0x10001000
382 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
383 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
384 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
385 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
/*
 * Emit MFX_INSERT_OBJECT to splice raw header/RBSP bytes into the
 * output bitstream (SPS/PPS/SEI, slice headers).  The payload is
 * lenght_in_dws dwords; data_bits_in_last_dw gives the valid bit count
 * of the final dword, and emulation_flag enables 0x03 emulation-byte
 * insertion after skip_emul_byte_count leading bytes.
 * NOTE(review): "lenght" is a long-standing typo for "length"; kept
 * because the parameter name may be referenced by out-of-view callers'
 * documentation.  The batch argument is ignored and overwritten below.
 */
389 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
390 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
391 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
392 struct intel_batchbuffer *batch)
395 batch = encoder_context->base.batch;
397 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
399 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
401 (0 << 16) | /* always start at offset 0 */
402 (data_bits_in_last_dw << 8) |
403 (skip_emul_byte_count << 4) |
404 (!!emulation_flag << 3) |
405 ((!!is_last_header) << 2) |
406 ((!!is_end_of_slice) << 1) |
407 (0 << 0)); /* FIXME: ??? */
408 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
410 ADVANCE_BCS_BATCH(batch);
/*
 * Per-frame MFC (re)initialization: drop all BO references held from
 * the previous frame, then (re)allocate the scratch buffers whose size
 * depends on the frame dimensions taken from the sequence parameters.
 *
 * Fix: the direct_mv_buffers cleanup loop had a stray semicolon after
 * the NULL check - `if (...);` - which made the check a no-op and
 * unconditionally executed the statements below it.  The semicolon is
 * removed so the unreference is guarded, matching the
 * reference_surfaces loop that follows.
 */
414 static void gen75_mfc_init(VADriverContextP ctx,
415 struct encode_state *encode_state,
416 struct intel_encoder_context *encoder_context)
418 struct i965_driver_data *i965 = i965_driver_data(ctx);
419 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
422 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
423 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
424 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
426 /*Encode common setup for MFC*/
427 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
428 mfc_context->post_deblocking_output.bo = NULL;
430 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
431 mfc_context->pre_deblocking_output.bo = NULL;
433 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
434 mfc_context->uncompressed_picture_source.bo = NULL;
436 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
437 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
439 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
440 if ( mfc_context->direct_mv_buffers[i].bo != NULL)
441 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
442 mfc_context->direct_mv_buffers[i].bo = NULL;
445 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
446 if (mfc_context->reference_surfaces[i].bo != NULL)
447 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
448 mfc_context->reference_surfaces[i].bo = NULL;
/* Scratch buffers are sized from the frame geometry. */
451 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
452 bo = dri_bo_alloc(i965->intel.bufmgr,
457 mfc_context->intra_row_store_scratch_buffer.bo = bo;
459 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
460 bo = dri_bo_alloc(i965->intel.bufmgr,
462 width_in_mbs * height_in_mbs * 16,
465 mfc_context->macroblock_status_buffer.bo = bo;
467 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
468 bo = dri_bo_alloc(i965->intel.bufmgr,
470 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
473 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
475 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
476 bo = dri_bo_alloc(i965->intel.bufmgr,
478 2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
481 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
483 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
484 mfc_context->mfc_batchbuffer_surface.bo = NULL;
486 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
487 mfc_context->aux_batchbuffer_surface.bo = NULL;
489 if (mfc_context->aux_batchbuffer)
490 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
/* The aux batchbuffer doubles as a surface of 16-byte blocks. */
492 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, 0);
493 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
494 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
495 mfc_context->aux_batchbuffer_surface.pitch = 16;
496 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
497 mfc_context->aux_batchbuffer_surface.size_block = 16;
499 i965_gpe_context_init(ctx, &mfc_context->gpe_context);
/*
 * MFX_PIPE_BUF_ADDR_STATE, 61-dword B+-stepping layout.  Binds every
 * surface/scratch BO the MFX pipeline touches: pre/post deblocking
 * outputs, the raw source, MB status / stream-out, row-store scratch
 * buffers and up to 16 reference surfaces (each address spans 3 dwords
 * on this layout).  Unused slots are programmed as 0.
 */
503 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
504 struct intel_encoder_context *encoder_context)
506 struct intel_batchbuffer *batch = encoder_context->base.batch;
507 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
510 BEGIN_BCS_BATCH(batch, 61);
512 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
514 /* the DW1-3 is for pre_deblocking */
515 if (mfc_context->pre_deblocking_output.bo)
516 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
517 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
520 OUT_BCS_BATCH(batch, 0); /* pre output addr */
522 OUT_BCS_BATCH(batch, 0);
523 OUT_BCS_BATCH(batch, 0);
524 /* the DW4-6 is for the post_deblocking */
526 if (mfc_context->post_deblocking_output.bo)
527 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
528 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
529 0); /* post output addr */
531 OUT_BCS_BATCH(batch, 0);
532 OUT_BCS_BATCH(batch, 0);
533 OUT_BCS_BATCH(batch, 0);
535 /* the DW7-9 is for the uncompressed_picture */
536 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
537 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
538 0); /* uncompressed data */
540 OUT_BCS_BATCH(batch, 0);
541 OUT_BCS_BATCH(batch, 0);
543 /* the DW10-12 is for the mb status */
544 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
545 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
546 0); /* StreamOut data*/
547 OUT_BCS_BATCH(batch, 0);
548 OUT_BCS_BATCH(batch, 0);
550 /* the DW13-15 is for the intra_row_store_scratch */
551 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
552 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
554 OUT_BCS_BATCH(batch, 0);
555 OUT_BCS_BATCH(batch, 0);
557 /* the DW16-18 is for the deblocking filter */
558 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
559 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
561 OUT_BCS_BATCH(batch, 0);
562 OUT_BCS_BATCH(batch, 0);
564 /* the DW 19-50 is for Reference pictures*/
565 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
566 if ( mfc_context->reference_surfaces[i].bo != NULL) {
567 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
568 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
571 OUT_BCS_BATCH(batch, 0);
573 OUT_BCS_BATCH(batch, 0);
575 OUT_BCS_BATCH(batch, 0);
577 /* The DW 52-54 is for the MB status buffer */
578 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
579 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
580 0); /* Macroblock status buffer*/
582 OUT_BCS_BATCH(batch, 0);
583 OUT_BCS_BATCH(batch, 0);
585 /* the DW 55-57 is the ILDB buffer */
586 OUT_BCS_BATCH(batch, 0);
587 OUT_BCS_BATCH(batch, 0);
588 OUT_BCS_BATCH(batch, 0);
590 /* the DW 58-60 is the second ILDB buffer */
591 OUT_BCS_BATCH(batch, 0);
592 OUT_BCS_BATCH(batch, 0);
593 OUT_BCS_BATCH(batch, 0);
594 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_PIPE_BUF_ADDR_STATE dispatcher: B+ steppings use the 61-dword
 * layout above; earlier steppings use the legacy 25-dword layout below
 * (one dword per address instead of three).
 */
598 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
600 struct intel_batchbuffer *batch = encoder_context->base.batch;
601 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
602 struct i965_driver_data *i965 = i965_driver_data(ctx);
605 if (IS_STEPPING_BPLUS(i965)) {
606 gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
610 BEGIN_BCS_BATCH(batch, 25);
612 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
614 if (mfc_context->pre_deblocking_output.bo)
615 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
616 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
619 OUT_BCS_BATCH(batch, 0); /* pre output addr */
621 if (mfc_context->post_deblocking_output.bo)
622 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
623 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
624 0); /* post output addr */
626 OUT_BCS_BATCH(batch, 0);
628 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
629 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
630 0); /* uncompressed data */
631 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
632 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
633 0); /* StreamOut data*/
634 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
635 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
637 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
638 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
640 /* 7..22 Reference pictures*/
641 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
642 if ( mfc_context->reference_surfaces[i].bo != NULL) {
643 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
644 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
647 OUT_BCS_BATCH(batch, 0);
650 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
651 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
652 0); /* Macroblock status buffer*/
654 OUT_BCS_BATCH(batch, 0);
656 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_AVC_DIRECTMODE_STATE, 71-dword B+-stepping layout: binds the
 * direct-MV buffers for the reference frames and the write buffer for
 * the current frame, then programs the POC list (i/2 per entry -
 * presumably top/bottom field POCs of each frame; TODO confirm against
 * the PRM).
 */
660 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
661 struct intel_encoder_context *encoder_context)
663 struct intel_batchbuffer *batch = encoder_context->base.batch;
664 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
668 BEGIN_BCS_BATCH(batch, 71);
670 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
672 /* Reference frames and Current frames */
673 /* the DW1-32 is for the direct MV for reference */
674 for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
675 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
676 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
677 I915_GEM_DOMAIN_INSTRUCTION, 0,
679 OUT_BCS_BATCH(batch, 0);
681 OUT_BCS_BATCH(batch, 0);
682 OUT_BCS_BATCH(batch, 0);
685 OUT_BCS_BATCH(batch, 0);
687 /* the DW34-36 is the MV for the current reference */
688 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
689 I915_GEM_DOMAIN_INSTRUCTION, 0,
692 OUT_BCS_BATCH(batch, 0);
693 OUT_BCS_BATCH(batch, 0);
/* POC list: 32 entries, two per frame. */
696 for(i = 0; i < 32; i++) {
697 OUT_BCS_BATCH(batch, i/2);
699 OUT_BCS_BATCH(batch, 0);
700 OUT_BCS_BATCH(batch, 0);
702 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_AVC_DIRECTMODE_STATE dispatcher: B+ steppings use the 71-dword
 * layout above; earlier steppings use the legacy 69-dword layout with
 * one dword per direct-MV buffer address.
 */
706 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
708 struct intel_batchbuffer *batch = encoder_context->base.batch;
709 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
710 struct i965_driver_data *i965 = i965_driver_data(ctx);
713 if (IS_STEPPING_BPLUS(i965)) {
714 gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
718 BEGIN_BCS_BATCH(batch, 69);
720 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
722 /* Reference frames and Current frames */
723 for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
724 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
725 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
726 I915_GEM_DOMAIN_INSTRUCTION, 0,
729 OUT_BCS_BATCH(batch, 0);
/* POC list: 32 entries, two per frame. */
734 for(i = 0; i < 32; i++) {
735 OUT_BCS_BATCH(batch, i/2);
737 OUT_BCS_BATCH(batch, 0);
738 OUT_BCS_BATCH(batch, 0);
740 ADVANCE_BCS_BATCH(batch);
/*
 * Emit two MFX_AVC_REF_IDX_STATE commands (10 dwords each), one for
 * reference list L0 and one for L1.  Each list is hard-coded to a
 * single active reference (0x20/0x22 in the low byte); the remaining
 * entries are 0x80 ("invalid/unused" marker bytes).
 */
744 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
746 struct intel_batchbuffer *batch = encoder_context->base.batch;
749 BEGIN_BCS_BATCH(batch, 10);
750 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
751 OUT_BCS_BATCH(batch, 0); //Select L0
752 OUT_BCS_BATCH(batch, 0x80808020); //Only 1 reference
753 for(i = 0; i < 7; i++) {
754 OUT_BCS_BATCH(batch, 0x80808080);
756 ADVANCE_BCS_BATCH(batch);
758 BEGIN_BCS_BATCH(batch, 10);
759 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
760 OUT_BCS_BATCH(batch, 1); //Select L1
761 OUT_BCS_BATCH(batch, 0x80808022); //Only 1 reference
762 for(i = 0; i < 7; i++) {
763 OUT_BCS_BATCH(batch, 0x80808080);
765 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE, 10-dword B+-stepping layout: binds the
 * BSD/MPC row-store scratch BO.  The MPR row-store and bitplane-read
 * sections are left zero (not used on the encode path).
 */
770 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
771 struct intel_encoder_context *encoder_context)
773 struct intel_batchbuffer *batch = encoder_context->base.batch;
774 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
776 BEGIN_BCS_BATCH(batch, 10);
778 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
779 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
780 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
782 OUT_BCS_BATCH(batch, 0);
783 OUT_BCS_BATCH(batch, 0);
785 /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
786 OUT_BCS_BATCH(batch, 0);
787 OUT_BCS_BATCH(batch, 0);
788 OUT_BCS_BATCH(batch, 0);
790 /* the DW7-9 is for Bitplane Read Buffer Base Address */
791 OUT_BCS_BATCH(batch, 0);
792 OUT_BCS_BATCH(batch, 0);
793 OUT_BCS_BATCH(batch, 0);
795 ADVANCE_BCS_BATCH(batch);
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE dispatcher: B+ steppings use the
 * 10-dword layout above; earlier steppings use the 4-dword layout.
 */
799 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
801 struct intel_batchbuffer *batch = encoder_context->base.batch;
802 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
803 struct i965_driver_data *i965 = i965_driver_data(ctx);
805 if (IS_STEPPING_BPLUS(i965)) {
806 gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
810 BEGIN_BCS_BATCH(batch, 4);
812 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
813 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
814 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
816 OUT_BCS_BATCH(batch, 0);
817 OUT_BCS_BATCH(batch, 0);
819 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the full per-picture MFX state sequence for AVC encode, in the
 * order the hardware expects: pipe mode, surface, indirect objects,
 * pipeline buffers, BSP buffers, image state, (F)QM matrices, direct
 * mode and reference index lists.  State emitters that have per-gen
 * overrides are invoked through the mfc_context vtable.
 */
823 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
824 struct encode_state *encode_state,
825 struct intel_encoder_context *encoder_context)
827 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
829 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
830 mfc_context->set_surface_state(ctx, encoder_context);
831 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
832 gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
833 gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
834 mfc_context->avc_img_state(ctx, encode_state, encoder_context);
835 mfc_context->avc_qm_state(ctx, encoder_context);
836 mfc_context->avc_fqm_state(ctx, encoder_context);
837 gen75_mfc_avc_directmode_state(ctx, encoder_context);
838 gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
/*
 * Kick off the encode: flush the accumulated BCS batch buffer, which
 * submits it to the kernel for execution.  Always reports success;
 * intel_batchbuffer_flush() performs the actual submission.
 */
842 static VAStatus gen75_mfc_run(VADriverContextP ctx,
843 struct encode_state *encode_state,
844 struct intel_encoder_context *encoder_context)
846 struct intel_batchbuffer *batch = encoder_context->base.batch;
848 intel_batchbuffer_flush(batch); //run the pipeline
850 return VA_STATUS_SUCCESS;
/*
 * Read back the encoded size: map the coded buffer, report its size in
 * bits through *encoded_bits_size, then unmap.
 * NOTE(review): a failed i965_MapBuffer() is only caught by assert();
 * in an NDEBUG build the subsequent dereference of
 * coded_buffer_segment would be undefined.
 */
855 gen75_mfc_stop(VADriverContextP ctx,
856 struct encode_state *encode_state,
857 struct intel_encoder_context *encoder_context,
858 int *encoded_bits_size)
860 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
861 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
862 VACodedBufferSegment *coded_buffer_segment;
864 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
865 assert(vaStatus == VA_STATUS_SUCCESS);
866 *encoded_bits_size = coded_buffer_segment->size * 8;
867 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
869 return VA_STATUS_SUCCESS;
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: slice type,
 * prediction-weight denominators, deblocking/CABAC parameters, slice
 * QP, first/next MB coordinates and the rate-control modifiers
 * (maxQpN/maxQpP, grow/shrink, correction table) pulled from the
 * per-slice-type bit_rate_control_context.
 *
 * Fix: removed a stray doubled semicolon after BEGIN_BCS_BATCH
 * (`...);;`).  It was harmless (empty statement) but trips lint and
 * suggests an editing accident.
 *
 * SP/SI slices are rate-controlled as P/I respectively.  For B slices
 * with implicit weighted biprediction (idc == 2) the log2 weight
 * denominators are forced to 5 per H.264 8.4.3 (eq. 8-279).
 */
874 gen75_mfc_avc_slice_state(VADriverContextP ctx,
875 VAEncPictureParameterBufferH264 *pic_param,
876 VAEncSliceParameterBufferH264 *slice_param,
877 struct encode_state *encode_state,
878 struct intel_encoder_context *encoder_context,
879 int rate_control_enable,
881 struct intel_batchbuffer *batch)
883 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
884 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
885 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
886 int beginmb = slice_param->macroblock_address;
887 int endmb = beginmb + slice_param->num_macroblocks;
888 int beginx = beginmb % width_in_mbs;
889 int beginy = beginmb / width_in_mbs;
890 int nextx = endmb % width_in_mbs;
891 int nexty = endmb / width_in_mbs;
892 int slice_type = slice_param->slice_type;
893 int last_slice = (endmb == (width_in_mbs * height_in_mbs));
894 int bit_rate_control_target, maxQpN, maxQpP;
895 unsigned char correct[6], grow, shrink;
897 int weighted_pred_idc = 0;
898 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
899 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
902 batch = encoder_context->base.batch;
904 bit_rate_control_target = slice_type;
905 if (slice_type == SLICE_TYPE_SP)
906 bit_rate_control_target = SLICE_TYPE_P;
907 else if (slice_type == SLICE_TYPE_SI)
908 bit_rate_control_target = SLICE_TYPE_I;
910 if (slice_type == SLICE_TYPE_P) {
911 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
912 } else if (slice_type == SLICE_TYPE_B) {
913 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
915 if (weighted_pred_idc == 2) {
916 /* 8.4.3 - Derivation process for prediction weights (8-279) */
917 luma_log2_weight_denom = 5;
918 chroma_log2_weight_denom = 5;
922 maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
923 maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
925 for (i = 0; i < 6; i++)
926 correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
928 grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit +
929 (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
930 shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit +
931 (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
933 BEGIN_BCS_BATCH(batch, 11);
935 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
936 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
938 if (slice_type == SLICE_TYPE_I) {
939 OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
942 (1 << 16) | /*1 reference frame*/
943 (chroma_log2_weight_denom << 8) |
944 (luma_log2_weight_denom << 0));
948 (weighted_pred_idc << 30) |
949 (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
950 (slice_param->disable_deblocking_filter_idc << 27) |
951 (slice_param->cabac_init_idc << 24) |
952 (qp<<16) | /*Slice Quantization Parameter*/
953 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
954 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
956 (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
958 slice_param->macroblock_address );
959 OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
961 (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
962 (1 << 30) | /*ResetRateControlCounter*/
963 (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
964 (4 << 24) | /*RC Stable Tolerance, middle level*/
965 (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
966 (0 << 22) | /*QP mode, don't modfiy CBP*/
967 (0 << 21) | /*MB Type Direct Conversion Enabled*/
968 (0 << 20) | /*MB Type Skip Conversion Enabled*/
969 (last_slice << 19) | /*IsLastSlice*/
970 (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
971 (1 << 17) | /*HeaderPresentFlag*/
972 (1 << 16) | /*SliceData PresentFlag*/
973 (1 << 15) | /*TailPresentFlag*/
974 (1 << 13) | /*RBSP NAL TYPE*/
975 (0 << 12) ); /*CabacZeroWordInsertionEnable*/
976 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
978 (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
979 (maxQpP << 16) | /*Target QP + 20 is highest QP*/
989 OUT_BCS_BATCH(batch, 0);
991 ADVANCE_BCS_BATCH(batch);
995 #ifdef MFC_SOFTWARE_HASWELL
/*
 * Emit one MFC_AVC_PAK_OBJECT command for a single intra macroblock.
 *
 * (x, y): macroblock position in MB units; end_mb: non-zero for the last MB
 * of the slice; qp: quantization parameter packed into the "Last MB" dword;
 * msg: VME output record for this MB (msg[0] carries mode/type bits,
 * msg[1..3] carry the intra prediction modes); target_mb_size/max_mb_size:
 * per-MB size control fields, in words.
 *
 * Returns the command length in dwords so the caller can advance its offset.
 * NOTE(review): the batch parameter is immediately overwritten with
 * encoder_context->base.batch, so the passed-in value is ignored here.
 */
998 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
999 int qp,unsigned int *msg,
1000 struct intel_encoder_context *encoder_context,
1001 unsigned char target_mb_size, unsigned char max_mb_size,
1002 struct intel_batchbuffer *batch)
1004 int len_in_dwords = 12;
1005 unsigned int intra_msg;
/* Bit 13 flags the PAK object as intra; bits 20:16 of the VME message hold
 * the MB type, relocated down by 8 for the PAK command layout. */
1006 #define INTRA_MSG_FLAG (1 << 13)
1007 #define INTRA_MBTYPE_MASK (0x1F0000)
1009 batch = encoder_context->base.batch;
1011 BEGIN_BCS_BATCH(batch, len_in_dwords);
/* Build the PAK intra message from the VME output: keep the low mode bits,
 * set the intra flag, and fold the MB-type field into bits 12:8. */
1013 intra_msg = msg[0] & 0xC0FF;
1014 intra_msg |= INTRA_MSG_FLAG;
1015 intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1016 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1017 OUT_BCS_BATCH(batch, 0);
1018 OUT_BCS_BATCH(batch, 0);
1019 OUT_BCS_BATCH(batch,
1020 (0 << 24) | /* PackedMvNum, Debug*/
1021 (0 << 20) | /* No motion vector */
1022 (1 << 19) | /* CbpDcY */
1023 (1 << 18) | /* CbpDcU */
1024 (1 << 17) | /* CbpDcV */
1027 OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/
1028 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
1029 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
1031 /*Stuff for Intra MB*/
1032 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
1033 OUT_BCS_BATCH(batch, msg[2]);
1034 OUT_BCS_BATCH(batch, msg[3]&0xFF);
1036 /*MaxSizeInWord and TargetSzieInWord*/
1037 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1038 (target_mb_size << 16) );
1040 OUT_BCS_BATCH(batch, 0);
1042 ADVANCE_BCS_BATCH(batch);
1044 return len_in_dwords;
/*
 * Emit one MFC_AVC_PAK_OBJECT command for a single inter macroblock.
 *
 * msg points at the VME output record for this MB; offset is the indirect
 * MV buffer offset handed to the PAK command; slice_type selects the B-slice
 * variant of the "Last MB" dword.  Returns the command length in dwords.
 *
 * NOTE(review): like the intra variant, the batch parameter is overwritten
 * with encoder_context->base.batch before use.
 */
1048 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1049 unsigned int *msg, unsigned int offset,
1050 struct intel_encoder_context *encoder_context,
1051 unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1052 struct intel_batchbuffer *batch)
1054 int len_in_dwords = 12;
1055 unsigned int inter_msg = 0;
1057 batch = encoder_context->base.batch;
/* Motion vectors start 4 dwords into the VME message. */
1059 #define MSG_MV_OFFSET 4
1060 unsigned int *mv_ptr;
1061 mv_ptr = msg + MSG_MV_OFFSET;
1062 /* MV of VME output is based on 16 sub-blocks. So it is necessary
1063 * to convert them to be compatible with the format of AVC_PAK
/* Repack the per-sub-block MVs in place so the first 8 dwords hold the MV
 * pairs in the layout PAK expects for each partition shape. */
1066 if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
1067 /* MV[0] and MV[2] are replicated */
1068 mv_ptr[4] = mv_ptr[0];
1069 mv_ptr[5] = mv_ptr[1];
1070 mv_ptr[2] = mv_ptr[8];
1071 mv_ptr[3] = mv_ptr[9];
1072 mv_ptr[6] = mv_ptr[8];
1073 mv_ptr[7] = mv_ptr[9];
1074 } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
1075 /* MV[0] and MV[1] are replicated */
1076 mv_ptr[2] = mv_ptr[0];
1077 mv_ptr[3] = mv_ptr[1];
1078 mv_ptr[4] = mv_ptr[16];
1079 mv_ptr[5] = mv_ptr[17];
1080 mv_ptr[6] = mv_ptr[24];
1081 mv_ptr[7] = mv_ptr[25];
1082 } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1083 !(msg[1] & SUBMB_SHAPE_MASK)) {
1084 /* Don't touch MV[0] or MV[1] */
1085 mv_ptr[2] = mv_ptr[8];
1086 mv_ptr[3] = mv_ptr[9];
1087 mv_ptr[4] = mv_ptr[16];
1088 mv_ptr[5] = mv_ptr[17];
1089 mv_ptr[6] = mv_ptr[24];
1090 mv_ptr[7] = mv_ptr[25];
1094 BEGIN_BCS_BATCH(batch, len_in_dwords);
1096 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1100 if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1101 if (msg[1] & SUBMB_SHAPE_MASK)
1104 OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
1105 OUT_BCS_BATCH(batch, offset);
/* Rebuild the PAK message: keep mode/low bits from the VME record, select
 * the 8-MV format, and set the CBP DC bits (Y/U/V), upgrading to the
 * 32-MV format for 8x8 partitions with sub-partitioning. */
1106 inter_msg = msg[0] & (0x1F00FFFF);
1107 inter_msg |= INTER_MV8;
1108 inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
1109 if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1110 (msg[1] & SUBMB_SHAPE_MASK)) {
1111 inter_msg |= INTER_MV32;
1114 OUT_BCS_BATCH(batch, inter_msg);
1116 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
1117 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
/* B slices additionally set bits 31:28 in the "Last MB" dword. */
1119 if ( slice_type == SLICE_TYPE_B) {
1120 OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
1122 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1125 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1128 inter_msg = msg[1] >> 8;
1129 /*Stuff for Inter MB*/
1130 OUT_BCS_BATCH(batch, inter_msg);
1131 OUT_BCS_BATCH(batch, 0x0);
1132 OUT_BCS_BATCH(batch, 0x0);
1134 /*MaxSizeInWord and TargetSzieInWord*/
1135 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1136 (target_mb_size << 16) );
1138 OUT_BCS_BATCH(batch, 0x0);
1140 ADVANCE_BCS_BATCH(batch);
1142 return len_in_dwords;
1145 #define INTRA_RDO_OFFSET 4
1146 #define INTER_RDO_OFFSET 54
1147 #define INTER_MSG_OFFSET 52
1148 #define INTER_MV_OFFSET 224
1149 #define RDO_MASK 0xFFFF
/*
 * Program one H.264 slice into the software-built slice batchbuffer:
 * slice state, (for the first slice) SPS/PPS/SEI headers, the packed slice
 * header, then one PAK object per macroblock, choosing intra vs. inter per
 * MB by comparing the VME RDO costs, and finally the tail/padding objects.
 *
 * In CBR mode the QP is taken from the per-slice-type BRC context and the
 * slice_qp_delta in the parameter buffer is rewritten to match.
 */
1152 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1153 struct encode_state *encode_state,
1154 struct intel_encoder_context *encoder_context,
1156 struct intel_batchbuffer *slice_batch)
1158 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1159 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1160 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1161 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1162 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1163 unsigned int *msg = NULL, offset = 0;
1164 unsigned char *msg_ptr = NULL;
1165 int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1166 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1167 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
/* Last slice iff it ends exactly at the bottom-right MB of the picture. */
1168 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1170 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1171 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1172 unsigned char *slice_header = NULL;
1173 int slice_header_length_in_bits = 0;
1174 unsigned int tail_data[] = { 0x0, 0x0 };
1175 int slice_type = pSliceParameter->slice_type;
/* CBR: BRC owns the QP; keep the parameter buffer consistent with it. */
1178 if (rate_control_mode == VA_RC_CBR) {
1179 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1180 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1183 /* only support for 8-bit pixel bit-depth */
1184 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1185 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1186 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1187 assert(qp >= 0 && qp < 52);
1189 gen75_mfc_avc_slice_state(ctx,
1192 encode_state, encoder_context,
1193 (rate_control_mode == VA_RC_CBR), qp, slice_batch);
1195 if ( slice_index == 0)
1196 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
/* build_avc_slice_header allocates slice_header (freed after insertion
 * in the unabridged code path). */
1198 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1201 mfc_context->insert_object(ctx, encoder_context,
1202 (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1203 5, /* first 5 bytes are start code + nal unit type */
1204 1, 0, 1, slice_batch);
/* Map the VME output so the per-MB messages can be read on the CPU. */
1206 dri_bo_map(vme_context->vme_output.bo , 1);
1207 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1210 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1212 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1215 for (i = pSliceParameter->macroblock_address;
1216 i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1217 int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1218 x = i % width_in_mbs;
1219 y = i / width_in_mbs;
1220 msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1224 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1226 int inter_rdo, intra_rdo;
/* For non-I slices, pick whichever of VME's intra/inter candidates has
 * the lower rate-distortion cost for this MB. */
1227 inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
1228 intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
1229 offset = i * vme_context->vme_output.size_block + INTER_MV_OFFSET;
1230 if (intra_rdo < inter_rdo) {
1231 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1233 msg += INTER_MSG_OFFSET;
1234 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1239 dri_bo_unmap(vme_context->vme_output.bo);
/* Tail data: padding/flush objects; the last slice gets a longer tail. */
1242 mfc_context->insert_object(ctx, encoder_context,
1244 2, 1, 1, 0, slice_batch);
1246 mfc_context->insert_object(ctx, encoder_context,
1248 1, 1, 1, 0, slice_batch);
/*
 * Build the whole slice-level command stream on the CPU ("software" path,
 * enabled by MFC_SOFTWARE_HASWELL): allocate a BSD batchbuffer sized from
 * the picture dimensions, program every slice into it, terminate it with
 * MI_BATCH_BUFFER_END, and hand back a reference to the underlying bo
 * (the batch wrapper itself is freed; the extra bo reference keeps the
 * buffer alive for the caller).
 */
1256 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1257 struct encode_state *encode_state,
1258 struct intel_encoder_context *encoder_context)
1260 struct i965_driver_data *i965 = i965_driver_data(ctx);
1261 struct intel_batchbuffer *batch;
1265 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1266 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1267 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
/* 64 bytes of command space per macroblock. */
1269 buffer_size = width_in_mbs * height_in_mbs * 64;
1270 batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, buffer_size);
1271 batch_bo = batch->buffer;
1272 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1273 gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1276 intel_batchbuffer_align(batch, 8);
1278 BEGIN_BCS_BATCH(batch, 2);
1279 OUT_BCS_BATCH(batch, 0);
1280 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1281 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive past intel_batchbuffer_free; caller unreferences it. */
1283 dri_bo_reference(batch_bo);
1284 intel_batchbuffer_free(batch);
/*
 * Bind the two input surfaces consumed by the hardware batchbuffer kernels:
 * the VME output (per-MB messages) and the aux batchbuffer holding the
 * packed slice headers.
 */
1292 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1293 struct encode_state *encode_state,
1294 struct intel_encoder_context *encoder_context)
1297 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1298 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1300 assert(vme_context->vme_output.bo);
1301 mfc_context->buffer_suface_setup(ctx,
1302 &mfc_context->gpe_context,
1303 &vme_context->vme_output,
1304 BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1305 SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1306 assert(mfc_context->aux_batchbuffer_surface.bo);
1307 mfc_context->buffer_suface_setup(ctx,
1308 &mfc_context->gpe_context,
1309 &mfc_context->aux_batchbuffer_surface,
1310 BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1311 SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
/*
 * Allocate and bind the output surface the batchbuffer kernels write into:
 * one block per macroblock plus 8 per slice (state/header commands) plus one
 * terminator, each block being one PAK command of CMD_LEN_IN_OWORD owords.
 */
1315 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1316 struct encode_state *encode_state,
1317 struct intel_encoder_context *encoder_context)
1320 struct i965_driver_data *i965 = i965_driver_data(ctx);
1321 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1322 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1323 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1324 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
1325 mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1326 mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1327 mfc_context->mfc_batchbuffer_surface.pitch = 16;
/* NOTE(review): any previous bo appears to be released elsewhere; the
 * allocation here sizes the buffer as num_blocks * size_block bytes. */
1328 mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
1330 mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1332 mfc_context->buffer_suface_setup(ctx,
1333 &mfc_context->gpe_context,
1334 &mfc_context->mfc_batchbuffer_surface,
1335 BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1336 SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
/* Set up both the input and output surfaces for the MFC batchbuffer kernels. */
1340 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1341 struct encode_state *encode_state,
1342 struct intel_encoder_context *encoder_context)
1344 gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1345 gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/*
 * Fill the interface descriptor remap table (IDRT): one 32-byte descriptor
 * per loaded kernel, pointing at the kernel start address, the shared
 * binding table, and the CURBE read window, with a relocation emitted for
 * each kernel bo so desc0 stays valid after GEM relocation.
 */
1349 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1350 struct encode_state *encode_state,
1351 struct intel_encoder_context *encoder_context)
1353 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1354 struct gen6_interface_descriptor_data *desc;
1358 bo = mfc_context->gpe_context.idrt.bo;
1360 assert(bo->virtual);
1363 for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1364 struct i965_kernel *kernel;
1366 kernel = &mfc_context->gpe_context.kernels[i];
1367 assert(sizeof(*desc) == 32);
1369 /*Setup the descritor table*/
1370 memset(desc, 0, sizeof(*desc));
/* Kernel start pointer is in 64-byte units (offset >> 6). */
1371 desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1372 desc->desc2.sampler_count = 0;
1373 desc->desc2.sampler_state_pointer = 0;
1374 desc->desc3.binding_table_entry_count = 2;
1375 desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1376 desc->desc4.constant_urb_entry_read_offset = 0;
1377 desc->desc4.constant_urb_entry_read_length = 4;
/* Relocate desc0 against the kernel bo so the GPU sees the final address. */
1380 dri_bo_emit_reloc(bo,
1381 I915_GEM_DOMAIN_INSTRUCTION, 0,
1383 i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
/* Upload constants (CURBE data) for the batchbuffer kernels.
 * NOTE(review): body largely elided in this view; only the context fetch
 * is visible here. */
1392 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1393 struct encode_state *encode_state,
1394 struct intel_encoder_context *encoder_context)
1396 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/*
 * Emit one MEDIA_OBJECT command that launches a batchbuffer kernel
 * (intra or inter, selected by index) over a run of macroblocks; the
 * inline data passes the head/batchbuffer offsets and the MB-command
 * count to the kernel.
 */
1402 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1405 int batchbuffer_offset,
1417 BEGIN_BATCH(batch, 12);
1419 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1420 OUT_BATCH(batch, index);
1421 OUT_BATCH(batch, 0);
1422 OUT_BATCH(batch, 0);
1423 OUT_BATCH(batch, 0);
1424 OUT_BATCH(batch, 0);
/* Inline data consumed by the kernel. */
1427 OUT_BATCH(batch, head_offset);
1428 OUT_BATCH(batch, batchbuffer_offset);
1433 number_mb_cmds << 16 |
1444 ADVANCE_BATCH(batch);
/*
 * Split one slice into MEDIA_OBJECT launches of at most 128 macroblock
 * commands each, advancing head/batchbuffer offsets as it goes (the first
 * launch also covers the head commands, the last one the tail), then emit
 * a final launch for the remainder (total_mbs % 128).
 */
1448 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1449 struct intel_encoder_context *encoder_context,
1450 VAEncSliceParameterBufferH264 *slice_param,
1452 unsigned short head_size,
1453 unsigned short tail_size,
1454 int batchbuffer_offset,
1458 struct intel_batchbuffer *batch = encoder_context->base.batch;
1459 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1460 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1461 int total_mbs = slice_param->num_macroblocks;
1462 int number_mb_cmds = 128;
1463 int starting_mb = 0;
1464 int last_object = 0;
1465 int first_object = 1;
/* Kernel index selects the intra or inter variant for the whole slice. */
1468 int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1470 for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1471 last_object = (total_mbs - starting_mb) == number_mb_cmds;
1472 mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1473 mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
/* MB coordinates are packed into 8-bit fields downstream. */
1474 assert(mb_x <= 255 && mb_y <= 255);
1476 starting_mb += number_mb_cmds;
1478 gen75_mfc_batchbuffer_emit_object_command(batch,
/* Head commands are only accounted once, on the first object; tail on the
 * last. */
1494 head_offset += head_size;
1495 batchbuffer_offset += head_size;
1499 head_offset += tail_size;
1500 batchbuffer_offset += tail_size;
1503 batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
/* Remainder launch for the final partial group of MBs. */
1510 number_mb_cmds = total_mbs % number_mb_cmds;
1511 mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1512 mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1513 assert(mb_x <= 255 && mb_y <= 255);
1514 starting_mb += number_mb_cmds;
1516 gen75_mfc_batchbuffer_emit_object_command(batch,
/*
 * Program one slice for the hardware-batchbuffer path: emit slice state,
 * headers, and tail into the aux batchbuffer (measuring head_size and
 * tail_size in owords from the used-size deltas), then emit the
 * MEDIA_OBJECT launches that make the GPU kernels expand the per-MB PAK
 * commands.
 *
 * return size in Owords (16bytes)
 */
1537 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1538 struct encode_state *encode_state,
1539 struct intel_encoder_context *encoder_context,
1541 int batchbuffer_offset)
1543 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1544 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1545 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1546 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1547 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1548 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1549 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1550 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1551 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1552 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1553 unsigned char *slice_header = NULL;
1554 int slice_header_length_in_bits = 0;
1555 unsigned int tail_data[] = { 0x0, 0x0 };
1557 int old_used = intel_batchbuffer_used_size(slice_batch), used;
1558 unsigned short head_size, tail_size;
1559 int slice_type = pSliceParameter->slice_type;
/* CBR: QP comes from BRC; keep slice_qp_delta consistent. */
1561 if (rate_control_mode == VA_RC_CBR) {
1562 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1563 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1566 /* only support for 8-bit pixel bit-depth */
1567 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1568 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1569 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1570 assert(qp >= 0 && qp < 52);
/* head_offset is measured in owords (16-byte units). */
1572 head_offset = old_used / 16;
1573 gen75_mfc_avc_slice_state(ctx,
1578 (rate_control_mode == VA_RC_CBR),
1582 if (slice_index == 0)
1583 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1585 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1588 mfc_context->insert_object(ctx,
1590 (unsigned int *)slice_header,
1591 ALIGN(slice_header_length_in_bits, 32) >> 5,
1592 slice_header_length_in_bits & 0x1f,
1593 5, /* first 5 bytes are start code + nal unit type */
/* Head section ends here; compute its size in owords. */
1600 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1601 used = intel_batchbuffer_used_size(slice_batch);
1602 head_size = (used - old_used) / 16;
1607 mfc_context->insert_object(ctx,
1618 mfc_context->insert_object(ctx,
/* Tail section (padding/flush objects) measured the same way. */
1630 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1631 used = intel_batchbuffer_used_size(slice_batch);
1632 tail_size = (used - old_used) / 16;
1635 gen75_mfc_avc_batchbuffer_slice_command(ctx,
1645 return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
/*
 * Run the GPU media pipeline that expands the slice batchbuffer: set up
 * the GPE pipeline, launch one batchbuffer-slice pass per slice (tracking
 * the running output offset), then flush the render batch.
 */
1649 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1650 struct encode_state *encode_state,
1651 struct intel_encoder_context *encoder_context)
1653 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1654 struct intel_batchbuffer *batch = encoder_context->base.batch;
1655 int i, size, offset = 0;
1656 intel_batchbuffer_start_atomic(batch, 0x4000);
1657 gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1659 for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
/* size is the slice's output length in owords; accumulation into offset
 * happens in the elided lines of this loop. */
1660 size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1664 intel_batchbuffer_end_atomic(batch);
1665 intel_batchbuffer_flush(batch);
/* Hardware-batchbuffer build: surfaces, interface descriptors, constants,
 * then the media pipeline that generates the PAK command stream. */
1669 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1670 struct encode_state *encode_state,
1671 struct intel_encoder_context *encoder_context)
1673 gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1674 gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1675 gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1676 gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
/*
 * "Hardware" path counterpart of gen75_mfc_avc_software_batchbuffer():
 * let GPU kernels build the slice command stream, then return a new
 * reference to the resulting bo (caller owns and unreferences it).
 */
1680 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1681 struct encode_state *encode_state,
1682 struct intel_encoder_context *encoder_context)
1684 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1686 gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1687 dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1689 return mfc_context->mfc_batchbuffer_surface.bo;
/*
 * Top-level BCS programming for one frame: build the slice-level batch
 * (software or hardware path depending on MFC_SOFTWARE_HASWELL), reject
 * interlaced input, emit the picture-level state, then chain into the
 * slice batch via MI_BATCH_BUFFER_START and release the slice bo.
 */
1695 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1696 struct encode_state *encode_state,
1697 struct intel_encoder_context *encoder_context)
1699 struct intel_batchbuffer *batch = encoder_context->base.batch;
1700 dri_bo *slice_batch_bo;
1702 if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1703 fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1708 #ifdef MFC_SOFTWARE_HASWELL
1709 slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1711 slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1715 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1716 intel_batchbuffer_emit_mi_flush(batch);
1718 // picture level programing
1719 gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain to the slice batch; bit 8 selects the non-privileged/PPGTT form
 * of MI_BATCH_BUFFER_START and the reloc patches the bo address. */
1721 BEGIN_BCS_BATCH(batch, 2);
1722 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1723 OUT_BCS_RELOC(batch,
1725 I915_GEM_DOMAIN_COMMAND, 0,
1727 ADVANCE_BCS_BATCH(batch);
1730 intel_batchbuffer_end_atomic(batch);
/* Drop the reference taken by the batchbuffer-build helpers. */
1732 dri_bo_unreference(slice_batch_bo);
/*
 * Encode one picture: init the MFC state, prepare reference/reconstructed
 * surfaces, program and run the BCS pipeline, and in CBR mode loop on the
 * bit-rate controller until the produced frame size satisfies HRD (or the
 * violation is unrepairable, which is reported once to stderr).
 *
 * Returns VA_STATUS_SUCCESS.
 */
1737 gen75_mfc_avc_encode_picture(VADriverContextP ctx,
1738 struct encode_state *encode_state,
1739 struct intel_encoder_context *encoder_context)
1741 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1742 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1743 int current_frame_bits_size;
1747 gen75_mfc_init(ctx, encode_state, encoder_context);
1748 intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
1749 /*Programing bcs pipeline*/
1750 gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1751 gen75_mfc_run(ctx, encode_state, encoder_context);
1752 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
/* Fixed mojibake: "¤t_frame_bits_size" was a corrupted HTML entity
 * (&curren;) of "&current_frame_bits_size" — gen75_mfc_stop reports the
 * coded frame size through this out-parameter. */
1753 gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
1754 sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1755 if (sts == BRC_NO_HRD_VIOLATION) {
1756 intel_mfc_hrd_context_update(encode_state, mfc_context);
1759 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
/* Unrepairable HRD violation: warn only once per context. */
1760 if (!mfc_context->hrd.violation_noted) {
1761 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1762 mfc_context->hrd.violation_noted = 1;
1764 return VA_STATUS_SUCCESS;
1771 return VA_STATUS_SUCCESS;
/*
 * Release every bo owned by the MFC context (output surfaces, scratch
 * buffers, DMV buffers, reference surfaces, batchbuffer surfaces), destroy
 * the GPE context, and free the aux batchbuffer.  Each pointer is NULLed
 * after unreference to guard against double-free on repeated teardown.
 * dri_bo_unreference(NULL) is a no-op, so unconditional calls are safe.
 */
1776 gen75_mfc_context_destroy(void *context)
1778 struct gen6_mfc_context *mfc_context = context;
1781 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1782 mfc_context->post_deblocking_output.bo = NULL;
1784 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1785 mfc_context->pre_deblocking_output.bo = NULL;
1787 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1788 mfc_context->uncompressed_picture_source.bo = NULL;
1790 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
1791 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1793 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1794 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1795 mfc_context->direct_mv_buffers[i].bo = NULL;
1798 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1799 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1801 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1802 mfc_context->macroblock_status_buffer.bo = NULL;
1804 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1805 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1807 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1808 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1811 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1812 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1813 mfc_context->reference_surfaces[i].bo = NULL;
1816 i965_gpe_context_destroy(&mfc_context->gpe_context);
1818 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
1819 mfc_context->mfc_batchbuffer_surface.bo = NULL;
1821 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1822 mfc_context->aux_batchbuffer_surface.bo = NULL;
1824 if (mfc_context->aux_batchbuffer)
1825 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1827 mfc_context->aux_batchbuffer = NULL;
/*
 * Encoder pipeline entry point: dispatch by VA profile.  Only the H.264
 * profiles are handled; everything else returns
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 */
1832 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
1834 struct encode_state *encode_state,
1835 struct intel_encoder_context *encoder_context)
1840 case VAProfileH264Baseline:
1841 case VAProfileH264Main:
1842 case VAProfileH264High:
1843 vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1846 /* FIXME: add for other profile */
1848 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1855 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1857 struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
1859 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1861 mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
1862 mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1864 mfc_context->gpe_context.curbe.length = 32 * 4;
1866 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1867 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
1868 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
1869 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1870 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
1872 i965_gpe_load_kernels(ctx,
1873 &mfc_context->gpe_context,
1877 mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
1878 mfc_context->set_surface_state = gen75_mfc_surface_state;
1879 mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
1880 mfc_context->avc_img_state = gen75_mfc_avc_img_state;
1881 mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
1882 mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
1883 mfc_context->insert_object = gen75_mfc_avc_insert_object;
1884 mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1886 encoder_context->mfc_context = mfc_context;
1887 encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
1888 encoder_context->mfc_pipeline = gen75_mfc_pipeline;
1889 encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;