2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhao Yakui <yakui.zhao@intel.com>
26 * Xiang Haihao <haihao.xiang@intel.com>
36 #include "intel_batchbuffer.h"
37 #include "i965_defines.h"
38 #include "i965_structs.h"
39 #include "i965_drv_video.h"
40 #include "i965_encoder.h"
41 #include "i965_encoder_utils.h"
45 #define MFC_SOFTWARE_HASWELL 1
48 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50 static const uint32_t gen75_mfc_batchbuffer_avc_intra[][4] = {
51 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
54 static const uint32_t gen75_mfc_batchbuffer_avc_inter[][4] = {
55 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
58 static struct i965_kernel gen75_mfc_kernels[] = {
60 "MFC AVC INTRA BATCHBUFFER ",
61 MFC_BATCHBUFFER_AVC_INTRA,
62 gen75_mfc_batchbuffer_avc_intra,
63 sizeof(gen75_mfc_batchbuffer_avc_intra),
68 "MFC AVC INTER BATCHBUFFER ",
69 MFC_BATCHBUFFER_AVC_INTER,
70 gen75_mfc_batchbuffer_avc_inter,
71 sizeof(gen75_mfc_batchbuffer_avc_inter),
76 #define INTER_MODE_MASK 0x03
77 #define INTER_8X8 0x03
78 #define SUBMB_SHAPE_MASK 0x00FF00
80 #define INTER_MV8 (4 << 20)
81 #define INTER_MV32 (6 << 20)
85 gen75_mfc_pipe_mode_select(VADriverContextP ctx,
87 struct intel_encoder_context *encoder_context)
89 struct intel_batchbuffer *batch = encoder_context->base.batch;
90 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
92 assert(standard_select == MFX_FORMAT_MPEG2 ||
93 standard_select == MFX_FORMAT_AVC);
95 BEGIN_BCS_BATCH(batch, 5);
97 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
99 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
100 (MFD_MODE_VLD << 15) | /* VLD mode */
101 (1 << 10) | /* Stream-Out Enable */
102 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
103 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
104 (0 << 8) | /* Pre Deblocking Output */
105 (0 << 5) | /* not in stitch mode */
106 (1 << 4) | /* encoding mode */
107 (standard_select << 0)); /* standard select: avc or mpeg2 */
109 (0 << 7) | /* expand NOA bus flag */
110 (0 << 6) | /* disable slice-level clock gating */
111 (0 << 5) | /* disable clock gating for NOA */
112 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
113 (0 << 3) | /* terminate if AVC mbdata error occurs */
114 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
117 OUT_BCS_BATCH(batch, 0);
118 OUT_BCS_BATCH(batch, 0);
120 ADVANCE_BCS_BATCH(batch);
124 gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
126 struct intel_batchbuffer *batch = encoder_context->base.batch;
127 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
129 BEGIN_BCS_BATCH(batch, 6);
131 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
132 OUT_BCS_BATCH(batch, 0);
134 ((mfc_context->surface_state.height - 1) << 18) |
135 ((mfc_context->surface_state.width - 1) << 4));
137 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
138 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
139 (0 << 22) | /* surface object control state, FIXME??? */
140 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
141 (0 << 2) | /* must be 0 for interleave U/V */
142 (1 << 1) | /* must be tiled */
143 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
145 (0 << 16) | /* must be 0 for interleave U/V */
146 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
147 OUT_BCS_BATCH(batch, 0);
149 ADVANCE_BCS_BATCH(batch);
153 gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
154 struct intel_encoder_context *encoder_context)
156 struct intel_batchbuffer *batch = encoder_context->base.batch;
157 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
158 struct gen6_vme_context *vme_context = encoder_context->vme_context;
160 BEGIN_BCS_BATCH(batch, 26);
162 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
163 /* the DW1-3 is for the MFX indirect bistream offset */
164 OUT_BCS_BATCH(batch, 0);
165 OUT_BCS_BATCH(batch, 0);
166 OUT_BCS_BATCH(batch, 0);
167 /* the DW4-5 is the MFX upper bound */
168 OUT_BCS_BATCH(batch, 0);
169 OUT_BCS_BATCH(batch, 0);
171 /* the DW6-10 is for MFX Indirect MV Object Base Address */
172 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
173 OUT_BCS_BATCH(batch, 0);
174 OUT_BCS_BATCH(batch, 0);
175 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
176 OUT_BCS_BATCH(batch, 0);
178 /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
179 OUT_BCS_BATCH(batch, 0);
180 OUT_BCS_BATCH(batch, 0);
181 OUT_BCS_BATCH(batch, 0);
182 OUT_BCS_BATCH(batch, 0);
183 OUT_BCS_BATCH(batch, 0);
185 /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
186 OUT_BCS_BATCH(batch, 0);
187 OUT_BCS_BATCH(batch, 0);
188 OUT_BCS_BATCH(batch, 0);
189 OUT_BCS_BATCH(batch, 0);
190 OUT_BCS_BATCH(batch, 0);
192 /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
194 mfc_context->mfc_indirect_pak_bse_object.bo,
195 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197 OUT_BCS_BATCH(batch, 0);
198 OUT_BCS_BATCH(batch, 0);
201 mfc_context->mfc_indirect_pak_bse_object.bo,
202 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
203 mfc_context->mfc_indirect_pak_bse_object.end_offset);
204 OUT_BCS_BATCH(batch, 0);
206 ADVANCE_BCS_BATCH(batch);
210 gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
212 struct intel_batchbuffer *batch = encoder_context->base.batch;
213 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
214 struct gen6_vme_context *vme_context = encoder_context->vme_context;
215 struct i965_driver_data *i965 = i965_driver_data(ctx);
217 if (IS_STEPPING_BPLUS(i965)) {
218 gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
221 BEGIN_BCS_BATCH(batch, 11);
223 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
224 OUT_BCS_BATCH(batch, 0);
225 OUT_BCS_BATCH(batch, 0);
226 /* MFX Indirect MV Object Base Address */
227 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
228 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
231 OUT_BCS_BATCH(batch, 0);
232 OUT_BCS_BATCH(batch, 0);
233 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
235 mfc_context->mfc_indirect_pak_bse_object.bo,
236 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
239 mfc_context->mfc_indirect_pak_bse_object.bo,
240 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
241 mfc_context->mfc_indirect_pak_bse_object.end_offset);
243 ADVANCE_BCS_BATCH(batch);
247 gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
248 struct intel_encoder_context *encoder_context)
250 struct intel_batchbuffer *batch = encoder_context->base.batch;
251 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
252 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
254 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
255 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
257 BEGIN_BCS_BATCH(batch, 16);
259 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
261 ((width_in_mbs * height_in_mbs) & 0xFFFF));
263 ((height_in_mbs - 1) << 16) |
264 ((width_in_mbs - 1) << 0));
266 (0 << 24) | /* Second Chroma QP Offset */
267 (0 << 16) | /* Chroma QP Offset */
268 (0 << 14) | /* Max-bit conformance Intra flag */
269 (0 << 13) | /* Max Macroblock size conformance Inter flag */
270 (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
271 (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
272 (0 << 8) | /* FIXME: Image Structure */
273 (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */
275 (0 << 16) | /* Mininum Frame size */
276 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
277 (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */
278 (0 << 13) | /* CABAC 0 word insertion test enable */
279 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
280 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
281 (0 << 8) | /* FIXME: MbMvFormatFlag */
282 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
283 (0 << 6) | /* Only valid for VLD decoding mode */
284 (0 << 5) | /* Constrained Intra Predition Flag, from PPS */
285 (0 << 4) | /* Direct 8x8 inference flag */
286 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
287 (1 << 2) | /* Frame MB only flag */
288 (0 << 1) | /* MBAFF mode is in active */
289 (0 << 0)); /* Field picture flag */
290 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
291 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
292 (0xBB8 << 16) | /* InterMbMaxSz */
293 (0xEE8) ); /* IntraMbMaxSz */
294 OUT_BCS_BATCH(batch, 0); /* Reserved */
295 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
296 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
297 OUT_BCS_BATCH(batch, 0x8C000000);
298 OUT_BCS_BATCH(batch, 0x00010000);
299 OUT_BCS_BATCH(batch, 0);
300 OUT_BCS_BATCH(batch, 0);
301 OUT_BCS_BATCH(batch, 0);
302 OUT_BCS_BATCH(batch, 0);
304 ADVANCE_BCS_BATCH(batch);
308 gen75_mfc_qm_state(VADriverContextP ctx,
312 struct intel_encoder_context *encoder_context)
314 struct intel_batchbuffer *batch = encoder_context->base.batch;
315 unsigned int qm_buffer[16];
317 assert(qm_length <= 16);
318 assert(sizeof(*qm) == 4);
319 memcpy(qm_buffer, qm, qm_length * 4);
321 BEGIN_BCS_BATCH(batch, 18);
322 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
323 OUT_BCS_BATCH(batch, qm_type << 0);
324 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
325 ADVANCE_BCS_BATCH(batch);
329 gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
331 unsigned int qm[16] = {
332 0x10101010, 0x10101010, 0x10101010, 0x10101010,
333 0x10101010, 0x10101010, 0x10101010, 0x10101010,
334 0x10101010, 0x10101010, 0x10101010, 0x10101010,
335 0x10101010, 0x10101010, 0x10101010, 0x10101010
338 gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
339 gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
340 gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
341 gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
345 gen75_mfc_fqm_state(VADriverContextP ctx,
349 struct intel_encoder_context *encoder_context)
351 struct intel_batchbuffer *batch = encoder_context->base.batch;
352 unsigned int fqm_buffer[32];
354 assert(fqm_length <= 32);
355 assert(sizeof(*fqm) == 4);
356 memcpy(fqm_buffer, fqm, fqm_length * 4);
358 BEGIN_BCS_BATCH(batch, 34);
359 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
360 OUT_BCS_BATCH(batch, fqm_type << 0);
361 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
362 ADVANCE_BCS_BATCH(batch);
366 gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
368 unsigned int qm[32] = {
369 0x10001000, 0x10001000, 0x10001000, 0x10001000,
370 0x10001000, 0x10001000, 0x10001000, 0x10001000,
371 0x10001000, 0x10001000, 0x10001000, 0x10001000,
372 0x10001000, 0x10001000, 0x10001000, 0x10001000,
373 0x10001000, 0x10001000, 0x10001000, 0x10001000,
374 0x10001000, 0x10001000, 0x10001000, 0x10001000,
375 0x10001000, 0x10001000, 0x10001000, 0x10001000,
376 0x10001000, 0x10001000, 0x10001000, 0x10001000
379 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
380 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
381 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
382 gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
386 gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
387 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
388 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
389 struct intel_batchbuffer *batch)
392 batch = encoder_context->base.batch;
394 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
396 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
398 (0 << 16) | /* always start at offset 0 */
399 (data_bits_in_last_dw << 8) |
400 (skip_emul_byte_count << 4) |
401 (!!emulation_flag << 3) |
402 ((!!is_last_header) << 2) |
403 ((!!is_end_of_slice) << 1) |
404 (0 << 0)); /* FIXME: ??? */
405 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
407 ADVANCE_BCS_BATCH(batch);
413 gen75_mfc_free_avc_surface(void **data)
415 struct gen6_mfc_avc_surface_aux *avc_surface = *data;
420 dri_bo_unreference(avc_surface->dmv_top);
421 avc_surface->dmv_top = NULL;
422 dri_bo_unreference(avc_surface->dmv_bottom);
423 avc_surface->dmv_bottom = NULL;
429 static void gen75_mfc_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
431 struct i965_driver_data *i965 = i965_driver_data(ctx);
432 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
436 /*Encode common setup for MFC*/
437 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
438 mfc_context->post_deblocking_output.bo = NULL;
440 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
441 mfc_context->pre_deblocking_output.bo = NULL;
443 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
444 mfc_context->uncompressed_picture_source.bo = NULL;
446 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
447 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
449 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
450 if ( mfc_context->direct_mv_buffers[i].bo != NULL);
451 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
452 mfc_context->direct_mv_buffers[i].bo = NULL;
455 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
456 if (mfc_context->reference_surfaces[i].bo != NULL)
457 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
458 mfc_context->reference_surfaces[i].bo = NULL;
461 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
462 bo = dri_bo_alloc(i965->intel.bufmgr,
467 mfc_context->intra_row_store_scratch_buffer.bo = bo;
469 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
470 bo = dri_bo_alloc(i965->intel.bufmgr,
475 mfc_context->macroblock_status_buffer.bo = bo;
477 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
478 bo = dri_bo_alloc(i965->intel.bufmgr,
480 49152, /* 6 * 128 * 64 */
483 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
485 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
486 bo = dri_bo_alloc(i965->intel.bufmgr,
488 12288, /* 1.5 * 128 * 64 */
491 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
493 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
494 mfc_context->mfc_batchbuffer_surface.bo = NULL;
496 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
497 mfc_context->aux_batchbuffer_surface.bo = NULL;
499 if (mfc_context->aux_batchbuffer)
500 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
502 mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
503 mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
504 dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
505 mfc_context->aux_batchbuffer_surface.pitch = 16;
506 mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
507 mfc_context->aux_batchbuffer_surface.size_block = 16;
509 i965_gpe_context_init(ctx, &mfc_context->gpe_context);
513 gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
514 struct intel_encoder_context *encoder_context)
516 struct intel_batchbuffer *batch = encoder_context->base.batch;
517 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
520 BEGIN_BCS_BATCH(batch, 61);
522 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
524 /* the DW1-3 is for pre_deblocking */
525 if (mfc_context->pre_deblocking_output.bo)
526 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
527 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
530 OUT_BCS_BATCH(batch, 0); /* pre output addr */
532 OUT_BCS_BATCH(batch, 0);
533 OUT_BCS_BATCH(batch, 0);
534 /* the DW4-6 is for the post_deblocking */
536 if (mfc_context->post_deblocking_output.bo)
537 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
538 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
539 0); /* post output addr */
541 OUT_BCS_BATCH(batch, 0);
542 OUT_BCS_BATCH(batch, 0);
543 OUT_BCS_BATCH(batch, 0);
545 /* the DW7-9 is for the uncompressed_picture */
546 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
547 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
548 0); /* uncompressed data */
550 OUT_BCS_BATCH(batch, 0);
551 OUT_BCS_BATCH(batch, 0);
553 /* the DW10-12 is for the mb status */
554 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
555 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
556 0); /* StreamOut data*/
557 OUT_BCS_BATCH(batch, 0);
558 OUT_BCS_BATCH(batch, 0);
560 /* the DW13-15 is for the intra_row_store_scratch */
561 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
562 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564 OUT_BCS_BATCH(batch, 0);
565 OUT_BCS_BATCH(batch, 0);
567 /* the DW16-18 is for the deblocking filter */
568 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
569 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
571 OUT_BCS_BATCH(batch, 0);
572 OUT_BCS_BATCH(batch, 0);
574 /* the DW 19-50 is for Reference pictures*/
575 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
576 if ( mfc_context->reference_surfaces[i].bo != NULL) {
577 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
578 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
581 OUT_BCS_BATCH(batch, 0);
583 OUT_BCS_BATCH(batch, 0);
585 OUT_BCS_BATCH(batch, 0);
587 /* The DW 52-54 is for the MB status buffer */
588 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
589 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
590 0); /* Macroblock status buffer*/
592 OUT_BCS_BATCH(batch, 0);
593 OUT_BCS_BATCH(batch, 0);
595 /* the DW 55-57 is the ILDB buffer */
596 OUT_BCS_BATCH(batch, 0);
597 OUT_BCS_BATCH(batch, 0);
598 OUT_BCS_BATCH(batch, 0);
600 /* the DW 58-60 is the second ILDB buffer */
601 OUT_BCS_BATCH(batch, 0);
602 OUT_BCS_BATCH(batch, 0);
603 OUT_BCS_BATCH(batch, 0);
604 ADVANCE_BCS_BATCH(batch);
608 gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
610 struct intel_batchbuffer *batch = encoder_context->base.batch;
611 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
612 struct i965_driver_data *i965 = i965_driver_data(ctx);
615 if (IS_STEPPING_BPLUS(i965)) {
616 gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
620 BEGIN_BCS_BATCH(batch, 25);
622 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
624 if (mfc_context->pre_deblocking_output.bo)
625 OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
626 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
629 OUT_BCS_BATCH(batch, 0); /* pre output addr */
631 if (mfc_context->post_deblocking_output.bo)
632 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
633 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
634 0); /* post output addr */
636 OUT_BCS_BATCH(batch, 0);
638 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
639 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
640 0); /* uncompressed data */
641 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
642 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
643 0); /* StreamOut data*/
644 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
645 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
647 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
648 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
650 /* 7..22 Reference pictures*/
651 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
652 if ( mfc_context->reference_surfaces[i].bo != NULL) {
653 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
654 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
657 OUT_BCS_BATCH(batch, 0);
660 OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
661 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
662 0); /* Macroblock status buffer*/
664 OUT_BCS_BATCH(batch, 0);
666 ADVANCE_BCS_BATCH(batch);
670 gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
671 struct intel_encoder_context *encoder_context)
673 struct intel_batchbuffer *batch = encoder_context->base.batch;
674 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
678 BEGIN_BCS_BATCH(batch, 71);
680 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
682 /* Reference frames and Current frames */
683 /* the DW1-32 is for the direct MV for reference */
684 for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
685 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
686 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
687 I915_GEM_DOMAIN_INSTRUCTION, 0,
689 OUT_BCS_BATCH(batch, 0);
691 OUT_BCS_BATCH(batch, 0);
692 OUT_BCS_BATCH(batch, 0);
695 OUT_BCS_BATCH(batch, 0);
697 /* the DW34-36 is the MV for the current reference */
698 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
699 I915_GEM_DOMAIN_INSTRUCTION, 0,
702 OUT_BCS_BATCH(batch, 0);
703 OUT_BCS_BATCH(batch, 0);
706 for(i = 0; i < 32; i++) {
707 OUT_BCS_BATCH(batch, i/2);
709 OUT_BCS_BATCH(batch, 0);
710 OUT_BCS_BATCH(batch, 0);
712 ADVANCE_BCS_BATCH(batch);
716 gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
718 struct intel_batchbuffer *batch = encoder_context->base.batch;
719 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
720 struct i965_driver_data *i965 = i965_driver_data(ctx);
723 if (IS_STEPPING_BPLUS(i965)) {
724 gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
728 BEGIN_BCS_BATCH(batch, 69);
730 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
732 /* Reference frames and Current frames */
733 for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
734 if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
735 OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
736 I915_GEM_DOMAIN_INSTRUCTION, 0,
739 OUT_BCS_BATCH(batch, 0);
744 for(i = 0; i < 32; i++) {
745 OUT_BCS_BATCH(batch, i/2);
747 OUT_BCS_BATCH(batch, 0);
748 OUT_BCS_BATCH(batch, 0);
750 ADVANCE_BCS_BATCH(batch);
754 gen75_mfc_avc_ref_idx_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
756 struct intel_batchbuffer *batch = encoder_context->base.batch;
759 BEGIN_BCS_BATCH(batch, 10);
760 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
761 OUT_BCS_BATCH(batch, 0); //Select L0
762 OUT_BCS_BATCH(batch, 0x80808020); //Only 1 reference
763 for(i = 0; i < 7; i++) {
764 OUT_BCS_BATCH(batch, 0x80808080);
766 ADVANCE_BCS_BATCH(batch);
768 BEGIN_BCS_BATCH(batch, 10);
769 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
770 OUT_BCS_BATCH(batch, 1); //Select L1
771 OUT_BCS_BATCH(batch, 0x80808022); //Only 1 reference
772 for(i = 0; i < 7; i++) {
773 OUT_BCS_BATCH(batch, 0x80808080);
775 ADVANCE_BCS_BATCH(batch);
780 gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
781 struct intel_encoder_context *encoder_context)
783 struct intel_batchbuffer *batch = encoder_context->base.batch;
784 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
786 BEGIN_BCS_BATCH(batch, 10);
788 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
789 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
790 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
792 OUT_BCS_BATCH(batch, 0);
793 OUT_BCS_BATCH(batch, 0);
795 /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
796 OUT_BCS_BATCH(batch, 0);
797 OUT_BCS_BATCH(batch, 0);
798 OUT_BCS_BATCH(batch, 0);
800 /* the DW7-9 is for Bitplane Read Buffer Base Address */
801 OUT_BCS_BATCH(batch, 0);
802 OUT_BCS_BATCH(batch, 0);
803 OUT_BCS_BATCH(batch, 0);
805 ADVANCE_BCS_BATCH(batch);
809 gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
811 struct intel_batchbuffer *batch = encoder_context->base.batch;
812 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
813 struct i965_driver_data *i965 = i965_driver_data(ctx);
815 if (IS_STEPPING_BPLUS(i965)) {
816 gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
820 BEGIN_BCS_BATCH(batch, 4);
822 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
823 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
824 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
826 OUT_BCS_BATCH(batch, 0);
827 OUT_BCS_BATCH(batch, 0);
829 ADVANCE_BCS_BATCH(batch);
833 static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
834 struct encode_state *encode_state,
835 struct intel_encoder_context *encoder_context)
837 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
839 mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
840 mfc_context->set_surface_state(ctx, encoder_context);
841 mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
842 gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
843 gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
844 mfc_context->avc_img_state(ctx, encode_state, encoder_context);
845 mfc_context->avc_qm_state(ctx, encoder_context);
846 mfc_context->avc_fqm_state(ctx, encoder_context);
847 gen75_mfc_avc_directmode_state(ctx, encoder_context);
848 gen75_mfc_avc_ref_idx_state(ctx, encoder_context);
/*
 * Per-frame preparation: binds the current/reference surfaces and their
 * direct-MV aux buffers into the MFC context, selects pre- vs
 * post-deblocking output, and points the PAK-BSE object at the coded buffer.
 */
852 static VAStatus gen75_mfc_avc_prepare(VADriverContextP ctx,
853 struct encode_state *encode_state,
854 struct intel_encoder_context *encoder_context)
856 struct i965_driver_data *i965 = i965_driver_data(ctx);
857 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
858 struct object_surface *obj_surface;
859 struct object_buffer *obj_buffer;
860 struct gen6_mfc_avc_surface_aux* gen6_avc_surface;
862 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
863 VAStatus vaStatus = VA_STATUS_SUCCESS;
864 int i, j, enable_avc_ildb = 0;
865 VAEncSliceParameterBufferH264 *slice_param;
866 VACodedBufferSegment *coded_buffer_segment;
867 unsigned char *flag = NULL;
/* Deblocking is enabled for the frame if any slice does not disable it. */
869 for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
870 assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
871 slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
873 for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
874 assert((slice_param->slice_type == SLICE_TYPE_I) ||
875 (slice_param->slice_type == SLICE_TYPE_SI) ||
876 (slice_param->slice_type == SLICE_TYPE_P) ||
877 (slice_param->slice_type == SLICE_TYPE_SP) ||
878 (slice_param->slice_type == SLICE_TYPE_B));
880 if (slice_param->disable_deblocking_filter_idc != 1) {
889 /*Setup all the input&output object*/
891 /* Setup current frame and current direct mv buffer*/
892 obj_surface = SURFACE(pPicParameter->CurrPic.picture_id);
894 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* Lazily create the per-surface DMV aux data on first use. */
896 if ( obj_surface->private_data == NULL) {
897 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
898 gen6_avc_surface->dmv_top =
899 dri_bo_alloc(i965->intel.bufmgr,
903 gen6_avc_surface->dmv_bottom =
904 dri_bo_alloc(i965->intel.bufmgr,
908 assert(gen6_avc_surface->dmv_top);
909 assert(gen6_avc_surface->dmv_bottom);
910 obj_surface->private_data = (void *)gen6_avc_surface;
911 obj_surface->free_private_data = (void *)gen75_mfc_free_avc_surface;
/* Last two DMV slots are reserved for the current frame. */
913 gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
914 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
915 mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
916 dri_bo_reference(gen6_avc_surface->dmv_top);
917 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Reconstructed output goes post-deblocking when ILDB is on, else pre. */
919 if (enable_avc_ildb) {
920 mfc_context->post_deblocking_output.bo = obj_surface->bo;
921 dri_bo_reference(mfc_context->post_deblocking_output.bo);
923 mfc_context->pre_deblocking_output.bo = obj_surface->bo;
924 dri_bo_reference(mfc_context->pre_deblocking_output.bo);
927 mfc_context->surface_state.width = obj_surface->orig_width;
928 mfc_context->surface_state.height = obj_surface->orig_height;
929 mfc_context->surface_state.w_pitch = obj_surface->width;
930 mfc_context->surface_state.h_pitch = obj_surface->height;
932 /* Setup reference frames and direct mv buffers*/
933 for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
934 if ( pPicParameter->ReferenceFrames[i].picture_id != VA_INVALID_ID ) {
935 obj_surface = SURFACE(pPicParameter->ReferenceFrames[i].picture_id);
937 if (obj_surface->bo != NULL) {
938 mfc_context->reference_surfaces[i].bo = obj_surface->bo;
939 dri_bo_reference(obj_surface->bo);
941 /* Check DMV buffer */
942 if ( obj_surface->private_data == NULL) {
944 gen6_avc_surface = calloc(sizeof(struct gen6_mfc_avc_surface_aux), 1);
945 gen6_avc_surface->dmv_top =
946 dri_bo_alloc(i965->intel.bufmgr,
950 gen6_avc_surface->dmv_bottom =
951 dri_bo_alloc(i965->intel.bufmgr,
955 assert(gen6_avc_surface->dmv_top);
956 assert(gen6_avc_surface->dmv_bottom);
957 obj_surface->private_data = gen6_avc_surface;
958 obj_surface->free_private_data = gen75_mfc_free_avc_surface;
961 gen6_avc_surface = (struct gen6_mfc_avc_surface_aux*) obj_surface->private_data;
962 /* Setup DMV buffer */
963 mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
964 mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
965 dri_bo_reference(gen6_avc_surface->dmv_top);
966 dri_bo_reference(gen6_avc_surface->dmv_bottom);
/* Input (uncompressed) picture. */
972 obj_surface = SURFACE(encoder_context->input_yuv_surface);
973 assert(obj_surface && obj_surface->bo);
974 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
975 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
/* Coded buffer: PAK-BSE output starts past the status/segment header. */
977 obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
978 bo = obj_buffer->buffer_store->bo;
980 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
981 mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_SIZE;
982 mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
983 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
/* Reset the segment status byte that follows the VACodedBufferSegment. */
986 coded_buffer_segment = (VACodedBufferSegment *)bo->virtual;
987 flag = (unsigned char *)(coded_buffer_segment + 1);
995 static VAStatus gen75_mfc_run(VADriverContextP ctx,
996 struct encode_state *encode_state,
997 struct intel_encoder_context *encoder_context)
999 struct intel_batchbuffer *batch = encoder_context->base.batch;
1001 intel_batchbuffer_flush(batch); //run the pipeline
1003 return VA_STATUS_SUCCESS;
1008 gen75_mfc_stop(VADriverContextP ctx,
1009 struct encode_state *encode_state,
1010 struct intel_encoder_context *encoder_context,
1011 int *encoded_bits_size)
1013 VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
1014 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1015 VACodedBufferSegment *coded_buffer_segment;
1017 vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
1018 assert(vaStatus == VA_STATUS_SUCCESS);
1019 *encoded_bits_size = coded_buffer_segment->size * 8;
1020 i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
1022 return VA_STATUS_SUCCESS;
/*
 * Emit an 11-dword MFX_AVC_SLICE_STATE command for one slice.
 *
 * Derives slice geometry (first/next MB x,y) from the MB address and the
 * frame width in MBs, folds SP->P and SI->I for bit-rate-control lookup,
 * applies the H.264 8.4.3 implicit weighted-prediction denominators for
 * B slices, loads the BRC grow/shrink/correct state, and packs everything
 * into the BCS batch.
 *
 * NOTE(review): this listing is a partial extract — several continuation
 * dwords of the OUT_BCS_BATCH packets are not visible here.
 */
1027 gen75_mfc_avc_slice_state(VADriverContextP ctx,
1028 VAEncPictureParameterBufferH264 *pic_param,
1029 VAEncSliceParameterBufferH264 *slice_param,
1030 struct encode_state *encode_state,
1031 struct intel_encoder_context *encoder_context,
1032 int rate_control_enable,
1034 struct intel_batchbuffer *batch)
1036 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1037 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1038 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1039 int beginmb = slice_param->macroblock_address;
1040 int endmb = beginmb + slice_param->num_macroblocks;
1041 int beginx = beginmb % width_in_mbs;
1042 int beginy = beginmb / width_in_mbs;
1043 int nextx = endmb % width_in_mbs;
1044 int nexty = endmb / width_in_mbs;
1045 int slice_type = slice_param->slice_type;
/* Last slice of the frame iff this slice's end reaches the frame MB count. */
1046 int last_slice = (endmb == (width_in_mbs * height_in_mbs));
1047 int bit_rate_control_target, maxQpN, maxQpP;
1048 unsigned char correct[6], grow, shrink;
1050 int weighted_pred_idc = 0;
1051 unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
1052 unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
1055 batch = encoder_context->base.batch;
/* BRC state is tracked per I/P/B only, so map SP->P and SI->I. */
1057 bit_rate_control_target = slice_type;
1058 if (slice_type == SLICE_TYPE_SP)
1059 bit_rate_control_target = SLICE_TYPE_P;
1060 else if (slice_type == SLICE_TYPE_SI)
1061 bit_rate_control_target = SLICE_TYPE_I;
1063 if (slice_type == SLICE_TYPE_P) {
1064 weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
1065 } else if (slice_type == SLICE_TYPE_B) {
1066 weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1068 if (weighted_pred_idc == 2) {
1069 /* 8.4.3 - Derivation process for prediction weights (8-279) */
1070 luma_log2_weight_denom = 5;
1071 chroma_log2_weight_denom = 5;
/* Load per-slice-type BRC modifiers; Grow/Shrink pack init and resistance
 * into one byte each (resistance in the high nibble). */
1075 maxQpN = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpNegModifier;
1076 maxQpP = mfc_context->bit_rate_control_context[bit_rate_control_target].MaxQpPosModifier;
1078 for (i = 0; i < 6; i++)
1079 correct[i] = mfc_context->bit_rate_control_context[bit_rate_control_target].Correct[i];
1081 grow = mfc_context->bit_rate_control_context[bit_rate_control_target].GrowInit +
1082 (mfc_context->bit_rate_control_context[bit_rate_control_target].GrowResistance << 4);
1083 shrink = mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkInit +
1084 (mfc_context->bit_rate_control_context[bit_rate_control_target].ShrinkResistance << 4);
/* NOTE(review): stray double semicolon below — harmless but should be cleaned up. */
1086 BEGIN_BCS_BATCH(batch, 11);;
1088 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
1089 OUT_BCS_BATCH(batch, slice_type); /*Slice Type: I:P:B Slice*/
1091 if (slice_type == SLICE_TYPE_I) {
1092 OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
1094 OUT_BCS_BATCH(batch,
1095 (1 << 16) | /*1 reference frame*/
1096 (chroma_log2_weight_denom << 8) |
1097 (luma_log2_weight_denom << 0));
1100 OUT_BCS_BATCH(batch,
1101 (weighted_pred_idc << 30) |
1102 (slice_param->direct_spatial_mv_pred_flag<<29) | /*Direct Prediction Type*/
1103 (slice_param->disable_deblocking_filter_idc << 27) |
1104 (slice_param->cabac_init_idc << 24) |
1105 (qp<<16) | /*Slice Quantization Parameter*/
1106 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
1107 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
1108 OUT_BCS_BATCH(batch,
1109 (beginy << 24) | /*First MB X&Y , the begin postion of current slice*/
1111 slice_param->macroblock_address );
1112 OUT_BCS_BATCH(batch, (nexty << 16) | nextx); /*Next slice first MB X&Y*/
1113 OUT_BCS_BATCH(batch,
1114 (0/*rate_control_enable*/ << 31) | /*in CBR mode RateControlCounterEnable = enable*/
1115 (1 << 30) | /*ResetRateControlCounter*/
1116 (0 << 28) | /*RC Triggle Mode = Always Rate Control*/
1117 (4 << 24) | /*RC Stable Tolerance, middle level*/
1118 (0/*rate_control_enable*/ << 23) | /*RC Panic Enable*/
1119 (0 << 22) | /*QP mode, don't modfiy CBP*/
1120 (0 << 21) | /*MB Type Direct Conversion Enabled*/
1121 (0 << 20) | /*MB Type Skip Conversion Enabled*/
1122 (last_slice << 19) | /*IsLastSlice*/
1123 (0 << 18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
1124 (1 << 17) | /*HeaderPresentFlag*/
1125 (1 << 16) | /*SliceData PresentFlag*/
1126 (1 << 15) | /*TailPresentFlag*/
1127 (1 << 13) | /*RBSP NAL TYPE*/
1128 (0 << 12) ); /*CabacZeroWordInsertionEnable*/
/* Bitstream goes to the indirect PAK-BSE object at this offset. */
1129 OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
1130 OUT_BCS_BATCH(batch,
1131 (maxQpN << 24) | /*Target QP - 24 is lowest QP*/
1132 (maxQpP << 16) | /*Target QP + 20 is highest QP*/
1135 OUT_BCS_BATCH(batch,
1136 (correct[5] << 20) |
1137 (correct[4] << 16) |
1138 (correct[3] << 12) |
1142 OUT_BCS_BATCH(batch, 0);
1144 ADVANCE_BCS_BATCH(batch);
1148 #ifdef MFC_SOFTWARE_HASWELL
/*
 * Emit one 12-dword MFC_AVC_PAK_OBJECT for an intra macroblock at (x, y).
 *
 * msg points at the per-MB VME output record; dword 0 carries the MB-type
 * bits that are repacked into intra_msg, and msg[1..3] carry the intra
 * prediction modes copied through to the PAK.  Returns the command length
 * in dwords so callers can account for batch space.
 *
 * NOTE(review): partial extract — some continuation dwords of the packed
 * command are not visible in this listing.
 */
1151 gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
1152 int qp,unsigned int *msg,
1153 struct intel_encoder_context *encoder_context,
1154 unsigned char target_mb_size, unsigned char max_mb_size,
1155 struct intel_batchbuffer *batch)
1157 int len_in_dwords = 12;
1158 unsigned int intra_msg;
1159 #define INTRA_MSG_FLAG (1 << 13)
1160 #define INTRA_MBTYPE_MASK (0x1F0000)
1162 batch = encoder_context->base.batch;
1164 BEGIN_BCS_BATCH(batch, len_in_dwords);
/* Repack the VME message: keep the low mode bits, force the intra flag,
 * and move the MB-type field down into byte 1. */
1166 intra_msg = msg[0] & 0xC0FF;
1167 intra_msg |= INTRA_MSG_FLAG;
1168 intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
1169 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
1170 OUT_BCS_BATCH(batch, 0);
1171 OUT_BCS_BATCH(batch, 0);
1172 OUT_BCS_BATCH(batch,
1173 (0 << 24) | /* PackedMvNum, Debug*/
1174 (0 << 20) | /* No motion vector */
1175 (1 << 19) | /* CbpDcY */
1176 (1 << 18) | /* CbpDcU */
1177 (1 << 17) | /* CbpDcV */
1180 OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x); /* Code Block Pattern for Y*/
1181 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
1182 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
1184 /*Stuff for Intra MB*/
1185 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
1186 OUT_BCS_BATCH(batch, msg[2]);
1187 OUT_BCS_BATCH(batch, msg[3]&0xFC);
1189 /*MaxSizeInWord and TargetSzieInWord*/
1190 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1191 (target_mb_size << 16) );
1193 OUT_BCS_BATCH(batch, 0);
1195 ADVANCE_BCS_BATCH(batch);
/* Length in dwords, used by callers to track batch usage. */
1197 return len_in_dwords;
/*
 * Emit one 12-dword MFC_AVC_PAK_OBJECT for an inter macroblock at (x, y).
 *
 * msg is the per-MB VME output record; offset is the byte offset of the
 * MB's motion vectors inside the VME output buffer (indirect MV data).
 * 8x8 MBs with sub-partition shapes are promoted to the 32-MV form.
 * Returns the command length in dwords.
 *
 * NOTE(review): partial extract — the initial inter_msg MV-count setup
 * lines are not visible in this listing.
 */
1201 gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
1202 unsigned int *msg, unsigned int offset,
1203 struct intel_encoder_context *encoder_context,
1204 unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
1205 struct intel_batchbuffer *batch)
1207 int len_in_dwords = 12;
1208 unsigned int inter_msg = 0;
1210 batch = encoder_context->base.batch;
1212 BEGIN_BCS_BATCH(batch, len_in_dwords);
1214 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
/* 8x8 with sub-shapes needs the full 32-MV record. */
1218 if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
1219 if (msg[1] & SUBMB_SHAPE_MASK)
1222 OUT_BCS_BATCH(batch, inter_msg); /* 32 MV*/
1223 OUT_BCS_BATCH(batch, offset);
/* Rebuild the message dword: keep mode/flag bits, request 8 MVs,
 * upgrading to 32 MVs for sub-partitioned 8x8 blocks. */
1224 inter_msg = msg[0] & (0x1F00FFFF);
1225 inter_msg |= INTER_MV8;
1226 if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
1227 (msg[1] & SUBMB_SHAPE_MASK)) {
1228 inter_msg |= INTER_MV32;
1231 OUT_BCS_BATCH(batch, inter_msg);
1233 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
1234 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
/* B slices additionally set the reference-picture-select bits (0xF<<28). */
1236 if ( slice_type == SLICE_TYPE_B) {
1237 OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp); /* Last MB */
1239 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1242 OUT_BCS_BATCH(batch, (end_mb << 26) | qp); /* Last MB */
1245 inter_msg = msg[1] >> 8;
1246 /*Stuff for Inter MB*/
1247 OUT_BCS_BATCH(batch, inter_msg);
1248 OUT_BCS_BATCH(batch, 0x0);
1249 OUT_BCS_BATCH(batch, 0x0);
1251 /*MaxSizeInWord and TargetSzieInWord*/
1252 OUT_BCS_BATCH(batch, (max_mb_size << 24) |
1253 (target_mb_size << 16) );
1255 OUT_BCS_BATCH(batch, 0x0);
1257 ADVANCE_BCS_BATCH(batch);
1259 return len_in_dwords;
/* Offsets into a per-MB VME output record used when choosing intra vs.
 * inter per macroblock, plus the mask extracting the 16-bit RDO cost.
 * NOTE(review): the record layout is defined by the VME kernels elsewhere
 * in the driver — confirm the units (dwords vs. bytes) against them. */
1262 #define INTRA_RDO_OFFSET 4
1263 #define INTER_RDO_OFFSET 54
1264 #define INTER_MSG_OFFSET 52
1265 #define INTER_MV_OFFSET 224
1266 #define RDO_MASK 0xFFFF
/*
 * Program one slice into the (software) slice batch:
 * slice state -> per-frame headers (slice 0 only) -> packed slice header
 * -> one PAK object per macroblock (choosing intra vs. inter by RDO cost
 * from the mapped VME output) -> tail/padding objects.
 *
 * Under CBR the slice QP is overridden from the BRC context and the
 * parameter buffer's slice_qp_delta is rewritten to match.
 *
 * NOTE(review): partial extract — loop braces and some insert_object
 * argument lines are not visible in this listing.
 */
1269 gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
1270 struct encode_state *encode_state,
1271 struct intel_encoder_context *encoder_context,
1273 struct intel_batchbuffer *slice_batch)
1275 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1276 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1277 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1278 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1279 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1280 unsigned int *msg = NULL, offset = 0;
1281 unsigned char *msg_ptr = NULL;
1282 int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
1283 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1284 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1285 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1287 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1288 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1289 unsigned char *slice_header = NULL;
1290 int slice_header_length_in_bits = 0;
1291 unsigned int tail_data[] = { 0x0, 0x0 };
1292 int slice_type = pSliceParameter->slice_type;
/* CBR: BRC owns the QP; keep the parameter buffer consistent with it. */
1295 if (rate_control_mode == VA_RC_CBR) {
1296 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1297 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1300 /* only support for 8-bit pixel bit-depth */
1301 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1302 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1303 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1304 assert(qp >= 0 && qp < 52);
1306 gen75_mfc_avc_slice_state(ctx,
1309 encode_state, encoder_context,
1310 (rate_control_mode == VA_RC_CBR), qp, slice_batch);
/* SPS/PPS and other per-frame headers go in front of the first slice only. */
1312 if ( slice_index == 0)
1313 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1315 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1318 mfc_context->insert_object(ctx, encoder_context,
1319 (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
1320 5, /* first 5 bytes are start code + nal unit type */
1321 1, 0, 1, slice_batch);
/* Map the VME output so per-MB records can be read back on the CPU. */
1323 dri_bo_map(vme_context->vme_output.bo , 1);
1324 msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
1327 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1329 msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
1332 for (i = pSliceParameter->macroblock_address;
1333 i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
1334 int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
1335 x = i % width_in_mbs;
1336 y = i / width_in_mbs;
1337 msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
1341 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
/* Inter slice: compare VME's intra and inter RDO costs per MB and emit
 * whichever PAK object is cheaper. */
1343 int inter_rdo, intra_rdo;
1344 inter_rdo = msg[INTER_RDO_OFFSET] & RDO_MASK;
1345 intra_rdo = msg[INTRA_RDO_OFFSET] & RDO_MASK;
1346 offset = i * vme_context->vme_output.size_block + INTER_MV_OFFSET;
1347 if (intra_rdo < inter_rdo) {
1348 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
1350 msg += INTER_MSG_OFFSET;
1351 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
1356 dri_bo_unmap(vme_context->vme_output.bo);
/* Tail data: trailing bits / padding after the slice payload. */
1359 mfc_context->insert_object(ctx, encoder_context,
1361 2, 1, 1, 0, slice_batch);
1363 mfc_context->insert_object(ctx, encoder_context,
1365 1, 1, 1, 0, slice_batch);
/*
 * Build the per-slice PAK command stream on the CPU ("software" path,
 * selected by MFC_SOFTWARE_HASWELL): allocate a fresh BSD batch, program
 * every slice into it, terminate with MI_BATCH_BUFFER_END, and hand back
 * a referenced bo for the second-level batch start.
 *
 * NOTE(review): partial extract — the trailing `return batch_bo;` is not
 * visible in this listing; the bo reference taken below implies ownership
 * passes to the caller.
 */
1373 gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
1374 struct encode_state *encode_state,
1375 struct intel_encoder_context *encoder_context)
1377 struct i965_driver_data *i965 = i965_driver_data(ctx);
1378 struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD);
1379 dri_bo *batch_bo = batch->buffer;
1382 for (i = 0; i < encode_state->num_slice_params_ext; i++) {
1383 gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
1386 intel_batchbuffer_align(batch, 8);
1388 BEGIN_BCS_BATCH(batch, 2);
1389 OUT_BCS_BATCH(batch, 0);
1390 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
1391 ADVANCE_BCS_BATCH(batch);
/* Keep the bo alive past the batch-object free; caller unreferences it. */
1393 dri_bo_reference(batch_bo);
1394 intel_batchbuffer_free(batch);
/*
 * Bind the two input surfaces the hardware batch-buffer kernels read:
 * the VME output (per-MB records) and the aux batch buffer holding the
 * pre-built slice headers.  Both are exposed through the GPE binding
 * table at fixed indices.
 */
1402 gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
1403 struct encode_state *encode_state,
1404 struct intel_encoder_context *encoder_context)
1407 struct gen6_vme_context *vme_context = encoder_context->vme_context;
1408 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1410 assert(vme_context->vme_output.bo);
1411 mfc_context->buffer_suface_setup(ctx,
1412 &mfc_context->gpe_context,
1413 &vme_context->vme_output,
1414 BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
1415 SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
1416 assert(mfc_context->aux_batchbuffer_surface.bo);
1417 mfc_context->buffer_suface_setup(ctx,
1418 &mfc_context->gpe_context,
1419 &mfc_context->aux_batchbuffer_surface,
1420 BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
1421 SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
/*
 * Allocate and bind the output surface the batch-buffer kernels write:
 * one CMD_LEN_IN_OWORD-sized record per macroblock plus per-slice
 * overhead (8 extra records per slice and one terminator).
 *
 * NOTE(review): partial extract — the dri_bo_alloc name/alignment
 * arguments and any error check on the allocation are not visible here.
 */
1425 gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
1426 struct encode_state *encode_state,
1427 struct intel_encoder_context *encoder_context)
1430 struct i965_driver_data *i965 = i965_driver_data(ctx);
1431 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1432 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1433 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
1434 int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
/* MB commands + 8 slack records per slice + 1 terminator record. */
1435 mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
1436 mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
1437 mfc_context->mfc_batchbuffer_surface.pitch = 16;
1438 mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
1440 mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
1442 mfc_context->buffer_suface_setup(ctx,
1443 &mfc_context->gpe_context,
1444 &mfc_context->mfc_batchbuffer_surface,
1445 BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
1446 SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
/* Bind all media-kernel surfaces for the hardware batch-buffer path:
 * inputs (VME output, slice headers) then the generated-batch output. */
1450 gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
1451 struct encode_state *encode_state,
1452 struct intel_encoder_context *encoder_context)
1454 gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
1455 gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
/*
 * Fill the interface descriptor remap table (IDRT): one 32-byte
 * descriptor per loaded MFC kernel, pointing at the kernel start, the
 * shared binding table, and a 4-register CURBE read.  A relocation is
 * emitted for each kernel-start pointer so the GPU address is patched
 * at exec time.
 *
 * NOTE(review): partial extract — the bo map/unmap calls, the desc
 * pointer initialization/advance, and the reloc target arguments are not
 * visible in this listing.
 */
1459 gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
1460 struct encode_state *encode_state,
1461 struct intel_encoder_context *encoder_context)
1463 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1464 struct gen6_interface_descriptor_data *desc;
1468 bo = mfc_context->gpe_context.idrt.bo;
1470 assert(bo->virtual);
1473 for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
1474 struct i965_kernel *kernel;
1476 kernel = &mfc_context->gpe_context.kernels[i];
1477 assert(sizeof(*desc) == 32);
1479 /*Setup the descritor table*/
1480 memset(desc, 0, sizeof(*desc));
/* Kernel start pointer is in 64-byte units. */
1481 desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
1482 desc->desc2.sampler_count = 0;
1483 desc->desc2.sampler_state_pointer = 0;
1484 desc->desc3.binding_table_entry_count = 2;
1485 desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
1486 desc->desc4.constant_urb_entry_read_offset = 0;
1487 desc->desc4.constant_urb_entry_read_length = 4;
/* Patch desc0 with the kernel bo's GPU address at exec time. */
1490 dri_bo_emit_reloc(bo,
1491 I915_GEM_DOMAIN_INSTRUCTION, 0,
1493 i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
/* Load the CURBE constants consumed by the batch-buffer kernels.
 * NOTE(review): partial extract — the actual constant writes are not
 * visible in this listing. */
1502 gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
1503 struct encode_state *encode_state,
1504 struct intel_encoder_context *encoder_context)
1506 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
/*
 * Emit one 12-dword CMD_MEDIA_OBJECT that launches a batch-buffer kernel
 * (index selects intra vs. inter) over a run of macroblocks.  The inline
 * payload carries the slice-header offset, the destination offset in the
 * generated batch, and the packed MB-run description.
 *
 * NOTE(review): partial extract — most parameters and several payload
 * dwords are not visible in this listing.
 */
1512 gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
1515 int batchbuffer_offset,
1527 BEGIN_BATCH(batch, 12);
1529 OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
1530 OUT_BATCH(batch, index);
1531 OUT_BATCH(batch, 0);
1532 OUT_BATCH(batch, 0);
1533 OUT_BATCH(batch, 0);
1534 OUT_BATCH(batch, 0);
/* Inline data: where the header lives and where the kernel writes. */
1537 OUT_BATCH(batch, head_offset);
1538 OUT_BATCH(batch, batchbuffer_offset);
1543 number_mb_cmds << 16 |
1554 ADVANCE_BATCH(batch);
/*
 * Split one slice into MEDIA_OBJECT launches of up to 128 macroblocks
 * each, advancing head/batchbuffer offsets as the kernels consume the
 * slice header (head_size), emit MB commands, and append the tail
 * (tail_size).  A final launch covers the remainder when the MB count
 * is not a multiple of 128.
 *
 * NOTE(review): partial extract — the emit-call argument lists and the
 * first/last-object bookkeeping between launches are not visible here.
 */
1558 gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
1559 struct intel_encoder_context *encoder_context,
1560 VAEncSliceParameterBufferH264 *slice_param,
1562 unsigned short head_size,
1563 unsigned short tail_size,
1564 int batchbuffer_offset,
1568 struct intel_batchbuffer *batch = encoder_context->base.batch;
1569 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1570 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1571 int total_mbs = slice_param->num_macroblocks;
1572 int number_mb_cmds = 128;
1573 int starting_mb = 0;
1574 int last_object = 0;
1575 int first_object = 1;
/* Kernel choice: intra kernel for I slices, inter kernel otherwise. */
1578 int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;
1580 for (i = 0; i < total_mbs / number_mb_cmds; i++) {
1581 last_object = (total_mbs - starting_mb) == number_mb_cmds;
1582 mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1583 mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1584 assert(mb_x <= 255 && mb_y <= 255);
1586 starting_mb += number_mb_cmds;
1588 gen75_mfc_batchbuffer_emit_object_command(batch,
/* The first launch also copies the slice header; account for it once. */
1604 head_offset += head_size;
1605 batchbuffer_offset += head_size;
1609 head_offset += tail_size;
1610 batchbuffer_offset += tail_size;
1613 batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;
/* Remainder launch for total_mbs % 128 macroblocks. */
1620 number_mb_cmds = total_mbs % number_mb_cmds;
1621 mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
1622 mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
1623 assert(mb_x <= 255 && mb_y <= 255);
1624 starting_mb += number_mb_cmds;
1626 gen75_mfc_batchbuffer_emit_object_command(batch,
1644 * return size in Owords (16bytes)
/*
 * Hardware-path per-slice setup: write the slice state, per-frame headers
 * (slice 0 only), the packed slice header and tail into the aux batch,
 * measure head_size/tail_size in OWords from the aux-batch usage deltas,
 * then issue the MEDIA_OBJECT launches that make the GPU kernels expand
 * the per-MB PAK commands.  Returns the slice's total size in OWords
 * (head + tail + one command per macroblock).
 *
 * NOTE(review): partial extract — several insert_object argument lists
 * and the slice_state call arguments are not visible in this listing.
 */
1647 gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
1648 struct encode_state *encode_state,
1649 struct intel_encoder_context *encoder_context,
1651 int batchbuffer_offset)
1653 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1654 struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
1655 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1656 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1657 VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
1658 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
1659 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
1660 int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
1661 int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
1662 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1663 unsigned char *slice_header = NULL;
1664 int slice_header_length_in_bits = 0;
1665 unsigned int tail_data[] = { 0x0, 0x0 };
1667 int old_used = intel_batchbuffer_used_size(slice_batch), used;
1668 unsigned short head_size, tail_size;
1669 int slice_type = pSliceParameter->slice_type;
/* CBR: BRC owns the QP; keep the parameter buffer consistent with it. */
1671 if (rate_control_mode == VA_RC_CBR) {
1672 qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
1673 pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
1676 /* only support for 8-bit pixel bit-depth */
1677 assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
1678 assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
1679 assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
1680 assert(qp >= 0 && qp < 52);
/* Offsets into the aux batch are tracked in OWords (16 bytes). */
1682 head_offset = old_used / 16;
1683 gen75_mfc_avc_slice_state(ctx,
1688 (rate_control_mode == VA_RC_CBR),
1692 if (slice_index == 0)
1693 intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
1695 slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header);
1698 mfc_context->insert_object(ctx,
1700 (unsigned int *)slice_header,
1701 ALIGN(slice_header_length_in_bits, 32) >> 5,
1702 slice_header_length_in_bits & 0x1f,
1703 5, /* first 5 bytes are start code + nal unit type */
1710 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1711 used = intel_batchbuffer_used_size(slice_batch);
1712 head_size = (used - old_used) / 16;
1717 mfc_context->insert_object(ctx,
1728 mfc_context->insert_object(ctx,
1740 intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
1741 used = intel_batchbuffer_used_size(slice_batch);
1742 tail_size = (used - old_used) / 16;
1745 gen75_mfc_avc_batchbuffer_slice_command(ctx,
/* Size contribution of this slice in OWords. */
1755 return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
/*
 * Run the media (GPE) pipeline that generates the PAK batch on the GPU:
 * set up the pipeline state, then launch the batch-generation kernels
 * for every slice, accumulating each slice's OWord size into the output
 * offset.
 *
 * NOTE(review): partial extract — the `offset += size;` accumulation line
 * is not visible in this listing but is implied by the loop variables.
 */
1759 gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
1760 struct encode_state *encode_state,
1761 struct intel_encoder_context *encoder_context)
1763 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1764 struct intel_batchbuffer *batch = encoder_context->base.batch;
1765 int i, size, offset = 0;
1766 intel_batchbuffer_start_atomic(batch, 0x4000);
1767 gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
1769 for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
1770 size = gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
1774 intel_batchbuffer_end_atomic(batch);
1775 intel_batchbuffer_flush(batch);
/* Drive the full hardware batch-generation sequence: bind surfaces,
 * fill the interface descriptors, load constants, then run the kernels. */
1779 gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx,
1780 struct encode_state *encode_state,
1781 struct intel_encoder_context *encoder_context)
1783 gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
1784 gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
1785 gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
1786 gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
/* Hardware path: have GPU kernels generate the PAK batch, then hand back
 * a referenced bo of the generated batch surface (caller unreferences). */
1790 gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
1791 struct encode_state *encode_state,
1792 struct intel_encoder_context *encoder_context)
1794 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1796 gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
1797 dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
1799 return mfc_context->mfc_batchbuffer_surface.bo;
/*
 * Top-level BCS programming for one frame: reject interlaced content,
 * build the slice-level batch (CPU or GPU path depending on
 * MFC_SOFTWARE_HASWELL), then emit picture-level state followed by a
 * second-level MI_BATCH_BUFFER_START that chains into the slice batch.
 *
 * NOTE(review): partial extract — the error return for the interlace
 * case and the OUT_BCS_RELOC offset argument are not visible here.
 */
1805 gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
1806 struct encode_state *encode_state,
1807 struct intel_encoder_context *encoder_context)
1809 struct intel_batchbuffer *batch = encoder_context->base.batch;
1810 dri_bo *slice_batch_bo;
1812 if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
1813 fprintf(stderr, "Current VA driver don't support interlace mode!\n");
1818 #ifdef MFC_SOFTWARE_HASWELL
1819 slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
1821 slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
1825 intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
1826 intel_batchbuffer_emit_mi_flush(batch);
1828 // picture level programing
1829 gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);
/* Chain into the slice batch: (1 << 8) selects a second-level batch. */
1831 BEGIN_BCS_BATCH(batch, 2);
1832 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
1833 OUT_BCS_RELOC(batch,
1835 I915_GEM_DOMAIN_COMMAND, 0,
1837 ADVANCE_BCS_BATCH(batch);
1840 intel_batchbuffer_end_atomic(batch);
/* The relocation above keeps the bo alive in the kernel; drop our ref. */
1842 dri_bo_unreference(slice_batch_bo);
1847 gen75_mfc_avc_encode_picture(VADriverContextP ctx,
1848 struct encode_state *encode_state,
1849 struct intel_encoder_context *encoder_context)
1851 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
1852 unsigned int rate_control_mode = encoder_context->rate_control_mode;
1853 int current_frame_bits_size;
1857 gen75_mfc_init(ctx, encoder_context);
1858 gen75_mfc_avc_prepare(ctx, encode_state, encoder_context);
1859 /*Programing bcs pipeline*/
1860 gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
1861 gen75_mfc_run(ctx, encode_state, encoder_context);
1862 if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
1863 gen75_mfc_stop(ctx, encode_state, encoder_context, ¤t_frame_bits_size);
1864 sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
1865 if (sts == BRC_NO_HRD_VIOLATION) {
1866 intel_mfc_hrd_context_update(encode_state, mfc_context);
1869 else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
1870 if (!mfc_context->hrd.violation_noted) {
1871 fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
1872 mfc_context->hrd.violation_noted = 1;
1874 return VA_STATUS_SUCCESS;
1881 return VA_STATUS_SUCCESS;
/*
 * Release every GPU buffer object held by the MFC context and tear down
 * its GPE state.  Each bo pointer is NULLed after unreference so a
 * repeated destroy is harmless; dri_bo_unreference(NULL) is a no-op.
 *
 * NOTE(review): partial extract — the final free of the context struct
 * itself is not visible in this listing.
 */
1886 gen75_mfc_context_destroy(void *context)
1888 struct gen6_mfc_context *mfc_context = context;
1891 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1892 mfc_context->post_deblocking_output.bo = NULL;
1894 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1895 mfc_context->pre_deblocking_output.bo = NULL;
1897 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1898 mfc_context->uncompressed_picture_source.bo = NULL;
1900 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
1901 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
/* Top/bottom direct-MV buffers for every reference surface. */
1903 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1904 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1905 mfc_context->direct_mv_buffers[i].bo = NULL;
1908 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1909 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1911 dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
1912 mfc_context->macroblock_status_buffer.bo = NULL;
1914 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1915 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1917 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1918 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1921 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
1922 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
1923 mfc_context->reference_surfaces[i].bo = NULL;
1926 i965_gpe_context_destroy(&mfc_context->gpe_context);
1928 dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
1929 mfc_context->mfc_batchbuffer_surface.bo = NULL;
1931 dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
1932 mfc_context->aux_batchbuffer_surface.bo = NULL;
1934 if (mfc_context->aux_batchbuffer)
1935 intel_batchbuffer_free(mfc_context->aux_batchbuffer);
1937 mfc_context->aux_batchbuffer = NULL;
/*
 * Encoder entry point dispatched by profile: only the H.264 profiles are
 * wired up on this generation; everything else reports
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 *
 * NOTE(review): partial extract — the profile parameter, switch header,
 * and return statement are not visible in this listing.
 */
1942 static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
1944 struct encode_state *encode_state,
1945 struct intel_encoder_context *encoder_context)
1950 case VAProfileH264Baseline:
1951 case VAProfileH264Main:
1952 case VAProfileH264High:
1953 vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
1956 /* FIXME: add for other profile */
1958 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
1965 Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1967 struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
1969 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
1971 mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
1972 mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
1974 mfc_context->gpe_context.curbe.length = 32 * 4;
1976 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
1977 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
1978 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
1979 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
1980 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
1982 i965_gpe_load_kernels(ctx,
1983 &mfc_context->gpe_context,
1987 mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
1988 mfc_context->set_surface_state = gen75_mfc_surface_state;
1989 mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
1990 mfc_context->avc_img_state = gen75_mfc_avc_img_state;
1991 mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
1992 mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
1993 mfc_context->insert_object = gen75_mfc_avc_insert_object;
1994 mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
1996 encoder_context->mfc_context = mfc_context;
1997 encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
1998 encoder_context->mfc_pipeline = gen75_mfc_pipeline;
1999 encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;