/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Zhou Chang <chang.zhou@intel.com>
 *    Xiang, Haihao <haihao.xiang@intel.com>
 */
#include <assert.h>
#include <string.h>

#include "intel_batchbuffer.h"
#include "i965_defines.h"
#include "i965_structs.h"
#include "i965_drv_video.h"
#include "i965_encoder.h"
#include "i965_encoder_utils.h"
44 static const uint32_t gen7_mfc_batchbuffer_avc_intra[][4] = {
45 #include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
48 static const uint32_t gen7_mfc_batchbuffer_avc_inter[][4] = {
49 #include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
52 static struct i965_kernel gen7_mfc_kernels[] = {
54 "MFC AVC INTRA BATCHBUFFER ",
55 MFC_BATCHBUFFER_AVC_INTRA,
56 gen7_mfc_batchbuffer_avc_intra,
57 sizeof(gen7_mfc_batchbuffer_avc_intra),
62 "MFC AVC INTER BATCHBUFFER ",
63 MFC_BATCHBUFFER_AVC_INTER,
64 gen7_mfc_batchbuffer_avc_inter,
65 sizeof(gen7_mfc_batchbuffer_avc_inter),
71 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
73 struct intel_encoder_context *encoder_context)
75 struct intel_batchbuffer *batch = encoder_context->base.batch;
76 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
78 assert(standard_select == MFX_FORMAT_MPEG2 ||
79 standard_select == MFX_FORMAT_AVC);
81 BEGIN_BCS_BATCH(batch, 5);
83 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
85 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
86 (MFD_MODE_VLD << 15) | /* VLD mode */
87 (1 << 10) | /* Stream-Out Enable */
88 ((!!mfc_context->post_deblocking_output.bo) << 9) | /* Post Deblocking Output */
89 ((!!mfc_context->pre_deblocking_output.bo) << 8) | /* Pre Deblocking Output */
90 (0 << 8) | /* Pre Deblocking Output */
91 (0 << 5) | /* not in stitch mode */
92 (1 << 4) | /* encoding mode */
93 (standard_select << 0)); /* standard select: avc or mpeg2 */
95 (0 << 7) | /* expand NOA bus flag */
96 (0 << 6) | /* disable slice-level clock gating */
97 (0 << 5) | /* disable clock gating for NOA */
98 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
99 (0 << 3) | /* terminate if AVC mbdata error occurs */
100 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
103 OUT_BCS_BATCH(batch, 0);
104 OUT_BCS_BATCH(batch, 0);
106 ADVANCE_BCS_BATCH(batch);
110 gen7_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
112 struct intel_batchbuffer *batch = encoder_context->base.batch;
113 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
115 BEGIN_BCS_BATCH(batch, 6);
117 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
118 OUT_BCS_BATCH(batch, 0);
120 ((mfc_context->surface_state.height - 1) << 18) |
121 ((mfc_context->surface_state.width - 1) << 4));
123 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
124 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
125 (0 << 22) | /* surface object control state, FIXME??? */
126 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
127 (0 << 2) | /* must be 0 for interleave U/V */
128 (1 << 1) | /* must be tiled */
129 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
131 (0 << 16) | /* must be 0 for interleave U/V */
132 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
133 OUT_BCS_BATCH(batch, 0);
135 ADVANCE_BCS_BATCH(batch);
139 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
141 struct intel_batchbuffer *batch = encoder_context->base.batch;
142 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
143 struct gen6_vme_context *vme_context = encoder_context->vme_context;
145 BEGIN_BCS_BATCH(batch, 11);
147 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
148 OUT_BCS_BATCH(batch, 0);
149 OUT_BCS_BATCH(batch, 0);
150 /* MFX Indirect MV Object Base Address */
151 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
152 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
153 OUT_BCS_BATCH(batch, 0);
154 OUT_BCS_BATCH(batch, 0);
155 OUT_BCS_BATCH(batch, 0);
156 OUT_BCS_BATCH(batch, 0);
157 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
159 mfc_context->mfc_indirect_pak_bse_object.bo,
160 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
163 mfc_context->mfc_indirect_pak_bse_object.bo,
164 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
165 mfc_context->mfc_indirect_pak_bse_object.end_offset);
167 ADVANCE_BCS_BATCH(batch);
171 gen7_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
172 struct intel_encoder_context *encoder_context)
174 struct intel_batchbuffer *batch = encoder_context->base.batch;
175 struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
176 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
178 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
179 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
181 BEGIN_BCS_BATCH(batch, 16);
183 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
185 ((width_in_mbs * height_in_mbs) & 0xFFFF));
187 ((height_in_mbs - 1) << 16) |
188 ((width_in_mbs - 1) << 0));
190 (0 << 24) | /* Second Chroma QP Offset */
191 (0 << 16) | /* Chroma QP Offset */
192 (0 << 14) | /* Max-bit conformance Intra flag */
193 (0 << 13) | /* Max Macroblock size conformance Inter flag */
194 (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) | /*Weighted_Pred_Flag */
195 (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) | /* Weighted_BiPred_Idc */
196 (0 << 8) | /* FIXME: Image Structure */
197 (0 << 0) ); /* Current Decoed Image Frame Store ID, reserved in Encode mode */
199 (0 << 16) | /* Mininum Frame size */
200 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
201 (0 << 14) | /* Load BitStream Pointer only once, 1 slic 1 frame */
202 (0 << 13) | /* CABAC 0 word insertion test enable */
203 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
204 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
205 (0 << 9) | /* FIXME: MbMvFormatFlag */
206 (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7) | /*0:CAVLC encoding mode,1:CABAC*/
207 (0 << 6) | /* Only valid for VLD decoding mode */
208 (0 << 5) | /* Constrained Intra Predition Flag, from PPS */
209 (0 << 4) | /* Direct 8x8 inference flag */
210 (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3) | /*8x8 or 4x4 IDCT Transform Mode Flag*/
211 (1 << 2) | /* Frame MB only flag */
212 (0 << 1) | /* MBAFF mode is in active */
213 (0 << 0)); /* Field picture flag */
214 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
215 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
216 (0xBB8 << 16) | /* InterMbMaxSz */
217 (0xEE8) ); /* IntraMbMaxSz */
218 OUT_BCS_BATCH(batch, 0); /* Reserved */
219 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
220 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
221 OUT_BCS_BATCH(batch, 0x8C000000);
222 OUT_BCS_BATCH(batch, 0x00010000);
223 OUT_BCS_BATCH(batch, 0);
224 OUT_BCS_BATCH(batch, 0);
225 OUT_BCS_BATCH(batch, 0);
226 OUT_BCS_BATCH(batch, 0);
228 ADVANCE_BCS_BATCH(batch);
232 gen7_mfc_qm_state(VADriverContextP ctx,
236 struct intel_encoder_context *encoder_context)
238 struct intel_batchbuffer *batch = encoder_context->base.batch;
239 unsigned int qm_buffer[16];
241 assert(qm_length <= 16);
242 assert(sizeof(*qm) == 4);
243 memcpy(qm_buffer, qm, qm_length * 4);
245 BEGIN_BCS_BATCH(batch, 18);
246 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
247 OUT_BCS_BATCH(batch, qm_type << 0);
248 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
249 ADVANCE_BCS_BATCH(batch);
253 gen7_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
255 unsigned int qm[16] = {
256 0x10101010, 0x10101010, 0x10101010, 0x10101010,
257 0x10101010, 0x10101010, 0x10101010, 0x10101010,
258 0x10101010, 0x10101010, 0x10101010, 0x10101010,
259 0x10101010, 0x10101010, 0x10101010, 0x10101010
262 gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
263 gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
264 gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
265 gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
269 gen7_mfc_fqm_state(VADriverContextP ctx,
273 struct intel_encoder_context *encoder_context)
275 struct intel_batchbuffer *batch = encoder_context->base.batch;
276 unsigned int fqm_buffer[32];
278 assert(fqm_length <= 32);
279 assert(sizeof(*fqm) == 4);
280 memcpy(fqm_buffer, fqm, fqm_length * 4);
282 BEGIN_BCS_BATCH(batch, 34);
283 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
284 OUT_BCS_BATCH(batch, fqm_type << 0);
285 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
286 ADVANCE_BCS_BATCH(batch);
290 gen7_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
292 unsigned int qm[32] = {
293 0x10001000, 0x10001000, 0x10001000, 0x10001000,
294 0x10001000, 0x10001000, 0x10001000, 0x10001000,
295 0x10001000, 0x10001000, 0x10001000, 0x10001000,
296 0x10001000, 0x10001000, 0x10001000, 0x10001000,
297 0x10001000, 0x10001000, 0x10001000, 0x10001000,
298 0x10001000, 0x10001000, 0x10001000, 0x10001000,
299 0x10001000, 0x10001000, 0x10001000, 0x10001000,
300 0x10001000, 0x10001000, 0x10001000, 0x10001000
303 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
304 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
305 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
306 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
310 gen7_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
311 unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
312 int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
313 struct intel_batchbuffer *batch)
316 batch = encoder_context->base.batch;
318 BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
320 OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
322 (0 << 16) | /* always start at offset 0 */
323 (data_bits_in_last_dw << 8) |
324 (skip_emul_byte_count << 4) |
325 (!!emulation_flag << 3) |
326 ((!!is_last_header) << 2) |
327 ((!!is_end_of_slice) << 1) |
328 (0 << 0)); /* FIXME: ??? */
329 intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
331 ADVANCE_BCS_BATCH(batch);
/*
 * NOTE(review): this listing is damaged — every line carries a stray
 * decimal line-number prefix, and structural lines (the return-type line,
 * braces, the i965_gpe_load_kernels trailing arguments, and the function
 * tail past the mfc_brc_prepare assignment) were dropped by extraction.
 * Code below is left byte-identical; restore the missing lines from the
 * upstream file before compiling.
 */
/* Create the gen7 MFC (PAK) context: allocate it, size its GPE state,
 * load the batchbuffer media kernels, and install the gen7 vtable hooks
 * on the encoder context. */
335 gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
/* zero-initialized so unset fields/BOs start out NULL */
337 struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
/* binding table sized for the maximum media surfaces on this gen */
339 mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
341 mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
342 mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
/* CURBE: 32 constant DWORDs */
344 mfc_context->gpe_context.curbe.length = 32 * 4;
/* VFE fields are encoded as (value - 1) per hardware convention — TODO confirm against PRM */
346 mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
347 mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
348 mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
349 mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
350 mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
/* load the gen7 MFC batchbuffer kernels into the GPE context */
352 i965_gpe_load_kernels(ctx,
353 &mfc_context->gpe_context,
/* gen7-specific command-emission hooks */
357 mfc_context->pipe_mode_select = gen7_mfc_pipe_mode_select;
358 mfc_context->set_surface_state = gen7_mfc_surface_state;
359 mfc_context->ind_obj_base_addr_state = gen7_mfc_ind_obj_base_addr_state;
360 mfc_context->avc_img_state = gen7_mfc_avc_img_state;
361 mfc_context->avc_qm_state = gen7_mfc_avc_qm_state;
362 mfc_context->avc_fqm_state = gen7_mfc_avc_fqm_state;
363 mfc_context->insert_object = gen7_mfc_avc_insert_object;
364 mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;
/* the pipeline/teardown/BRC entry points are shared with gen6 */
366 encoder_context->mfc_context = mfc_context;
367 encoder_context->mfc_context_destroy = gen6_mfc_context_destroy;
368 encoder_context->mfc_pipeline = gen6_mfc_pipeline;
369 encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;