2 * Copyright © 2010-2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Zhou Chang <chang.zhou@intel.com>
35 #include "intel_batchbuffer.h"
36 #include "i965_defines.h"
37 #include "i965_structs.h"
38 #include "i965_drv_video.h"
39 #include "i965_encoder.h"
/*
 * Emit MFX_PIPE_MODE_SELECT (Gen6 layout, 4 dwords) on the BCS ring:
 * AVC standard, encode mode, post-deblocking output enabled, stream-out
 * disabled.  The 'batch' argument is re-pointed at the encoder context's
 * own batch buffer before any command is emitted.
 * NOTE(review): this extract is lossy (opening brace and the heads of
 * some OUT_BCS_BATCH(...) calls are on dropped lines) -- verify the
 * dword grouping against the complete source file.
 */
42 gen6_mfc_pipe_mode_select(VADriverContextP ctx,
43 struct gen6_encoder_context *gen6_encoder_context,
44 struct intel_batchbuffer *batch)
47 batch = gen6_encoder_context->base.batch;
49 BEGIN_BCS_BATCH(batch, 4);
51 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
53 (0 << 10) | /* disable Stream-Out */
54 (1 << 9) | /* Post Deblocking Output */
55 (0 << 8) | /* Pre Deblocking Output */
56 (0 << 7) | /* disable TLB prefetch */
57 (0 << 5) | /* not in stitch mode */
58 (1 << 4) | /* encoding mode */
59 (2 << 0)); /* Standard Select: AVC */
61 (0 << 20) | /* round flag in PB slice */
62 (0 << 19) | /* round flag in Intra8x8 */
63 (0 << 7) | /* expand NOA bus flag */
64 (1 << 6) | /* must be 1 */
65 (0 << 5) | /* disable clock gating for NOA */
66 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
67 (0 << 3) | /* terminate if AVC mbdata error occurs */
68 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
69 (0 << 1) | /* AVC long field motion vector */
70 (0 << 0)); /* always calculate AVC ILDB boundary strength */
71 OUT_BCS_BATCH(batch, 0);
73 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_MODE_SELECT (Gen7 layout, 5 dwords): long command format
 * (required for the encoder), encode mode, post-deblocking output,
 * stream-out disabled.  Asserts the requested standard is AVC or MPEG2.
 * NOTE(review): lossy extract -- the 'standard_select' parameter used
 * below sits on a dropped line of the signature.
 */
77 gen7_mfc_pipe_mode_select(VADriverContextP ctx,
79 struct gen6_encoder_context *gen6_encoder_context,
80 struct intel_batchbuffer *batch)
83 batch = gen6_encoder_context->base.batch;
85 assert(standard_select == MFX_FORMAT_MPEG2 ||
86 standard_select == MFX_FORMAT_AVC);
88 BEGIN_BCS_BATCH(batch, 5);
89 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
91 (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
92 (MFD_MODE_VLD << 15) | /* VLD mode */
93 (0 << 10) | /* disable Stream-Out */
94 (1 << 9) | /* Post Deblocking Output */
95 (0 << 8) | /* Pre Deblocking Output */
96 (0 << 5) | /* not in stitch mode */
97 (1 << 4) | /* encoding mode */
98 (standard_select << 0)); /* standard select: avc or mpeg2 */
100 (0 << 7) | /* expand NOA bus flag */
101 (0 << 6) | /* disable slice-level clock gating */
102 (0 << 5) | /* disable clock gating for NOA */
103 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
104 (0 << 3) | /* terminate if AVC mbdata error occurs */
105 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
108 OUT_BCS_BATCH(batch, 0);
109 OUT_BCS_BATCH(batch, 0);
111 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (Gen6, 6 dwords) describing the target surface:
 * planar 4:2:0 8-bit (NV12-style interleaved U/V), Y-tiled, with
 * dimensions, pitch and the U(cb) plane Y-offset taken from
 * mfc_context->surface_state.  On Gen6 the height/width fields sit at
 * bit 19 / bit 6 (contrast the Gen7 variant below, which uses 18 / 4).
 */
115 gen6_mfc_surface_state(VADriverContextP ctx,
116 struct gen6_encoder_context *gen6_encoder_context,
117 struct intel_batchbuffer *batch)
119 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
122 batch = gen6_encoder_context->base.batch;
124 BEGIN_BCS_BATCH(batch, 6);
126 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
127 OUT_BCS_BATCH(batch, 0);
129 ((mfc_context->surface_state.height - 1) << 19) |
130 ((mfc_context->surface_state.width - 1) << 6));
132 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
133 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
134 (0 << 22) | /* surface object control state, FIXME??? */
135 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
136 (0 << 2) | /* must be 0 for interleave U/V */
137 (1 << 1) | /* must be y-tiled */
138 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
140 (0 << 16) | /* must be 0 for interleave U/V */
141 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
142 OUT_BCS_BATCH(batch, 0);
143 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE (Gen7, 6 dwords).  Identical content to the
 * Gen6 variant except the height/width fields moved to bit 18 / bit 4
 * in dword 2, per the Gen7 command layout.
 */
147 gen7_mfc_surface_state(VADriverContextP ctx,
148 struct gen6_encoder_context *gen6_encoder_context,
149 struct intel_batchbuffer *batch)
151 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
154 batch = gen6_encoder_context->base.batch;
156 BEGIN_BCS_BATCH(batch, 6);
158 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
159 OUT_BCS_BATCH(batch, 0);
161 ((mfc_context->surface_state.height - 1) << 18) |
162 ((mfc_context->surface_state.width - 1) << 4));
164 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
165 (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
166 (0 << 22) | /* surface object control state, FIXME??? */
167 ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
168 (0 << 2) | /* must be 0 for interleave U/V */
169 (1 << 1) | /* must be tiled */
170 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
172 (0 << 16) | /* must be 0 for interleave U/V */
173 (mfc_context->surface_state.h_pitch)); /* y offset for U(cb) */
174 OUT_BCS_BATCH(batch, 0);
175 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 dwords): relocations for the
 * post-deblocking output, the uncompressed source picture, the
 * intra-row and deblocking-filter row-store scratch buffers, and the
 * 16 reference-picture slots (a literal 0 for each empty slot).
 * Pre-deblocking output and stream-out/status addresses are left 0.
 * NOTE(review): lossy extract -- the reloc delta arguments of two
 * OUT_BCS_RELOC calls and the loop's else branch are on dropped lines.
 */
179 gen6_mfc_pipe_buf_addr_state(VADriverContextP ctx,
180 struct gen6_encoder_context *gen6_encoder_context,
181 struct intel_batchbuffer *batch)
183 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
187 batch = gen6_encoder_context->base.batch;
189 BEGIN_BCS_BATCH(batch, 24);
191 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
193 OUT_BCS_BATCH(batch, 0); /* pre output addr */
195 OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
196 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197 0); /* post output addr */
199 OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
200 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
201 0); /* uncompressed data */
203 OUT_BCS_BATCH(batch, 0); /* StreamOut data*/
204 OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
205 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
207 OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
208 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
210 /* 7..22 Reference pictures*/
211 for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
212 if ( mfc_context->reference_surfaces[i].bo != NULL) {
213 OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
214 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
217 OUT_BCS_BATCH(batch, 0);
220 OUT_BCS_BATCH(batch, 0); /* no block status */
222 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (Gen6, 11 dwords).  The indirect MV
 * object base points at the VME output buffer; the MFC indirect
 * PAK-BSE base is left 0 in this command (the bitstream destination is
 * supplied elsewhere via relocation).
 */
226 gen6_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
227 struct gen6_encoder_context *gen6_encoder_context,
228 struct intel_batchbuffer *batch)
230 struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
233 batch = gen6_encoder_context->base.batch;
235 BEGIN_BCS_BATCH(batch, 11);
237 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
238 OUT_BCS_BATCH(batch, 0);
239 OUT_BCS_BATCH(batch, 0);
240 /* MFX Indirect MV Object Base Address */
241 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
242 OUT_BCS_BATCH(batch, 0);
243 OUT_BCS_BATCH(batch, 0);
244 OUT_BCS_BATCH(batch, 0);
245 OUT_BCS_BATCH(batch, 0);
246 OUT_BCS_BATCH(batch, 0);
247 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
248 OUT_BCS_BATCH(batch, 0);
249 OUT_BCS_BATCH(batch, 0);
251 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (Gen7, 11 dwords).  Same shape as
 * the Gen6 variant, but Gen7 requires the upper-bound dwords for the
 * MV object and the PAK-BSE object to be programmed (0x80000000,
 * i.e. "up to 2G" per the inline comments).
 */
255 gen7_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
256 struct gen6_encoder_context *gen6_encoder_context,
257 struct intel_batchbuffer *batch)
259 struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
262 batch = gen6_encoder_context->base.batch;
264 BEGIN_BCS_BATCH(batch, 11);
266 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
267 OUT_BCS_BATCH(batch, 0);
268 OUT_BCS_BATCH(batch, 0);
269 /* MFX Indirect MV Object Base Address */
270 OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
271 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
272 OUT_BCS_BATCH(batch, 0);
273 OUT_BCS_BATCH(batch, 0);
274 OUT_BCS_BATCH(batch, 0);
275 OUT_BCS_BATCH(batch, 0);
276 /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
277 OUT_BCS_BATCH(batch, 0);
278 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
280 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (4 dwords): only the BSD/MPC
 * row-store scratch buffer is programmed; the remaining two base
 * addresses are left 0.
 * NOTE(review): the reloc delta argument is on a dropped line.
 */
284 gen6_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
285 struct gen6_encoder_context *gen6_encoder_context,
286 struct intel_batchbuffer *batch)
288 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
291 batch = gen6_encoder_context->base.batch;
293 BEGIN_BCS_BATCH(batch, 4);
295 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
296 OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
297 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
299 OUT_BCS_BATCH(batch, 0);
300 OUT_BCS_BATCH(batch, 0);
302 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (Gen6, 13 dwords): frame dimensions in MBs,
 * CABAC entropy coding, chroma format 4:2:0, frame-MB-only, plus fixed
 * inter/intra MB conformance size limits (0xBB8/0xEE8) and constant
 * rate-control dwords (0x8C000000, 0x00010000).
 * NOTE(review): Gen6 writes height_in_mbs/width_in_mbs directly, while
 * the Gen7 variant writes (value - 1); this matches the respective
 * command layouts -- confirm against the PRM before changing.
 */
306 gen6_mfc_avc_img_state(VADriverContextP ctx,
307 struct gen6_encoder_context *gen6_encoder_context,
308 struct intel_batchbuffer *batch)
310 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
311 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
312 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
315 batch = gen6_encoder_context->base.batch;
317 BEGIN_BCS_BATCH(batch, 13);
318 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
320 ((width_in_mbs * height_in_mbs) & 0xFFFF));
322 (height_in_mbs << 16) |
323 (width_in_mbs << 0));
325 (0 << 24) | /*Second Chroma QP Offset*/
326 (0 << 16) | /*Chroma QP Offset*/
327 (0 << 14) | /*Max-bit conformance Intra flag*/
328 (0 << 13) | /*Max Macroblock size conformance Inter flag*/
329 (1 << 12) | /*Should always be written as "1" */
330 (0 << 10) | /*QM Preset FLag */
331 (0 << 8) | /*Image Structure*/
332 (0 << 0) ); /*Current Decoded Image Frame Store ID, reserved in Encode mode*/
334 (0 << 16) | /*Minimum Frame size*/
335 (0 << 15) | /*Disable reading of Macroblock Status Buffer*/
336 (0 << 14) | /*Load BitStream Pointer only once, 1 slice 1 frame*/
337 (0 << 13) | /*CABAC 0 word insertion test enable*/
338 (1 << 12) | /*MVUnpackedEnable,compliant to DXVA*/
339 (1 << 10) | /*Chroma Format IDC, 4:2:0*/
340 (1 << 7) | /*0:CAVLC encoding mode,1:CABAC*/
341 (0 << 6) | /*Only valid for VLD decoding mode*/
342 (0 << 5) | /*Constrained Intra Prediction Flag, from PPS*/
343 (0 << 4) | /*Direct 8x8 inference flag*/
344 (0 << 3) | /*Only 8x8 IDCT Transform Mode Flag*/
345 (1 << 2) | /*Frame MB only flag*/
346 (0 << 1) | /*MBAFF mode is in active*/
347 (0 << 0) ); /*Field picture flag*/
348 OUT_BCS_BATCH(batch, 0); /*Mainly about MB rate control and debug, just ignoring*/
349 OUT_BCS_BATCH(batch, /*Inter and Intra Conformance Max size limit*/
350 (0xBB8 << 16) | /*InterMbMaxSz*/
351 (0xEE8) ); /*IntraMbMaxSz*/
352 OUT_BCS_BATCH(batch, 0); /*Reserved*/
353 OUT_BCS_BATCH(batch, 0); /*Slice QP Delta for bitrate control*/
354 OUT_BCS_BATCH(batch, 0); /*Slice QP Delta for bitrate control*/
355 OUT_BCS_BATCH(batch, 0x8C000000);
356 OUT_BCS_BATCH(batch, 0x00010000);
357 OUT_BCS_BATCH(batch, 0);
359 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (Gen7, 16 dwords).  Same encoder configuration
 * as the Gen6 variant (CABAC, 4:2:0, frame-MB-only, fixed conformance
 * limits) but with the Gen7 layout: dimensions are written as
 * (mbs - 1), weighted-prediction fields replace the Gen6 "must be 1"
 * bits, and three extra trailing reserved dwords are emitted.
 */
363 gen7_mfc_avc_img_state(VADriverContextP ctx,
364 struct gen6_encoder_context *gen6_encoder_context,
365 struct intel_batchbuffer *batch)
367 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
368 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
369 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
372 batch = gen6_encoder_context->base.batch;
374 BEGIN_BCS_BATCH(batch, 16);
375 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
377 ((width_in_mbs * height_in_mbs) & 0xFFFF));
379 ((height_in_mbs - 1) << 16) |
380 ((width_in_mbs - 1) << 0));
382 (0 << 24) | /* Second Chroma QP Offset */
383 (0 << 16) | /* Chroma QP Offset */
384 (0 << 14) | /* Max-bit conformance Intra flag */
385 (0 << 13) | /* Max Macroblock size conformance Inter flag */
386 (0 << 12) | /* FIXME: Weighted_Pred_Flag */
387 (0 << 10) | /* FIXME: Weighted_BiPred_Idc */
388 (0 << 8) | /* FIXME: Image Structure */
389 (0 << 0) ); /* Current Decoded Image Frame Store ID, reserved in Encode mode */
391 (0 << 16) | /* Minimum Frame size */
392 (0 << 15) | /* Disable reading of Macroblock Status Buffer */
393 (0 << 14) | /* Load BitStream Pointer only once, 1 slice 1 frame */
394 (0 << 13) | /* CABAC 0 word insertion test enable */
395 (1 << 12) | /* MVUnpackedEnable,compliant to DXVA */
396 (1 << 10) | /* Chroma Format IDC, 4:2:0 */
397 (0 << 9) | /* FIXME: MbMvFormatFlag */
398 (1 << 7) | /* 0:CAVLC encoding mode,1:CABAC */
399 (0 << 6) | /* Only valid for VLD decoding mode */
400 (0 << 5) | /* Constrained Intra Prediction Flag, from PPS */
401 (0 << 4) | /* Direct 8x8 inference flag */
402 (0 << 3) | /* Only 8x8 IDCT Transform Mode Flag */
403 (1 << 2) | /* Frame MB only flag */
404 (0 << 1) | /* MBAFF mode is in active */
405 (0 << 0)); /* Field picture flag */
406 OUT_BCS_BATCH(batch, 0); /* Mainly about MB rate control and debug, just ignoring */
407 OUT_BCS_BATCH(batch, /* Inter and Intra Conformance Max size limit */
408 (0xBB8 << 16) | /* InterMbMaxSz */
409 (0xEE8) ); /* IntraMbMaxSz */
410 OUT_BCS_BATCH(batch, 0); /* Reserved */
411 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
412 OUT_BCS_BATCH(batch, 0); /* Slice QP Delta for bitrate control */
413 OUT_BCS_BATCH(batch, 0x8C000000);
414 OUT_BCS_BATCH(batch, 0x00010000);
415 OUT_BCS_BATCH(batch, 0);
416 OUT_BCS_BATCH(batch, 0);
417 OUT_BCS_BATCH(batch, 0);
418 OUT_BCS_BATCH(batch, 0);
420 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for a single whole-frame slice:
 * slice QP 26, deblocking enabled, last-slice flag set, with the
 * bitstream destination relocated into the coded-buffer BO at
 * mfc_indirect_pak_bse_object.offset.
 * Fix: removed the stray empty statement after BEGIN_BCS_BATCH
 * ("...);;" -> "...);").
 * NOTE(review): both the I-slice and P-slice slice-type dwords appear
 * here; in the full source they are presumably selected by a dropped
 * conditional on the intra flag -- confirm against the complete file.
 */
423 static void gen6_mfc_avc_slice_state(VADriverContextP ctx,
425 struct gen6_encoder_context *gen6_encoder_context,
426 struct intel_batchbuffer *batch)
428 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
431 batch = gen6_encoder_context->base.batch;
433 BEGIN_BCS_BATCH(batch, 11);
435 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
438 OUT_BCS_BATCH(batch, 2); /*Slice Type: I Slice*/
440 OUT_BCS_BATCH(batch, 0); /*Slice Type: P Slice*/
443 OUT_BCS_BATCH(batch, 0); /*no reference frames and pred_weight_table*/
445 OUT_BCS_BATCH(batch, 0x00010000); /*1 reference frame*/
447 OUT_BCS_BATCH(batch, (0<<24) | /*Enable deblocking operation*/
448 (26<<16) | /*Slice Quantization Parameter*/
450 OUT_BCS_BATCH(batch, 0); /*First MB X&Y , the position of current slice*/
451 OUT_BCS_BATCH(batch, ( ((mfc_context->surface_state.height+15)/16) << 16) );
454 (0<<31) | /*RateControlCounterEnable = disable*/
455 (1<<30) | /*ResetRateControlCounter*/
456 (2<<28) | /*RC Triggle Mode = Loose Rate Control*/
457 (1<<19) | /*IsLastSlice*/
458 (0<<18) | /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
459 (0<<17) | /*HeaderPresentFlag*/
460 (1<<16) | /*SliceData PresentFlag*/
461 (0<<15) | /*TailPresentFlag*/
462 (1<<13) | /*RBSP NAL TYPE*/
463 (0<<12) ); /*CabacZeroWordInsertionEnable*/
465 OUT_BCS_RELOC(batch, mfc_context->mfc_indirect_pak_bse_object.bo,
466 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
467 mfc_context->mfc_indirect_pak_bse_object.offset);
469 OUT_BCS_BATCH(batch, 0);
470 OUT_BCS_BATCH(batch, 0);
471 OUT_BCS_BATCH(batch, 0);
473 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_QM_STATE (58 dwords) with flat scaling matrices: every
 * quantizer-matrix byte is 0x10 (16, the H.264 "flat" default).
 * Note "| 56" equals the usual "(58 - 2)" length encoding.
 */
475 static void gen6_mfc_avc_qm_state(VADriverContextP ctx,
476 struct gen6_encoder_context *gen6_encoder_context,
477 struct intel_batchbuffer *batch)
482 batch = gen6_encoder_context->base.batch;
484 BEGIN_BCS_BATCH(batch, 58);
486 OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | 56);
487 OUT_BCS_BATCH(batch, 0xFF ) ;
488 for( i = 0; i < 56; i++) {
489 OUT_BCS_BATCH(batch, 0x10101010);
492 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFC_AVC_FQM_STATE (113 dwords): flat forward-quantizer matrices,
 * every 16-bit entry 0x1000 (the reciprocal form matching the flat
 * 0x10 QM entries above).
 */
495 static void gen6_mfc_avc_fqm_state(VADriverContextP ctx,
496 struct gen6_encoder_context *gen6_encoder_context,
497 struct intel_batchbuffer *batch)
502 batch = gen6_encoder_context->base.batch;
504 BEGIN_BCS_BATCH(batch, 113);
505 OUT_BCS_BATCH(batch, MFC_AVC_FQM_STATE | (113 - 2));
507 for(i = 0; i < 112;i++) {
508 OUT_BCS_BATCH(batch, 0x10001000);
511 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one Gen7 MFX_QM_STATE command (18 dwords): qm_type selector
 * followed by a fixed 16-dword matrix payload copied from 'qm'
 * (qm_length dwords, at most 16).
 * Fix: zero-initialize qm_buffer.  All 16 dwords are always emitted via
 * intel_batchbuffer_data(), but callers pass qm_length == 12 for the
 * 4x4 matrices, so the tail dwords were previously uninitialized stack
 * data entering the command stream.
 */
515 gen7_mfc_qm_state(VADriverContextP ctx,
519 struct gen6_encoder_context *gen6_encoder_context,
520 struct intel_batchbuffer *batch)
522 unsigned int qm_buffer[16] = {0};
525 batch = gen6_encoder_context->base.batch;
527 assert(qm_length <= 16);
528 assert(sizeof(*qm) == 4);
529 memcpy(qm_buffer, qm, qm_length * 4);
531 BEGIN_BCS_BATCH(batch, 18);
532 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
533 OUT_BCS_BATCH(batch, qm_type << 0);
534 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
535 ADVANCE_BCS_BATCH(batch);
/*
 * Program all four Gen7 AVC quantizer matrices (4x4 intra/inter,
 * 8x8 intra/inter) with flat 0x10 entries by issuing four MFX_QM_STATE
 * commands.  4x4 matrices use 12 payload dwords, 8x8 use 16.
 */
538 static void gen7_mfc_avc_qm_state(VADriverContextP ctx,
539 struct gen6_encoder_context *gen6_encoder_context,
540 struct intel_batchbuffer *batch)
542 unsigned int qm[16] = {
543 0x10101010, 0x10101010, 0x10101010, 0x10101010,
544 0x10101010, 0x10101010, 0x10101010, 0x10101010,
545 0x10101010, 0x10101010, 0x10101010, 0x10101010,
546 0x10101010, 0x10101010, 0x10101010, 0x10101010
549 gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, gen6_encoder_context, batch);
550 gen7_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, gen6_encoder_context, batch);
551 gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, gen6_encoder_context, batch);
552 gen7_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, gen6_encoder_context, batch);
/*
 * Emit one Gen7 MFX_FQM_STATE command (34 dwords): fqm_type selector
 * followed by a fixed 32-dword forward-quantizer payload copied from
 * 'fqm' (fqm_length dwords, at most 32).
 * Fix: zero-initialize fqm_buffer.  All 32 dwords are always emitted,
 * but callers pass fqm_length == 24 for the 4x4 matrices, so the tail
 * dwords were previously uninitialized stack data.
 */
556 gen7_mfc_fqm_state(VADriverContextP ctx,
560 struct gen6_encoder_context *gen6_encoder_context,
561 struct intel_batchbuffer *batch)
563 unsigned int fqm_buffer[32] = {0};
566 batch = gen6_encoder_context->base.batch;
568 assert(fqm_length <= 32);
569 assert(sizeof(*fqm) == 4);
570 memcpy(fqm_buffer, fqm, fqm_length * 4);
572 BEGIN_BCS_BATCH(batch, 34);
573 OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
574 OUT_BCS_BATCH(batch, fqm_type << 0);
575 intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
576 ADVANCE_BCS_BATCH(batch);
/*
 * Program all four Gen7 AVC forward-quantizer matrices with flat
 * 0x1000 entries by issuing four MFX_FQM_STATE commands.  4x4 matrices
 * use 24 payload dwords, 8x8 use 32.
 */
579 static void gen7_mfc_avc_fqm_state(VADriverContextP ctx,
580 struct gen6_encoder_context *gen6_encoder_context,
581 struct intel_batchbuffer *batch)
583 unsigned int qm[32] = {
584 0x10001000, 0x10001000, 0x10001000, 0x10001000,
585 0x10001000, 0x10001000, 0x10001000, 0x10001000,
586 0x10001000, 0x10001000, 0x10001000, 0x10001000,
587 0x10001000, 0x10001000, 0x10001000, 0x10001000,
588 0x10001000, 0x10001000, 0x10001000, 0x10001000,
589 0x10001000, 0x10001000, 0x10001000, 0x10001000,
590 0x10001000, 0x10001000, 0x10001000, 0x10001000,
591 0x10001000, 0x10001000, 0x10001000, 0x10001000
594 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, gen6_encoder_context, batch);
595 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, gen6_encoder_context, batch);
596 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, gen6_encoder_context, batch);
597 gen7_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, gen6_encoder_context, batch);
/*
 * Emit MFX_AVC_REF_IDX_STATE (10 dwords) for list L0 with a single
 * active reference (0x80808000); the remaining entries are 0x80
 * ("invalid") bytes.  Note "| 8" equals the usual "(10 - 2)".
 */
600 static void gen6_mfc_avc_ref_idx_state(VADriverContextP ctx,
601 struct gen6_encoder_context *gen6_encoder_context,
602 struct intel_batchbuffer *batch)
607 batch = gen6_encoder_context->base.batch;
609 BEGIN_BCS_BATCH(batch, 10);
611 OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
612 OUT_BCS_BATCH(batch, 0); //Select L0
614 OUT_BCS_BATCH(batch, 0x80808000); //Only 1 reference
615 for(i = 0; i < 7; i++) {
616 OUT_BCS_BATCH(batch, 0x80808080);
619 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFC_AVC_PAK_OBJECT (11 dwords) for an intra macroblock at
 * MB coordinates (x, y), taking the Intra16x16 mode decision words
 * from the VME output 'msg'.  'end_mb' sets the last-MB flag; 'qp' is
 * the macroblock QP.  Returns len_in_dwords (11).
 * NOTE(review): the caller stores this return value in a variable
 * named object_len_in_bytes -- confirm the expected unit against the
 * full source before relying on it.
 */
623 gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
624 struct gen6_encoder_context *gen6_encoder_context,
625 struct intel_batchbuffer *batch)
627 int len_in_dwords = 11;
630 batch = gen6_encoder_context->base.batch;
632 BEGIN_BCS_BATCH(batch, len_in_dwords);
634 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
635 OUT_BCS_BATCH(batch, 0);
636 OUT_BCS_BATCH(batch, 0);
638 (0 << 24) | /* PackedMvNum, Debug*/
639 (0 << 20) | /* No motion vector */
640 (1 << 19) | /* CbpDcY */
641 (1 << 18) | /* CbpDcU */
642 (1 << 17) | /* CbpDcV */
645 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
646 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
647 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
649 /*Stuff for Intra MB*/
650 OUT_BCS_BATCH(batch, msg[1]); /* We using Intra16x16 no 4x4 predmode*/
651 OUT_BCS_BATCH(batch, msg[2]);
652 OUT_BCS_BATCH(batch, msg[3]&0xFC);
654 OUT_BCS_BATCH(batch, 0x8040000); /*MaxSizeInWord and TargetSizeInWord*/
656 ADVANCE_BCS_BATCH(batch);
658 return len_in_dwords;
/*
 * Emit one MFC_AVC_PAK_OBJECT (11 dwords) for an inter macroblock
 * (MbType P_L0_16x16, 32 indirect MVs read from the MV object at
 * 'offset').  'end_mb' sets the last-MB flag; 'qp' is the macroblock
 * QP.  Returns len_in_dwords (11) -- same unit caveat as the intra
 * variant.
 */
661 static int gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp, unsigned int offset,
662 struct gen6_encoder_context *gen6_encoder_context, struct intel_batchbuffer *batch)
664 int len_in_dwords = 11;
667 batch = gen6_encoder_context->base.batch;
669 BEGIN_BCS_BATCH(batch, len_in_dwords);
671 OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
673 OUT_BCS_BATCH(batch, 32); /* 32 MV*/
674 OUT_BCS_BATCH(batch, offset);
677 (1 << 24) | /* PackedMvNum, Debug*/
678 (4 << 20) | /* 8 MV, SNB don't use it*/
679 (1 << 19) | /* CbpDcY */
680 (1 << 18) | /* CbpDcU */
681 (1 << 17) | /* CbpDcV */
682 (0 << 15) | /* Transform8x8Flag = 0*/
683 (0 << 14) | /* Frame based*/
684 (0 << 13) | /* Inter MB */
685 (1 << 8) | /* MbType = P_L0_16x16 */
686 (0 << 7) | /* MBZ for frame */
688 (2 << 4) | /* MBZ for inter*/
690 (0 << 2) | /* SkipMbFlag */
691 (0 << 0)); /* InterMbMode */
693 OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
694 OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
695 OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
697 /*Stuff for Inter MB*/
698 OUT_BCS_BATCH(batch, 0x0);
699 OUT_BCS_BATCH(batch, 0x0);
700 OUT_BCS_BATCH(batch, 0x0);
702 OUT_BCS_BATCH(batch, 0xF0020000); /*MaxSizeInWord and TargetSizeInWord*/
704 ADVANCE_BCS_BATCH(batch);
706 return len_in_dwords;
/*
 * Per-frame (re)initialization of the MFC context: drop all BO
 * references from the previous frame (outputs, source, coded buffer,
 * direct-MV buffers, reference surfaces) and allocate fresh row-store
 * scratch buffers sized from the sequence parameter's width in MBs.
 * NOTE(review): the dri_bo_alloc() size/name/alignment arguments for
 * the intra row store are on dropped lines in this extract.
 */
709 static void gen6_mfc_init(VADriverContextP ctx,
710 struct encode_state *encode_state,
711 struct gen6_encoder_context *gen6_encoder_context)
713 struct i965_driver_data *i965 = i965_driver_data(ctx);
714 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
717 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
718 int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
720 /*Encode common setup for MFC*/
721 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
722 mfc_context->post_deblocking_output.bo = NULL;
724 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
725 mfc_context->pre_deblocking_output.bo = NULL;
727 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
728 mfc_context->uncompressed_picture_source.bo = NULL;
730 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
731 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
733 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
734 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
735 mfc_context->direct_mv_buffers[i].bo = NULL;
738 for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
739 if (mfc_context->reference_surfaces[i].bo != NULL)
740 dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
741 mfc_context->reference_surfaces[i].bo = NULL;
744 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
745 bo = dri_bo_alloc(i965->intel.bufmgr,
750 mfc_context->intra_row_store_scratch_buffer.bo = bo;
752 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
753 bo = dri_bo_alloc(i965->intel.bufmgr,
755 4 * width_in_mbs * 64, /* 4 * width_in_mbs * 64 */
758 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
760 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
761 bo = dri_bo_alloc(i965->intel.bufmgr,
763 128 * width_in_mbs, /* 2 * width_in_mbs * 64 */
766 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
/*
 * Build the whole per-frame PAK command sequence in a private BSD batch
 * buffer, then chain the main batch to it with MI_BATCH_BUFFER_START.
 *
 * Per macroblock: on 'emit_new_state', (re)emit the full MFX state
 * (pipe mode / surface / indirect objects / buffer addresses / image,
 * QM, FQM, ref-idx and slice state -- Gen7 variants when running on
 * Gen7), then emit one intra or inter PAK object fed from the mapped
 * VME output.  If the private batch runs out of space it is flushed
 * and restarted atomically.  The batch is padded to 8 bytes and
 * terminated with MI_BATCH_BUFFER_END before being chained.
 * NOTE(review): lossy extract -- the loop declarations, the intra/inter
 * branch heads, offset advancement and msg advancement are on dropped
 * lines; only one slice is handled (see the FIXME on slice_params[0]).
 */
769 void gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
770 struct encode_state *encode_state,
771 struct gen6_encoder_context *gen6_encoder_context)
773 struct i965_driver_data *i965 = i965_driver_data(ctx);
774 struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
775 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
776 struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
777 VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
778 VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer; /* FIXME: multi slices */
779 unsigned int *msg = NULL, offset = 0;
780 int emit_new_state = 1, object_len_in_bytes;
781 int is_intra = pSliceParameter->slice_flags.bits.is_intra;
782 int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
783 int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
785 struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, width_in_mbs * height_in_mbs * 12 * 4 + 0x800);
787 intel_batchbuffer_start_atomic_bcs(batch, width_in_mbs * height_in_mbs * 12 * 4 + 0x700);
790 dri_bo_map(vme_context->vme_output.bo , 1);
791 msg = (unsigned int *)vme_context->vme_output.bo->virtual;
794 for (y = 0; y < height_in_mbs; y++) {
795 for (x = 0; x < width_in_mbs; x++) {
796 int last_mb = (y == (height_in_mbs-1)) && ( x == (width_in_mbs-1) );
797 int qp = pSequenceParameter->initial_qp;
799 if (emit_new_state) {
800 intel_batchbuffer_emit_mi_flush(batch);
802 if (IS_GEN7(i965->intel.device_id)) {
803 gen7_mfc_pipe_mode_select(ctx, MFX_FORMAT_AVC, gen6_encoder_context, batch);
804 gen7_mfc_surface_state(ctx, gen6_encoder_context, batch);
805 gen7_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context, batch);
807 gen6_mfc_pipe_mode_select(ctx, gen6_encoder_context, batch);
808 gen6_mfc_surface_state(ctx, gen6_encoder_context, batch);
809 gen6_mfc_ind_obj_base_addr_state(ctx, gen6_encoder_context, batch);
812 gen6_mfc_pipe_buf_addr_state(ctx, gen6_encoder_context, batch);
813 gen6_mfc_bsp_buf_base_addr_state(ctx, gen6_encoder_context, batch);
815 if (IS_GEN7(i965->intel.device_id)) {
816 gen7_mfc_avc_img_state(ctx, gen6_encoder_context, batch);
817 gen7_mfc_avc_qm_state(ctx, gen6_encoder_context, batch);
818 gen7_mfc_avc_fqm_state(ctx, gen6_encoder_context, batch);
820 gen6_mfc_avc_img_state(ctx, gen6_encoder_context, batch);
821 gen6_mfc_avc_qm_state(ctx, gen6_encoder_context, batch);
822 gen6_mfc_avc_fqm_state(ctx, gen6_encoder_context, batch);
825 gen6_mfc_avc_ref_idx_state(ctx, gen6_encoder_context, batch);
826 gen6_mfc_avc_slice_state(ctx, is_intra, gen6_encoder_context, batch);
832 object_len_in_bytes = gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, gen6_encoder_context, batch);
835 object_len_in_bytes = gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, offset, gen6_encoder_context, batch);
839 if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
840 intel_batchbuffer_end_atomic(batch);
841 intel_batchbuffer_flush(batch);
843 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
849 dri_bo_unmap(vme_context->vme_output.bo);
851 intel_batchbuffer_align(batch, 8);
853 BEGIN_BCS_BATCH(batch, 2);
854 OUT_BCS_BATCH(batch, 0);
855 OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
856 ADVANCE_BCS_BATCH(batch);
858 intel_batchbuffer_end_atomic(batch);
860 /* chain to the main batch buffer */
861 intel_batchbuffer_start_atomic_bcs(main_batch, 0x100);
862 intel_batchbuffer_emit_mi_flush(main_batch);
863 BEGIN_BCS_BATCH(main_batch, 2);
864 OUT_BCS_BATCH(main_batch, MI_BATCH_BUFFER_START | (1 << 8));
865 OUT_BCS_RELOC(main_batch,
867 I915_GEM_DOMAIN_COMMAND, 0,
869 ADVANCE_BCS_BATCH(main_batch);
870 intel_batchbuffer_end_atomic(main_batch);
873 intel_batchbuffer_free(batch);
/*
 * Bind all frame I/O objects into the MFC context, then build the BCS
 * command stream:
 *  - reconstructed picture -> post-deblocking output (NV12, allocated
 *    on demand); its dimensions seed surface_state
 *  - reference picture -> reference_surfaces[0] (when it has a BO)
 *  - current render target -> uncompressed source
 *  - coded buffer BO -> PAK-BSE output, with the bitstream placed
 *    after an aligned VACodedBufferSegment header.
 * Ownership: each bound BO gets an extra dri_bo reference, released by
 * gen6_mfc_init() on the next frame or by context destroy.
 * NOTE(review): the 'return vaStatus;' and closing brace are on
 * dropped lines of this extract.
 */
876 static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx,
877 struct encode_state *encode_state,
878 struct gen6_encoder_context *gen6_encoder_context)
880 struct i965_driver_data *i965 = i965_driver_data(ctx);
881 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
882 struct object_surface *obj_surface;
883 struct object_buffer *obj_buffer;
885 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
886 VAStatus vaStatus = VA_STATUS_SUCCESS;
888 /*Setup all the input&output object*/
889 obj_surface = SURFACE(pPicParameter->reconstructed_picture);
891 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
892 mfc_context->post_deblocking_output.bo = obj_surface->bo;
893 dri_bo_reference(mfc_context->post_deblocking_output.bo);
895 mfc_context->surface_state.width = obj_surface->orig_width;
896 mfc_context->surface_state.height = obj_surface->orig_height;
897 mfc_context->surface_state.w_pitch = obj_surface->width;
898 mfc_context->surface_state.h_pitch = obj_surface->height;
900 obj_surface = SURFACE(pPicParameter->reference_picture);
902 if (obj_surface->bo != NULL) {
903 mfc_context->reference_surfaces[0].bo = obj_surface->bo;
904 dri_bo_reference(obj_surface->bo);
907 obj_surface = SURFACE(encode_state->current_render_target);
908 assert(obj_surface && obj_surface->bo);
909 mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
910 dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
912 obj_buffer = BUFFER (pPicParameter->coded_buf); /* FIXME: fix this later */
913 bo = obj_buffer->buffer_store->bo;
915 mfc_context->mfc_indirect_pak_bse_object.bo = bo;
916 mfc_context->mfc_indirect_pak_bse_object.offset = ALIGN(sizeof(VACodedBufferSegment), 64);
917 dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
919 /*Programing bcs pipeline*/
920 gen6_mfc_avc_pipeline_programing(ctx, encode_state, gen6_encoder_context); //filling the pipeline
/*
 * Submit the prepared command stream: flushing the main batch kicks the
 * chained BSD batch built by gen6_mfc_avc_pipeline_programing().
 * Always returns VA_STATUS_SUCCESS.
 */
925 static VAStatus gen6_mfc_run(VADriverContextP ctx,
926 struct encode_state *encode_state,
927 struct gen6_encoder_context *gen6_encoder_context)
929 struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;
931 intel_batchbuffer_flush(batch); //run the pipeline
933 return VA_STATUS_SUCCESS;
/*
 * Post-run hook.  Currently only exercises my_debug() on the
 * reconstructed surface (a debugging leftover -- see the commented-out
 * alternatives); no synchronization or result readback happens here.
 * Always returns VA_STATUS_SUCCESS.
 */
936 static VAStatus gen6_mfc_stop(VADriverContextP ctx,
937 struct encode_state *encode_state,
938 struct gen6_encoder_context *gen6_encoder_context)
941 struct i965_driver_data *i965 = i965_driver_data(ctx);
942 struct gen6_mfc_context *mfc_context = &gen6_encoder_context->mfc_context;
944 VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;
946 struct object_surface *obj_surface = SURFACE(pPicParameter->reconstructed_picture);
947 //struct object_surface *obj_surface = SURFACE(pPicParameter->reference_picture[0]);
948 //struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
949 my_debug(obj_surface);
953 return VA_STATUS_SUCCESS;
/*
 * Encode one AVC picture: init (release/realloc per-frame buffers),
 * prepare (bind surfaces and build commands), run (flush/submit),
 * stop (post-run debug).  Always returns VA_STATUS_SUCCESS.
 */
957 gen6_mfc_avc_encode_picture(VADriverContextP ctx,
958 struct encode_state *encode_state,
959 struct gen6_encoder_context *gen6_encoder_context)
961 gen6_mfc_init(ctx, encode_state, gen6_encoder_context);
962 gen6_mfc_avc_prepare(ctx, encode_state, gen6_encoder_context);
963 gen6_mfc_run(ctx, encode_state, gen6_encoder_context);
964 gen6_mfc_stop(ctx, encode_state, gen6_encoder_context);
966 return VA_STATUS_SUCCESS;
/*
 * MFC entry point: dispatch on profile.  Only H.264 Baseline is
 * handled; everything else yields
 * VA_STATUS_ERROR_UNSUPPORTED_PROFILE.
 * NOTE(review): the switch head, other case labels and the return are
 * on dropped lines of this extract.
 */
970 gen6_mfc_pipeline(VADriverContextP ctx,
972 struct encode_state *encode_state,
973 struct gen6_encoder_context *gen6_encoder_context)
978 case VAProfileH264Baseline:
979 vaStatus = gen6_mfc_avc_encode_picture(ctx, encode_state, gen6_encoder_context);
982 /* FIXME: add for other profile */
984 vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
/*
 * MFC context constructor.
 * NOTE(review): only the signature survives in this extract; the body
 * (presumably trivial, returning True) is on dropped lines.
 */
991 Bool gen6_mfc_context_init(VADriverContextP ctx, struct gen6_mfc_context *mfc_context)
/*
 * MFC context destructor: release every BO reference held by the
 * context (outputs, source, coded buffer, direct-MV buffers, row-store
 * scratch buffers) and NULL the pointers.  Mirrors the per-frame
 * cleanup in gen6_mfc_init(), minus the reference-surface loop, which
 * is on dropped lines here (along with the return).
 */
996 Bool gen6_mfc_context_destroy(struct gen6_mfc_context *mfc_context)
1000 dri_bo_unreference(mfc_context->post_deblocking_output.bo);
1001 mfc_context->post_deblocking_output.bo = NULL;
1003 dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
1004 mfc_context->pre_deblocking_output.bo = NULL;
1006 dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
1007 mfc_context->uncompressed_picture_source.bo = NULL;
1009 dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
1010 mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
1012 for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
1013 dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
1014 mfc_context->direct_mv_buffers[i].bo = NULL;
1017 dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
1018 mfc_context->intra_row_store_scratch_buffer.bo = NULL;
1020 dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
1021 mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1023 dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
1024 mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;