2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
/* True when the GPU revision is B0 stepping or later; used to gate
 * stepping-specific workarounds.  B0_STEP_REV comes from a driver
 * header not visible in this chunk. */
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order for 8x8 transform blocks: entry k is the raster
 * position of the k-th coefficient in scan order.  Used to (de)serialize
 * quantization matrices.  NOTE(review): the closing "};" of this
 * initializer is outside the visible extraction. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/*
 * gen8_mfd_init_avc_surface:
 * Lazily attach/refresh the per-surface AVC private data (GenAvcSurface)
 * on the decode target.  Allocates the direct-MV "top" buffer sized at
 * 128 bytes per macroblock (width_in_mbs * height_in_mbs * 128), and a
 * separate "bottom" MV buffer only for field pictures that do not use
 * direct 8x8 inference (dmv_bottom_flag).
 * NOTE(review): gappy extraction -- the return type, some braces and the
 * trailing dri_bo_alloc argument (presumably alignment) are not visible.
 */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
/* Destructor for the private data; invoked when the surface is freed. */
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface for AVC: allocate the private struct. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 assert((obj_surface->size & 0x3f) == 0);
78 obj_surface->private_data = gen7_avc_surface;
81 gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
82 !pic_param->seq_fields.bits.direct_8x8_inference_flag);
/* Direct-mode MV buffer for the top field / frame. */
84 if (gen7_avc_surface->dmv_top == NULL) {
85 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86 "direct mv w/r buffer",
87 width_in_mbs * height_in_mbs * 128,
89 assert(gen7_avc_surface->dmv_top);
/* Bottom-field MV buffer, only when required (see dmv_bottom_flag). */
92 if (gen7_avc_surface->dmv_bottom_flag &&
93 gen7_avc_surface->dmv_bottom == NULL) {
94 gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
95 "direct mv w/r buffer",
96 width_in_mbs * height_in_mbs * 128,
98 assert(gen7_avc_surface->dmv_bottom);
/*
 * gen8_mfd_pipe_mode_select:
 * Emit the 5-dword MFX_PIPE_MODE_SELECT command configuring the MFX
 * engine for long-format VLD decode of the selected codec
 * (standard_select: MPEG2/AVC/VC1/JPEG/VP8).  Pre/post-deblocking
 * output enables mirror the validity flags set up by the per-codec
 * decode_init functions.
 * NOTE(review): gappy extraction -- the return type, the
 * `standard_select` parameter line, braces, and the OUT_BCS_BATCH(
 * openers for DWORD1/DWORD2 are not visible here.
 */
103 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
104 struct decode_state *decode_state,
106 struct gen7_mfd_context *gen7_mfd_context)
108 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
110 assert(standard_select == MFX_FORMAT_MPEG2 ||
111 standard_select == MFX_FORMAT_AVC ||
112 standard_select == MFX_FORMAT_VC1 ||
113 standard_select == MFX_FORMAT_JPEG ||
114 standard_select == MFX_FORMAT_VP8);
116 BEGIN_BCS_BATCH(batch, 5);
117 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
/* DWORD1: pipeline mode flags. */
119 (MFX_LONG_MODE << 17) | /* Currently only support long format */
120 (MFD_MODE_VLD << 15) | /* VLD mode */
121 (0 << 10) | /* disable Stream-Out */
122 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
123 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
124 (0 << 5) | /* not in stitch mode */
125 (MFX_CODEC_DECODE << 4) | /* decoding mode */
126 (standard_select << 0));
/* DWORD2: error-handling policy -- do not terminate on decode errors. */
128 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
129 (0 << 3) | /* terminate if AVC mbdata error occurs */
130 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
133 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
134 OUT_BCS_BATCH(batch, 0); /* reserved */
135 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_surface_state:
 * Emit the 6-dword MFX_SURFACE_STATE describing the render target of
 * the current decode: planar 4:2:0 8-bit (NV12-style interleaved chroma
 * except for JPEG), Y-major tiled, with width/height/pitch taken from
 * the object_surface and the Cb/Cr plane Y-offsets.
 * NOTE(review): gappy extraction -- the return type, the
 * `standard_select` parameter line, braces, and several OUT_BCS_BATCH(
 * openers are not visible here.
 */
139 gen8_mfd_surface_state(VADriverContextP ctx,
140 struct decode_state *decode_state,
142 struct gen7_mfd_context *gen7_mfd_context)
144 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
145 struct object_surface *obj_surface = decode_state->render_object;
146 unsigned int y_cb_offset;
147 unsigned int y_cr_offset;
151 y_cb_offset = obj_surface->y_cb_offset;
152 y_cr_offset = obj_surface->y_cr_offset;
154 BEGIN_BCS_BATCH(batch, 6);
155 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
156 OUT_BCS_BATCH(batch, 0);
/* DWORD2: surface dimensions (minus one, per hardware convention). */
158 ((obj_surface->orig_height - 1) << 18) |
159 ((obj_surface->orig_width - 1) << 4));
/* DWORD3: format, pitch and tiling. */
161 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
162 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
163 (0 << 22) | /* surface object control state, ignored */
164 ((obj_surface->width - 1) << 3) | /* pitch */
165 (0 << 2) | /* must be 0 */
166 (1 << 1) | /* must be tiled */
167 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
169 (0 << 16) | /* X offset for U(Cb), must be 0 */
170 (y_cb_offset << 0)); /* Y offset for U(Cb) */
172 (0 << 16) | /* X offset for V(Cr), must be 0 */
173 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
174 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_pipe_buf_addr_state:
 * Emit the 61-dword MFX_PIPE_BUF_ADDR_STATE: relocations for the
 * pre/post-deblocking outputs, the intra and deblocking-filter
 * row-store scratch buffers, and up to 16 reference picture surfaces,
 * with zero dwords for every slot that is not valid.
 * NOTE(review): gappy extraction -- return type, `standard_select`
 * parameter, `int i;`, braces, the relocation offset arguments and the
 * else-branches pairing with each `if (...valid)` are not visible here;
 * the dword layout is 64-bit addresses (2 dwords) plus attribute dwords.
 */
178 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
179 struct decode_state *decode_state,
181 struct gen7_mfd_context *gen7_mfd_context)
183 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
186 BEGIN_BCS_BATCH(batch, 61);
187 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188 /* Pre-deblock 1-3 */
189 if (gen7_mfd_context->pre_deblocking_output.valid)
190 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
191 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194 OUT_BCS_BATCH(batch, 0);
196 OUT_BCS_BATCH(batch, 0);
197 OUT_BCS_BATCH(batch, 0);
198 /* Post-deblocking 4-6 */
199 if (gen7_mfd_context->post_deblocking_output.valid)
200 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
201 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204 OUT_BCS_BATCH(batch, 0);
206 OUT_BCS_BATCH(batch, 0);
207 OUT_BCS_BATCH(batch, 0);
209 /* uncompressed-video & stream out 7-12 */
210 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
212 OUT_BCS_BATCH(batch, 0);
213 OUT_BCS_BATCH(batch, 0);
214 OUT_BCS_BATCH(batch, 0);
215 OUT_BCS_BATCH(batch, 0);
217 /* intra row-store scratch 13-15 */
218 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
219 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
220 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
223 OUT_BCS_BATCH(batch, 0);
225 OUT_BCS_BATCH(batch, 0);
226 OUT_BCS_BATCH(batch, 0);
227 /* deblocking-filter-row-store 16-18 */
228 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
229 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
230 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
233 OUT_BCS_BATCH(batch, 0);
234 OUT_BCS_BATCH(batch, 0);
235 OUT_BCS_BATCH(batch, 0);
/* Reference surfaces: one address pair per DPB slot; read-only domain. */
238 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
239 struct object_surface *obj_surface;
241 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
242 gen7_mfd_context->reference_surface[i].obj_surface &&
243 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
244 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
246 OUT_BCS_RELOC(batch, obj_surface->bo,
247 I915_GEM_DOMAIN_INSTRUCTION, 0,
250 OUT_BCS_BATCH(batch, 0);
253 OUT_BCS_BATCH(batch, 0);
256 /* reference property 51 */
257 OUT_BCS_BATCH(batch, 0);
259 /* Macroblock status & ILDB 52-57 */
260 OUT_BCS_BATCH(batch, 0);
261 OUT_BCS_BATCH(batch, 0);
262 OUT_BCS_BATCH(batch, 0);
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
267 /* the second Macroblock status 58-60 */
268 OUT_BCS_BATCH(batch, 0);
269 OUT_BCS_BATCH(batch, 0);
270 OUT_BCS_BATCH(batch, 0);
272 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_ind_obj_base_addr_state:
 * Emit the 26-dword MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream
 * fetch unit at slice_data_bo (read-only relocation) with a 2 GiB upper
 * bound.  The MV, IT_COFF, IT_DBLK and PAK_BSE sections are zeroed --
 * they are unused in VLD decode mode.
 * NOTE(review): gappy extraction -- the return type, `standard_select`
 * parameter and braces are not visible here.
 */
276 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
277 dri_bo *slice_data_bo,
279 struct gen7_mfd_context *gen7_mfd_context)
281 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
283 BEGIN_BCS_BATCH(batch, 26);
284 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
286 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
287 OUT_BCS_BATCH(batch, 0);
288 OUT_BCS_BATCH(batch, 0);
289 /* Upper bound 4-5 */
290 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
291 OUT_BCS_BATCH(batch, 0);
293 /* MFX indirect MV 6-10 */
294 OUT_BCS_BATCH(batch, 0);
295 OUT_BCS_BATCH(batch, 0);
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
300 /* MFX IT_COFF 11-15 */
301 OUT_BCS_BATCH(batch, 0);
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
307 /* MFX IT_DBLK 16-20 */
308 OUT_BCS_BATCH(batch, 0);
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
314 /* MFX PAK_BSE object for encoder 21-25 */
315 OUT_BCS_BATCH(batch, 0);
316 OUT_BCS_BATCH(batch, 0);
317 OUT_BCS_BATCH(batch, 0);
318 OUT_BCS_BATCH(batch, 0);
319 OUT_BCS_BATCH(batch, 0);
321 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_bsp_buf_base_addr_state:
 * Emit the 10-dword MFX_BSP_BUF_BASE_ADDR_STATE: relocations for the
 * BSD/MPC row-store scratch buffer, the MPR row-store scratch buffer,
 * and the VC-1 bitplane read buffer (read-only), each skipped (zeroed)
 * when its validity flag is clear.
 * NOTE(review): gappy extraction -- return type, `standard_select`
 * parameter, braces, relocation offset arguments and the else-branches
 * for the `if (...valid)` tests are not visible here.
 */
325 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
326 struct decode_state *decode_state,
328 struct gen7_mfd_context *gen7_mfd_context)
330 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
332 BEGIN_BCS_BATCH(batch, 10);
333 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
/* BSD/MPC row store scratch buffer 1-3 */
335 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
336 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
337 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
340 OUT_BCS_BATCH(batch, 0);
342 OUT_BCS_BATCH(batch, 0);
343 OUT_BCS_BATCH(batch, 0);
344 /* MPR Row Store Scratch buffer 4-6 */
345 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
346 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
347 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
350 OUT_BCS_BATCH(batch, 0);
352 OUT_BCS_BATCH(batch, 0);
353 OUT_BCS_BATCH(batch, 0);
/* Bitplane read buffer 7-9 (VC-1 only; read-only for the GPU). */
356 if (gen7_mfd_context->bitplane_read_buffer.valid)
357 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
358 I915_GEM_DOMAIN_INSTRUCTION, 0,
361 OUT_BCS_BATCH(batch, 0);
362 OUT_BCS_BATCH(batch, 0);
363 OUT_BCS_BATCH(batch, 0);
364 ADVANCE_BCS_BATCH(batch);
368 gen8_mfd_qm_state(VADriverContextP ctx,
372 struct gen7_mfd_context *gen7_mfd_context)
374 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
375 unsigned int qm_buffer[16];
377 assert(qm_length <= 16 * 4);
378 memcpy(qm_buffer, qm, qm_length);
380 BEGIN_BCS_BATCH(batch, 18);
381 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
382 OUT_BCS_BATCH(batch, qm_type << 0);
383 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
384 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_img_state:
 * Emit the 17-dword MFX_AVC_IMG_STATE from the H.264 picture parameter
 * buffer: frame size in macroblocks, chroma QP offsets, weighted
 * prediction mode, entropy coding mode, MBAFF/field flags, etc.
 * Asserts the profile constraints the MFX unit requires (4:2:0 or
 * monochrome only, consistent field/MBAFF flags).
 * NOTE(review): gappy extraction -- the return type, braces, the
 * `img_struct` computation (inner lines 401-413 are partially missing)
 * and several OUT_BCS_BATCH( openers are not visible here.
 */
388 gen8_mfd_avc_img_state(VADriverContextP ctx,
389 struct decode_state *decode_state,
390 struct gen7_mfd_context *gen7_mfd_context)
392 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
394 int mbaff_frame_flag;
395 unsigned int width_in_mbs, height_in_mbs;
396 VAPictureParameterBufferH264 *pic_param;
398 assert(decode_state->pic_param && decode_state->pic_param->buffer);
399 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
400 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the picture structure from the CurrPic field flags. */
402 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
404 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
/* Field pictures must agree with field_pic_flag. */
409 if ((img_struct & 0x1) == 0x1) {
410 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
412 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
415 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
416 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
417 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
419 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
422 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
423 !pic_param->pic_fields.bits.field_pic_flag);
425 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
426 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
428 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
429 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
430 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
431 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
433 BEGIN_BCS_BATCH(batch, 17);
434 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
/* DWORD1: total macroblock count minus one. */
436 (width_in_mbs * height_in_mbs - 1));
/* DWORD2: frame dimensions in macroblocks (minus one). */
438 ((height_in_mbs - 1) << 16) |
439 ((width_in_mbs - 1) << 0));
/* DWORD3: QP offsets and prediction-mode flags. */
441 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
442 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
443 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
444 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
445 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
446 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
/* DWORD4: coding-tool flags from the sequence/picture parameter sets. */
449 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
450 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
451 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
452 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
453 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
454 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
455 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
456 (mbaff_frame_flag << 1) |
457 (pic_param->pic_fields.bits.field_pic_flag << 0));
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 OUT_BCS_BATCH(batch, 0);
465 OUT_BCS_BATCH(batch, 0);
466 OUT_BCS_BATCH(batch, 0);
467 OUT_BCS_BATCH(batch, 0);
468 OUT_BCS_BATCH(batch, 0);
469 OUT_BCS_BATCH(batch, 0);
470 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_qm_state:
 * Load the AVC scaling lists into the MFX unit via gen8_mfd_qm_state:
 * the six 4x4 lists split into intra (first 3) and inter (last 3)
 * matrices, plus the two 8x8 lists when transform_8x8_mode is on.
 * Falls back to the context's default iq_matrix when the app supplied
 * no IQ matrix buffer.
 * NOTE(review): gappy extraction -- the return type, braces, and the
 * `else` line pairing the fallback assignment with the `if` below are
 * not visible here.
 */
474 gen8_mfd_avc_qm_state(VADriverContextP ctx,
475 struct decode_state *decode_state,
476 struct gen7_mfd_context *gen7_mfd_context)
478 VAIQMatrixBufferH264 *iq_matrix;
479 VAPictureParameterBufferH264 *pic_param;
481 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
482 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
/* (else) default flat matrices initialized by gen8_mfd_avc_context_init. */
484 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
486 assert(decode_state->pic_param && decode_state->pic_param->buffer);
487 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
489 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
490 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
492 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
493 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
494 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
499 gen8_mfd_avc_picid_state(VADriverContextP ctx,
500 struct decode_state *decode_state,
501 struct gen7_mfd_context *gen7_mfd_context)
503 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
505 BEGIN_BCS_BATCH(batch, 10);
506 OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
507 OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
508 OUT_BCS_BATCH(batch, 0);
509 OUT_BCS_BATCH(batch, 0);
510 OUT_BCS_BATCH(batch, 0);
511 OUT_BCS_BATCH(batch, 0);
512 OUT_BCS_BATCH(batch, 0);
513 OUT_BCS_BATCH(batch, 0);
514 OUT_BCS_BATCH(batch, 0);
515 OUT_BCS_BATCH(batch, 0);
516 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_directmode_state:
 * Emit the 71-dword MFX_AVC_DIRECTMODE_STATE: the direct-MV buffers of
 * the 16 DPB reference surfaces (read-only), the current picture's
 * write MV buffer, and the top/bottom POC values for each reference
 * plus the current picture.  Used for B-slice direct/temporal MV
 * prediction.
 * NOTE(review): gappy extraction -- return type, local declarations
 * (`int i, j`), braces, relocation offsets and the else-branches that
 * emit zeros for empty DPB slots are not visible here.
 */
520 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
521 struct decode_state *decode_state,
522 VAPictureParameterBufferH264 *pic_param,
523 VASliceParameterBufferH264 *slice_param,
524 struct gen7_mfd_context *gen7_mfd_context)
526 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527 struct object_surface *obj_surface;
528 GenAvcSurface *gen7_avc_surface;
529 VAPictureH264 *va_pic;
532 BEGIN_BCS_BATCH(batch, 71);
533 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
535 /* reference surfaces 0..15 */
536 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538 gen7_mfd_context->reference_surface[i].obj_surface &&
539 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
541 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542 gen7_avc_surface = obj_surface->private_data;
/* Reference MV buffers are read-only for the GPU (write domain 0). */
544 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545 I915_GEM_DOMAIN_INSTRUCTION, 0,
547 OUT_BCS_BATCH(batch, 0);
549 OUT_BCS_BATCH(batch, 0);
550 OUT_BCS_BATCH(batch, 0);
554 OUT_BCS_BATCH(batch, 0);
556 /* the current decoding frame/field */
557 va_pic = &pic_param->CurrPic;
558 obj_surface = decode_state->render_object;
559 assert(obj_surface->bo && obj_surface->private_data);
560 gen7_avc_surface = obj_surface->private_data;
/* Current picture's MV buffer is written by the decoder. */
562 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
566 OUT_BCS_BATCH(batch, 0);
567 OUT_BCS_BATCH(batch, 0);
/* POC List: top/bottom field order counts for each reference. */
570 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
571 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
574 assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
/* Find the matching entry in the app-supplied ReferenceFrames list. */
576 for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
577 va_pic = &pic_param->ReferenceFrames[j];
579 if (va_pic->flags & VA_PICTURE_H264_INVALID)
582 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
589 assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
591 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
592 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
594 OUT_BCS_BATCH(batch, 0);
595 OUT_BCS_BATCH(batch, 0);
/* Finally the current picture's POCs. */
599 va_pic = &pic_param->CurrPic;
600 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
601 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
603 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_slice_state:
 * Emit the 11-dword MFX_AVC_SLICE_STATE for one slice: normalized slice
 * type (SI->I, SP->P), active reference counts per list, weighted
 * prediction denominators, deblocking controls, slice QP, and the
 * start/end macroblock positions.  The end position comes from
 * next_slice_param, or from the bottom of the picture for the last
 * slice (halved height for field pictures).
 * NOTE(review): gappy extraction -- return type, `int slice_type;`
 * declaration, braces, the else branches and several OUT_BCS_BATCH(
 * openers are not visible here.
 */
607 gen8_mfd_avc_slice_state(VADriverContextP ctx,
608 VAPictureParameterBufferH264 *pic_param,
609 VASliceParameterBufferH264 *slice_param,
610 VASliceParameterBufferH264 *next_slice_param,
611 struct gen7_mfd_context *gen7_mfd_context)
613 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
614 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
615 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
616 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
617 int num_ref_idx_l0, num_ref_idx_l1;
618 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
619 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
620 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Collapse the VA slice types to the hardware's I/P/B encoding. */
623 if (slice_param->slice_type == SLICE_TYPE_I ||
624 slice_param->slice_type == SLICE_TYPE_SI) {
625 slice_type = SLICE_TYPE_I;
626 } else if (slice_param->slice_type == SLICE_TYPE_P ||
627 slice_param->slice_type == SLICE_TYPE_SP) {
628 slice_type = SLICE_TYPE_P;
630 assert(slice_param->slice_type == SLICE_TYPE_B);
631 slice_type = SLICE_TYPE_B;
/* Active reference counts: zero where the list is unused. */
634 if (slice_type == SLICE_TYPE_I) {
635 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
636 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
639 } else if (slice_type == SLICE_TYPE_P) {
640 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
641 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
644 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
645 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* MBAFF doubles the MB address granularity (shift by mbaff_picture). */
648 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
649 slice_hor_pos = first_mb_in_slice % width_in_mbs;
650 slice_ver_pos = first_mb_in_slice / width_in_mbs;
652 if (next_slice_param) {
653 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
654 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
655 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
/* (else) last slice: end at the picture's bottom row. */
657 next_slice_hor_pos = 0;
658 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
661 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663 OUT_BCS_BATCH(batch, slice_type);
/* DWORD2: reference counts and weight-table denominators. */
665 (num_ref_idx_l1 << 24) |
666 (num_ref_idx_l0 << 16) |
667 (slice_param->chroma_log2_weight_denom << 8) |
668 (slice_param->luma_log2_weight_denom << 0));
/* DWORD3: direct mode, deblocking, CABAC init and slice QP. */
670 (slice_param->direct_spatial_mv_pred_flag << 29) |
671 (slice_param->disable_deblocking_filter_idc << 27) |
672 (slice_param->cabac_init_idc << 24) |
673 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
/* DWORD4: slice start position. */
677 (slice_ver_pos << 24) |
678 (slice_hor_pos << 16) |
679 (first_mb_in_slice << 0));
/* DWORD5: next-slice start position (= this slice's end). */
681 (next_slice_ver_pos << 16) |
682 (next_slice_hor_pos << 0));
684 (next_slice_param == NULL) << 19); /* last slice flag */
685 OUT_BCS_BATCH(batch, 0);
686 OUT_BCS_BATCH(batch, 0);
687 OUT_BCS_BATCH(batch, 0);
688 OUT_BCS_BATCH(batch, 0);
689 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_ref_idx_state:
 * Thin wrapper: delegate emission of the MFX_AVC_REF_IDX_STATE
 * command(s) to the shared gen6 helper, passing this context's batch
 * and reference-surface table.
 * NOTE(review): gappy extraction -- the return type, braces and the
 * `slice_param` argument line of the helper call are not visible here.
 */
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694 VAPictureParameterBufferH264 *pic_param,
695 VASliceParameterBufferH264 *slice_param,
696 struct gen7_mfd_context *gen7_mfd_context)
698 gen6_send_avc_ref_idx_state(
699 gen7_mfd_context->base.batch,
701 gen7_mfd_context->reference_surface
/*
 * gen8_mfd_avc_weightoffset_state:
 * Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords each) when explicit
 * weighted prediction is active: one table (list 0) for weighted P/SP
 * slices, two tables (lists 0 and 1) for B slices with
 * weighted_bipred_idc == 1.  Each table packs, per reference index,
 * luma weight/offset and both chroma weight/offset pairs as 16-bit
 * values (32 entries x 6 shorts).
 * NOTE(review): gappy extraction -- return type, braces and the
 * `if (i == 0) ... else ...` lines selecting list 0 vs list 1 are not
 * visible; the two fill loops correspond to those branches.
 */
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707 VAPictureParameterBufferH264 *pic_param,
708 VASliceParameterBufferH264 *slice_param,
709 struct gen7_mfd_context *gen7_mfd_context)
711 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712 int i, j, num_weight_offset_table = 0;
713 short weightoffsets[32 * 6];
715 if ((slice_param->slice_type == SLICE_TYPE_P ||
716 slice_param->slice_type == SLICE_TYPE_SP) &&
717 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718 num_weight_offset_table = 1;
721 if ((slice_param->slice_type == SLICE_TYPE_B) &&
722 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723 num_weight_offset_table = 2;
726 for (i = 0; i < num_weight_offset_table; i++) {
727 BEGIN_BCS_BATCH(batch, 98);
728 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729 OUT_BCS_BATCH(batch, i);
/* i == 0: list 0 weights/offsets. */
732 for (j = 0; j < 32; j++) {
733 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
/* i == 1: list 1 weights/offsets (B slices only). */
741 for (j = 0; j < 32; j++) {
742 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
751 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_bsd_object:
 * Emit the 6-dword MFD_AVC_BSD_OBJECT that kicks decoding of one
 * slice: its size and offset inside slice_data_bo, the byte and bit
 * offset of the first macroblock (past the slice header, computed by
 * avc_get_first_mb_bit_offset), and the last-slice flag.
 * NOTE(review): gappy extraction -- return type, braces, a helper-call
 * argument line and the OUT_BCS_BATCH( openers for DWORDs 1/3/4 are
 * not visible here.
 */
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758 VAPictureParameterBufferH264 *pic_param,
759 VASliceParameterBufferH264 *slice_param,
760 dri_bo *slice_data_bo,
761 VASliceParameterBufferH264 *next_slice_param,
762 struct gen7_mfd_context *gen7_mfd_context)
764 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
767 pic_param->pic_fields.bits.entropy_coding_mode_flag);
769 /* the input bitstream format on GEN7 differs from GEN6 */
770 BEGIN_BCS_BATCH(batch, 6);
771 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
773 (slice_param->slice_data_size));
774 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
/* First-MB location: byte offset in bits 16.., residual bits in 2:0. */
782 ((slice_data_bit_offset >> 3) << 16) |
786 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787 (slice_data_bit_offset & 0x7));
788 OUT_BCS_BATCH(batch, 0);
789 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_avc_context_init:
 * One-time AVC context setup: populate the context's fallback IQ matrix
 * with the default (flat) scaling lists, used when the application does
 * not provide a VAIQMatrixBufferH264.
 */
793 gen8_mfd_avc_context_init(
794 VADriverContextP ctx,
795 struct gen7_mfd_context *gen7_mfd_context
798 /* Initialize flat scaling lists */
799 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * gen8_mfd_avc_decode_init:
 * Per-picture setup before emitting AVC decode commands:
 *  - scan all slices to decide whether in-loop deblocking is enabled
 *    (any slice with disable_deblocking_filter_idc != 1);
 *  - refresh the DPB/frame-store index from ReferenceFrames;
 *  - ensure the render target has an NV12 BO (graying out chroma for
 *    monochrome streams) and AVC private MV buffers;
 *  - route the target BO to post- or pre-deblocking output according
 *    to enable_avc_ildb, and (re)allocate the intra, deblocking,
 *    BSD/MPC and MPR row-store scratch buffers sized by picture width.
 * NOTE(review): gappy extraction -- return type, `dri_bo *bo;`
 * declaration, braces, the deblocking-detection body (inner lines
 * 828-834) and the dri_bo_alloc name/alignment argument lines are not
 * visible here.
 */
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804 struct decode_state *decode_state,
805 struct gen7_mfd_context *gen7_mfd_context)
807 VAPictureParameterBufferH264 *pic_param;
808 VASliceParameterBufferH264 *slice_param;
809 struct i965_driver_data *i965 = i965_driver_data(ctx);
810 struct object_surface *obj_surface;
812 int i, j, enable_avc_ildb = 0;
813 unsigned int width_in_mbs, height_in_mbs;
/* Pass 1: detect whether any slice enables the in-loop deblocker. */
815 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
819 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821 assert((slice_param->slice_type == SLICE_TYPE_I) ||
822 (slice_param->slice_type == SLICE_TYPE_SI) ||
823 (slice_param->slice_type == SLICE_TYPE_P) ||
824 (slice_param->slice_type == SLICE_TYPE_SP) ||
825 (slice_param->slice_type == SLICE_TYPE_B));
827 if (slice_param->disable_deblocking_filter_idc != 1) {
836 assert(decode_state->pic_param && decode_state->pic_param->buffer);
837 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838 intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
839 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
840 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
841 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
842 assert(height_in_mbs > 0 && height_in_mbs <= 256);
844 /* Current decoded picture */
845 obj_surface = decode_state->render_object;
846 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
847 obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
848 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
850 /* initial uv component for YUV400 case */
851 if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
852 unsigned int uv_offset = obj_surface->width * obj_surface->height;
853 unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
/* 0x80 = neutral chroma; map through GTT so tiling is transparent. */
855 drm_intel_gem_bo_map_gtt(obj_surface->bo);
856 memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
857 drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
860 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Deblocked picture goes to the post-deblock output, otherwise pre. */
862 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
863 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
864 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
865 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
867 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
868 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
869 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
870 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
/* Scratch buffers sized by picture width in macroblocks. */
872 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
873 bo = dri_bo_alloc(i965->intel.bufmgr,
878 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
879 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
881 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
882 bo = dri_bo_alloc(i965->intel.bufmgr,
883 "deblocking filter row store",
884 width_in_mbs * 64 * 4,
887 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
888 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
890 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
891 bo = dri_bo_alloc(i965->intel.bufmgr,
893 width_in_mbs * 64 * 2,
896 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
897 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
899 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
900 bo = dri_bo_alloc(i965->intel.bufmgr,
902 width_in_mbs * 64 * 2,
905 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
906 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* Bitplane buffer is VC-1 only. */
908 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * gen8_mfd_avc_decode_picture:
 * Top-level AVC picture decode: run decode_init, then build one atomic
 * BCS batch containing the per-picture state (pipe mode, surface,
 * buffer addresses, QM, IMG, PICID) followed by per-slice state
 * (directmode, ref idx, weight/offset, slice state) and a BSD object
 * per slice.  next_slice_param lets each slice know where the next one
 * starts; NULL marks the last slice of the picture.
 * NOTE(review): gappy extraction -- return type, `int i, j;`
 * declaration, braces and the slice_param advance at the end of the
 * inner loop are not visible here.
 */
912 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
913 struct decode_state *decode_state,
914 struct gen7_mfd_context *gen7_mfd_context)
916 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
917 VAPictureParameterBufferH264 *pic_param;
918 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
919 dri_bo *slice_data_bo;
922 assert(decode_state->pic_param && decode_state->pic_param->buffer);
923 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
924 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
926 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
927 intel_batchbuffer_emit_mi_flush(batch);
/* Per-picture state, emitted once per frame. */
928 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
929 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
930 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
931 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
932 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
933 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
934 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
/* Per slice-parameter-buffer (j), then per slice element (i). */
936 for (j = 0; j < decode_state->num_slice_params; j++) {
937 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
938 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
939 slice_data_bo = decode_state->slice_datas[j]->bo;
940 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
942 if (j == decode_state->num_slice_params - 1)
943 next_slice_group_param = NULL;
945 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
947 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
948 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
949 assert((slice_param->slice_type == SLICE_TYPE_I) ||
950 (slice_param->slice_type == SLICE_TYPE_SI) ||
951 (slice_param->slice_type == SLICE_TYPE_P) ||
952 (slice_param->slice_type == SLICE_TYPE_SP) ||
953 (slice_param->slice_type == SLICE_TYPE_B));
/* The slice after this one: the next element, or the first of the
 * next slice group, or NULL for the picture's last slice. */
955 if (i < decode_state->slice_params[j]->num_elements - 1)
956 next_slice_param = slice_param + 1;
958 next_slice_param = next_slice_group_param;
960 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
961 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
962 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
963 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
964 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
969 intel_batchbuffer_end_atomic(batch);
970 intel_batchbuffer_flush(batch);
/*
 * Set up per-picture decoder state for MPEG-2: register reference surfaces,
 * bind the render target as the pre-deblocking output (MPEG-2 has no in-loop
 * deblocking) and (re)allocate the BSD/MPC row-store scratch buffer sized
 * from the picture width in macroblocks.
 * NOTE(review): the embedded original line numbers are non-contiguous — some
 * lines (e.g. closing braces, dri_bo_alloc size/flags args) appear to have
 * been lost in extraction; code below is kept byte-identical.
 */
974 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
975 struct decode_state *decode_state,
976 struct gen7_mfd_context *gen7_mfd_context)
978 VAPictureParameterBufferMPEG2 *pic_param;
979 struct i965_driver_data *i965 = i965_driver_data(ctx);
980 struct object_surface *obj_surface;
982 unsigned int width_in_mbs;
984 assert(decode_state->pic_param && decode_state->pic_param->buffer);
985 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
/* Width rounded up to a whole number of 16x16 macroblocks. */
986 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
988 mpeg2_set_reference_surfaces(
990 gen7_mfd_context->reference_surface,
995 /* Current decoded picture */
996 obj_surface = decode_state->render_object;
997 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Decoder writes straight to the render target (no deblocking pass). */
999 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1000 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1001 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1002 gen7_mfd_context->pre_deblocking_output.valid = 1;
1004 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1005 bo = dri_bo_alloc(i965->intel.bufmgr,
1006 "bsd mpc row store",
1010 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1011 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* Buffers below are unused for MPEG-2 decode. */
1013 gen7_mfd_context->post_deblocking_output.valid = 0;
1014 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1015 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1016 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1017 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit MFX_MPEG2_PIC_STATE: packs the f_code fields, the picture-coding
 * extension flags, the coding type and the picture size (in macroblocks,
 * minus one) into the 13-dword command.
 * NOTE(review): embedded original line numbers are non-contiguous — the
 * condition that sets slice_concealment_disable_bit appears elided; code
 * kept byte-identical.
 */
1021 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1022 struct decode_state *decode_state,
1023 struct gen7_mfd_context *gen7_mfd_context)
1025 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1026 VAPictureParameterBufferMPEG2 *pic_param;
1027 unsigned int slice_concealment_disable_bit = 0;
1029 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1030 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1032 slice_concealment_disable_bit = 1;
1034 BEGIN_BCS_BATCH(batch, 13);
1035 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* DW1: f_code nibbles (VA packs them MSB-first) + coding-extension flags. */
1036 OUT_BCS_BATCH(batch,
1037 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1038 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1039 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1040 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1041 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1042 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1043 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1044 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1045 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1046 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1047 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1048 pic_param->picture_coding_extension.bits.alternate_scan << 6);
1049 OUT_BCS_BATCH(batch,
1050 pic_param->picture_coding_type << 9);
/* DW3: concealment control + frame size in MBs, zero-based. */
1051 OUT_BCS_BATCH(batch,
1052 (slice_concealment_disable_bit << 31) |
1053 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1054 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* Remaining dwords are reserved/unused for decode. */
1055 OUT_BCS_BATCH(batch, 0);
1056 OUT_BCS_BATCH(batch, 0);
1057 OUT_BCS_BATCH(batch, 0);
1058 OUT_BCS_BATCH(batch, 0);
1059 OUT_BCS_BATCH(batch, 0);
1060 OUT_BCS_BATCH(batch, 0);
1061 OUT_BCS_BATCH(batch, 0);
1062 OUT_BCS_BATCH(batch, 0);
1063 OUT_BCS_BATCH(batch, 0);
1064 ADVANCE_BCS_BATCH(batch);
/*
 * Track and commit the MPEG-2 inverse-quantisation matrices.  New VA IQ
 * matrix buffers are de-zigzagged (via zigzag_direct) into the cached copy
 * in gen7_mfd_context; the cached intra/non-intra matrices are then sent to
 * the hardware with gen8_mfd_qm_state.  A cached load flag of -1 means
 * "never loaded", forcing the first buffer to be taken.
 * NOTE(review): embedded original line numbers are non-contiguous — several
 * closing braces and the qm_type declaration appear elided; code kept
 * byte-identical.
 */
1068 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1069 struct decode_state *decode_state,
1070 struct gen7_mfd_context *gen7_mfd_context)
1072 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1075 /* Update internal QM state */
1076 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1077 VAIQMatrixBufferMPEG2 * const iq_matrix =
1078 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1080 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1081 iq_matrix->load_intra_quantiser_matrix) {
1082 gen_iq_matrix->load_intra_quantiser_matrix =
1083 iq_matrix->load_intra_quantiser_matrix;
1084 if (iq_matrix->load_intra_quantiser_matrix) {
/* VA supplies the matrix in zigzag scan order; store it in raster order. */
1085 for (j = 0; j < 64; j++)
1086 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1087 iq_matrix->intra_quantiser_matrix[j];
1091 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1092 iq_matrix->load_non_intra_quantiser_matrix) {
1093 gen_iq_matrix->load_non_intra_quantiser_matrix =
1094 iq_matrix->load_non_intra_quantiser_matrix;
1095 if (iq_matrix->load_non_intra_quantiser_matrix) {
1096 for (j = 0; j < 64; j++)
1097 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1098 iq_matrix->non_intra_quantiser_matrix[j];
1103 /* Commit QM state to HW */
1104 for (i = 0; i < 2; i++) {
1105 unsigned char *qm = NULL;
1109 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1110 qm = gen_iq_matrix->intra_quantiser_matrix;
1111 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1114 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1115 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1116 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1123 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emit MFD_MPEG2_BSD_OBJECT for one slice: bitstream offset/length (byte
 * part of macroblock_offset folded into the offset, bit remainder sent in
 * DW3) plus the macroblock span this slice covers, derived from this
 * slice's position and the next slice's (or the picture bottom if last).
 * is_field_pic_wa applies the field-picture vertical-position workaround
 * when gen7_mfd_context->wa_mpeg2_slice_vertical_position is set.
 * NOTE(review): embedded original line numbers are non-contiguous — the
 * `is_field_pic = 1;`, `else` lines and part of DW3 appear elided; code
 * kept byte-identical.
 */
1128 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1129 VAPictureParameterBufferMPEG2 *pic_param,
1130 VASliceParameterBufferMPEG2 *slice_param,
1131 VASliceParameterBufferMPEG2 *next_slice_param,
1132 struct gen7_mfd_context *gen7_mfd_context)
1134 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1135 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1136 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1138 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1139 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1141 is_field_pic_wa = is_field_pic &&
1142 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1144 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1145 hpos0 = slice_param->slice_horizontal_position;
1147 if (next_slice_param == NULL) {
/* Last slice: runs to the bottom of the (field) picture. */
1148 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1151 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1152 hpos1 = next_slice_param->slice_horizontal_position;
1155 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1157 BEGIN_BCS_BATCH(batch, 5);
1158 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* Whole bytes of macroblock_offset are absorbed into the data offset;
 * the remaining 0..7 bits go into DW3. */
1159 OUT_BCS_BATCH(batch,
1160 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1161 OUT_BCS_BATCH(batch,
1162 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1163 OUT_BCS_BATCH(batch,
1167 (next_slice_param == NULL) << 5 |
1168 (next_slice_param == NULL) << 3 |
1169 (slice_param->macroblock_offset & 0x7));
1170 OUT_BCS_BATCH(batch,
1171 (slice_param->quantiser_scale_code << 24) |
1172 (vpos1 << 8 | hpos1));
1173 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 decode entry: initialise per-picture state, emit the
 * common MFX pipe/surface/buffer commands, the MPEG-2 picture and QM state,
 * then one BSD object per slice element across all slice-parameter buffers.
 * The whole sequence is emitted atomically on the BCS ring and flushed.
 * NOTE(review): embedded original line numbers are non-contiguous — `else`
 * lines and closing braces in the slice loops appear elided; code kept
 * byte-identical.
 */
1177 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1178 struct decode_state *decode_state,
1179 struct gen7_mfd_context *gen7_mfd_context)
1181 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1182 VAPictureParameterBufferMPEG2 *pic_param;
1183 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1184 dri_bo *slice_data_bo;
1187 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1188 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1190 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1191 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1192 intel_batchbuffer_emit_mi_flush(batch);
1193 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1194 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1195 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1196 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1197 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1198 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Detect the slice-vertical-position workaround lazily, once per context. */
1200 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1201 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1202 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1204 for (j = 0; j < decode_state->num_slice_params; j++) {
1205 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1206 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1207 slice_data_bo = decode_state->slice_datas[j]->bo;
1208 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* First slice of the next buffer, used to bound the last slice here. */
1210 if (j == decode_state->num_slice_params - 1)
1211 next_slice_group_param = NULL;
1213 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1215 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1216 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1218 if (i < decode_state->slice_params[j]->num_elements - 1)
1219 next_slice_param = slice_param + 1;
1221 next_slice_param = next_slice_group_param;
1223 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1228 intel_batchbuffer_end_atomic(batch);
1229 intel_batchbuffer_flush(batch);
/*
 * VA-API -> GEN7 MFX translation tables for VC-1 decode.
 * NOTE(review): embedded original line numbers are non-contiguous — several
 * table entries appear elided (e.g. b_picture_scale_factor is declared [21]
 * but only 20 values are visible); entries kept byte-identical.
 */
/* Index: VAPictureParameterBufferVC1 picture_type. */
1232 static const int va_to_gen7_vc1_pic_type[5] = {
1236 GEN7_VC1_BI_PICTURE,
/* Index: VC-1 MV mode; value: hardware unified MV mode code. */
1240 static const int va_to_gen7_vc1_mv[4] = {
1242 2, /* 1-MV half-pel */
1243 3, /* 1-MV half-pel bilinear */
/* B-fraction scale factors, indexed by b_picture_fraction (0..20). */
1247 static const int b_picture_scale_factor[21] = {
1248 128, 85, 170, 64, 192,
1249 51, 102, 153, 204, 43,
1250 215, 37, 74, 111, 148,
1251 185, 222, 32, 96, 160,
/* Index: conditional_overlap_flag (0..2). */
1255 static const int va_to_gen7_vc1_condover[3] = {
/* Index: sequence_fields.bits.profile (0..3). */
1261 static const int va_to_gen7_vc1_profile[4] = {
1262 GEN7_VC1_SIMPLE_PROFILE,
1263 GEN7_VC1_MAIN_PROFILE,
1264 GEN7_VC1_RESERVED_PROFILE,
1265 GEN7_VC1_ADVANCED_PROFILE
1269 gen8_mfd_free_vc1_surface(void **data)
1271 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1273 if (!gen7_vc1_surface)
1276 dri_bo_unreference(gen7_vc1_surface->dmv);
1277 free(gen7_vc1_surface);
/*
 * Lazily attach VC-1 private state to a surface: allocate the
 * gen7_vc1_surface struct on first use, record the picture type, and
 * allocate the direct-MV read/write buffer (64 bytes per macroblock).
 * NOTE(review): embedded original line numbers are non-contiguous — closing
 * braces and the tail dri_bo_alloc arguments appear elided; code kept
 * byte-identical.
 */
1282 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1283 VAPictureParameterBufferVC1 *pic_param,
1284 struct object_surface *obj_surface)
1286 struct i965_driver_data *i965 = i965_driver_data(ctx);
1287 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1288 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1289 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Ensure the surface knows how to release this private data. */
1291 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1293 if (!gen7_vc1_surface) {
1294 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1295 assert((obj_surface->size & 0x3f) == 0);
1296 obj_surface->private_data = gen7_vc1_surface;
/* Needed later to validate this surface as a direct-mode reference. */
1299 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1301 if (gen7_vc1_surface->dmv == NULL) {
1302 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303 "direct mv w/r buffer",
1304 width_in_mbs * height_in_mbs * 64,
/*
 * Set up per-picture decoder state for VC-1: update the frame-store index,
 * attach VC-1 private state to the render target, route output through the
 * post- or pre-deblocking path depending on the loop-filter flag, allocate
 * the row-store scratch buffers, and repack the VA bit-plane buffer (two
 * macroblocks per byte, nibble-swapped) into the hardware layout.
 * NOTE(review): embedded original line numbers are non-contiguous — loop
 * braces, some dri_bo_alloc arguments and `else` branches appear elided;
 * code kept byte-identical.
 */
1310 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1311 struct decode_state *decode_state,
1312 struct gen7_mfd_context *gen7_mfd_context)
1314 VAPictureParameterBufferVC1 *pic_param;
1315 struct i965_driver_data *i965 = i965_driver_data(ctx);
1316 struct object_surface *obj_surface;
1321 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1322 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1323 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1324 picture_type = pic_param->picture_fields.bits.picture_type;
1326 intel_update_vc1_frame_store_index(ctx,
1329 gen7_mfd_context->reference_surface);
1331 /* Current decoded picture */
1332 obj_surface = decode_state->render_object;
1333 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1334 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* Exactly one of post-/pre-deblocking output is valid, chosen by the
 * entry-point loop-filter flag. */
1336 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1337 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1338 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1339 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1341 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1342 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1343 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1344 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1346 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1347 bo = dri_bo_alloc(i965->intel.bufmgr,
1352 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1353 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1355 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1356 bo = dri_bo_alloc(i965->intel.bufmgr,
1357 "deblocking filter row store",
1358 width_in_mbs * 7 * 64,
1361 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1362 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1364 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1365 bo = dri_bo_alloc(i965->intel.bufmgr,
1366 "bsd mpc row store",
1370 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1371 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1373 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1375 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1376 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1378 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1379 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1380 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Hardware stores two macroblocks' bit-plane nibbles per byte. */
1381 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1383 uint8_t *src = NULL, *dst = NULL;
1385 assert(decode_state->bit_plane->buffer);
1386 src = decode_state->bit_plane->buffer;
1388 bo = dri_bo_alloc(i965->intel.bufmgr,
1390 bitplane_width * height_in_mbs,
1393 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1395 dri_bo_map(bo, True);
1396 assert(bo->virtual);
1399 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1400 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1401 int src_index, dst_index;
/* Source packs two MBs per byte, high nibble first. */
1405 src_index = (src_h * width_in_mbs + src_w) / 2;
1406 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1407 src_value = ((src[src_index] >> src_shift) & 0xf);
1409 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1413 dst_index = src_w / 2;
1414 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd trailing MB: shift the lone nibble into place. */
1418 dst[src_w / 2] >>= 4;
1420 dst += bitplane_width;
1425 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Emit MFD_VC1_LONG_PIC_STATE: derives the hardware picture state from the
 * VA picture parameters — alternate-PQUANT configuration, unified MV mode,
 * B-picture scale factor / reference distance, direct-MV surface validity,
 * transform-type fix-up, conditional-overlap smoothing, and the interpolation
 * filter — then packs everything into the 6-dword command.
 * NOTE(review): embedded original line numbers are non-contiguous — `else`
 * lines, `case`/`break` labels, closing braces and several declarations
 * (profile, picture_type, fcm, brfd, trans_ac_y, overlap, loopfilter,
 * bitplane_present...) appear elided; code kept byte-identical.
 */
1429 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1430 struct decode_state *decode_state,
1431 struct gen7_mfd_context *gen7_mfd_context)
1433 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1434 VAPictureParameterBufferVC1 *pic_param;
1435 struct object_surface *obj_surface;
1436 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1437 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1438 int unified_mv_mode;
1439 int ref_field_pic_polarity = 0;
1440 int scale_factor = 0;
1442 int dmv_surface_valid = 0;
1448 int interpolation_mode = 0;
1450 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1451 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1453 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1454 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1455 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1456 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1457 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1458 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1459 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1460 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Map DQUANT syntax onto the hardware alt-PQUANT config + edge mask. */
1463 alt_pquant_config = 0;
1464 alt_pquant_edge_mask = 0;
1465 } else if (dquant == 2) {
1466 alt_pquant_config = 1;
1467 alt_pquant_edge_mask = 0xf;
1469 assert(dquant == 1);
1470 if (dquantfrm == 0) {
1471 alt_pquant_config = 0;
1472 alt_pquant_edge_mask = 0;
1475 assert(dquantfrm == 1);
1476 alt_pquant_config = 1;
1478 switch (dqprofile) {
1480 if (dqbilevel == 0) {
1481 alt_pquant_config = 2;
1482 alt_pquant_edge_mask = 0;
1484 assert(dqbilevel == 1);
1485 alt_pquant_config = 3;
1486 alt_pquant_edge_mask = 0;
1491 alt_pquant_edge_mask = 0xf;
1496 alt_pquant_edge_mask = 0x9;
1498 alt_pquant_edge_mask = (0x3 << dqdbedge);
1503 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* Intensity compensation carries the real MV mode in mv_mode2. */
1512 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1513 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1514 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1516 assert(pic_param->mv_fields.bits.mv_mode < 4);
1517 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1520 if (pic_param->sequence_fields.bits.interlace == 1 &&
1521 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1522 /* FIXME: calculate reference field picture polarity */
1524 ref_field_pic_polarity = 0;
1527 if (pic_param->b_picture_fraction < 21)
1528 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1530 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* Advanced-profile I pictures are decoded as BI by the hardware. */
1532 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1533 picture_type == GEN7_VC1_I_PICTURE)
1534 picture_type = GEN7_VC1_BI_PICTURE;
1536 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1537 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1539 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1542 * 8.3.6.2.1 Transform Type Selection
1543 * If variable-sized transform coding is not enabled,
1544 * then the 8x8 transform shall be used for all blocks.
1545 * it is also MFX_VC1_PIC_STATE requirement.
1547 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1548 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1549 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* Direct-MV buffer is only usable when the backward reference was P-coded. */
1553 if (picture_type == GEN7_VC1_B_PICTURE) {
1554 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1556 obj_surface = decode_state->reference_objects[1];
1559 gen7_vc1_surface = obj_surface->private_data;
1561 if (!gen7_vc1_surface ||
1562 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1563 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1564 dmv_surface_valid = 0;
1566 dmv_surface_valid = 1;
1569 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1571 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1572 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1574 if (pic_param->picture_fields.bits.top_field_first)
/* B-picture: derive the backward reference distance from the scale factor. */
1580 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1581 brfd = pic_param->reference_fields.bits.reference_distance;
1582 brfd = (scale_factor * brfd) >> 8;
1583 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
/* Overlap smoothing rules differ between simple/main and advanced profile. */
1590 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1591 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1592 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1596 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1597 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1600 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1601 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1602 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1604 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1605 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1611 assert(pic_param->conditional_overlap_flag < 3);
1612 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
/* Pick the sub-pel interpolation filter from the effective MV mode. */
1614 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1615 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1616 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1617 interpolation_mode = 9; /* Half-pel bilinear */
1618 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1619 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1620 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1621 interpolation_mode = 1; /* Half-pel bicubic */
1623 interpolation_mode = 0; /* Quarter-pel bicubic */
1625 BEGIN_BCS_BATCH(batch, 6);
1626 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1627 OUT_BCS_BATCH(batch,
1628 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1629 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1630 OUT_BCS_BATCH(batch,
1631 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1632 dmv_surface_valid << 15 |
1633 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1634 pic_param->rounding_control << 13 |
1635 pic_param->sequence_fields.bits.syncmarker << 12 |
1636 interpolation_mode << 8 |
1637 0 << 7 | /* FIXME: scale up or down ??? */
1638 pic_param->range_reduction_frame << 6 |
1639 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1641 !pic_param->picture_fields.bits.is_first_field << 3 |
1642 (pic_param->sequence_fields.bits.profile == 3) << 0);
1643 OUT_BCS_BATCH(batch,
1644 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1645 picture_type << 26 |
1648 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1650 OUT_BCS_BATCH(batch,
1651 unified_mv_mode << 28 |
1652 pic_param->mv_fields.bits.four_mv_switch << 27 |
1653 pic_param->fast_uvmc_flag << 26 |
1654 ref_field_pic_polarity << 25 |
1655 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1656 pic_param->reference_fields.bits.reference_distance << 20 |
1657 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1658 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1659 pic_param->mv_fields.bits.extended_mv_range << 8 |
1660 alt_pquant_edge_mask << 4 |
1661 alt_pquant_config << 2 |
1662 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1663 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: bit-plane presence flags are inverted ("raw mode" = not coded). */
1664 OUT_BCS_BATCH(batch,
1665 !!pic_param->bitplane_present.value << 31 |
1666 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1667 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1668 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1669 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1670 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1671 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1672 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1673 pic_param->mv_fields.bits.mv_table << 20 |
1674 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1675 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1676 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1677 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1678 pic_param->mb_mode_table << 8 |
1680 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1681 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1682 pic_param->cbp_table << 0);
1683 ADVANCE_BCS_BATCH(batch);
1687 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1688 struct decode_state *decode_state,
1689 struct gen7_mfd_context *gen7_mfd_context)
1691 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1692 VAPictureParameterBufferVC1 *pic_param;
1693 int intensitycomp_single;
1695 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1696 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1698 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1699 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1700 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1702 BEGIN_BCS_BATCH(batch, 6);
1703 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1704 OUT_BCS_BATCH(batch,
1705 0 << 14 | /* FIXME: double ??? */
1707 intensitycomp_single << 10 |
1708 intensitycomp_single << 8 |
1709 0 << 4 | /* FIXME: interlace mode */
1711 OUT_BCS_BATCH(batch,
1712 pic_param->luma_shift << 16 |
1713 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1714 OUT_BCS_BATCH(batch, 0);
1715 OUT_BCS_BATCH(batch, 0);
1716 OUT_BCS_BATCH(batch, 0);
1717 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_VC1_DIRECTMODE_STATE: the direct-MV write buffer comes from the
 * current render target's VC-1 private data, the read buffer from the
 * backward reference (reference_objects[1]).  Missing buffers are emitted
 * as zero (NULL address).
 * NOTE(review): embedded original line numbers are non-contiguous — the
 * reloc offset arguments and `else` lines appear elided; code kept
 * byte-identical.
 */
1721 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1722 struct decode_state *decode_state,
1723 struct gen7_mfd_context *gen7_mfd_context)
1725 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1726 struct object_surface *obj_surface;
1727 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1729 obj_surface = decode_state->render_object;
1731 if (obj_surface && obj_surface->private_data) {
1732 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1735 obj_surface = decode_state->reference_objects[1];
1737 if (obj_surface && obj_surface->private_data) {
1738 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1741 BEGIN_BCS_BATCH(batch, 7);
1742 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1744 if (dmv_write_buffer)
1745 OUT_BCS_RELOC(batch, dmv_write_buffer,
1746 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1749 OUT_BCS_BATCH(batch, 0);
1751 OUT_BCS_BATCH(batch, 0);
1752 OUT_BCS_BATCH(batch, 0);
1754 if (dmv_read_buffer)
1755 OUT_BCS_RELOC(batch, dmv_read_buffer,
1756 I915_GEM_DOMAIN_INSTRUCTION, 0,
1759 OUT_BCS_BATCH(batch, 0);
1761 OUT_BCS_BATCH(batch, 0);
1762 OUT_BCS_BATCH(batch, 0);
1764 ADVANCE_BCS_BATCH(batch);
/*
 * Translate the VA macroblock bit offset (counted over the *escaped*
 * bitstream) into the offset the hardware expects, accounting for VC-1
 * advanced-profile start-code emulation prevention: every 0x00 0x00 0x03
 * sequence followed by a byte < 4 hides one stuffing byte inside the slice
 * header, shifting the real payload start.
 *
 * For non-advanced profiles (profile != 3) there is no escaping and the
 * offset passes through unchanged.  For advanced profile, scan the
 * slice-header bytes, skipping each escape triplet (the scan advances j by
 * two extra bytes), and rebuild the offset from the adjusted byte position
 * plus the original bit remainder.
 *
 * Fix over the visible text: the profile gating and the escape-skip
 * increment were missing, so the scan result unconditionally overwrote the
 * pass-through offset and never skipped emulation bytes.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int out_slice_data_bit_offset;
    int slice_header_size = in_slice_data_bit_offset / 8;
    int i, j;

    if (profile != 3) /* only advanced profile uses emulation prevention */
        out_slice_data_bit_offset = in_slice_data_bit_offset;
    else {
        for (i = 0, j = 0; i < slice_header_size; i++, j++) {
            if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
                /* 00 00 03 0x: the 0x03 is a stuffing byte — skip past it */
                i++, j += 2;
            }
        }

        out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
    }

    return out_slice_data_bit_offset;
}
/*
 * Emit MFD_VC1_BSD_OBJECT for one slice.  The slice data is mapped to
 * recompute the macroblock bit offset past any advanced-profile emulation
 * prevention bytes; whole bytes of that offset are folded into the data
 * offset/length and the bit remainder goes into the final dword.  The
 * vertical span runs to the next slice, or the picture bottom if last.
 * NOTE(review): embedded original line numbers are non-contiguous — `else`
 * lines and braces appear elided; code kept byte-identical.
 */
1790 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1791 VAPictureParameterBufferVC1 *pic_param,
1792 VASliceParameterBufferVC1 *slice_param,
1793 VASliceParameterBufferVC1 *next_slice_param,
1794 dri_bo *slice_data_bo,
1795 struct gen7_mfd_context *gen7_mfd_context)
1797 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1798 int next_slice_start_vert_pos;
1799 int macroblock_offset;
1800 uint8_t *slice_data = NULL;
/* CPU-map the slice data to scan for emulation-prevention bytes. */
1802 dri_bo_map(slice_data_bo, 0);
1803 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1804 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1805 slice_param->macroblock_offset,
1806 pic_param->sequence_fields.bits.profile);
1807 dri_bo_unmap(slice_data_bo);
1809 if (next_slice_param)
1810 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1812 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1814 BEGIN_BCS_BATCH(batch, 5);
1815 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1816 OUT_BCS_BATCH(batch,
1817 slice_param->slice_data_size - (macroblock_offset >> 3));
1818 OUT_BCS_BATCH(batch,
1819 slice_param->slice_data_offset + (macroblock_offset >> 3));
1820 OUT_BCS_BATCH(batch,
1821 slice_param->slice_vertical_position << 16 |
1822 next_slice_start_vert_pos << 0);
1823 OUT_BCS_BATCH(batch,
1824 (macroblock_offset & 0x7));
1825 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 decode entry: initialise per-picture state, emit the
 * common MFX pipe/surface/buffer commands, the VC-1 picture, prediction-
 * pipe and direct-mode state, then one BSD object per slice element.
 * Emitted atomically on the BCS ring and flushed.
 * NOTE(review): embedded original line numbers are non-contiguous — `else`
 * lines and closing braces in the slice loops appear elided; code kept
 * byte-identical.
 */
1829 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1830 struct decode_state *decode_state,
1831 struct gen7_mfd_context *gen7_mfd_context)
1833 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1834 VAPictureParameterBufferVC1 *pic_param;
1835 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1836 dri_bo *slice_data_bo;
1839 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1840 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1842 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1843 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1844 intel_batchbuffer_emit_mi_flush(batch);
1845 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1846 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1847 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1848 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1849 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1850 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1851 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1853 for (j = 0; j < decode_state->num_slice_params; j++) {
1854 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1855 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1856 slice_data_bo = decode_state->slice_datas[j]->bo;
1857 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* First slice of the next buffer, used to bound the last slice here. */
1859 if (j == decode_state->num_slice_params - 1)
1860 next_slice_group_param = NULL;
1862 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1864 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1865 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1867 if (i < decode_state->slice_params[j]->num_elements - 1)
1868 next_slice_param = slice_param + 1;
1870 next_slice_param = next_slice_group_param;
1872 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1877 intel_batchbuffer_end_atomic(batch);
1878 intel_batchbuffer_flush(batch);
1882 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1883 struct decode_state *decode_state,
1884 struct gen7_mfd_context *gen7_mfd_context)
1886 struct object_surface *obj_surface;
1887 VAPictureParameterBufferJPEGBaseline *pic_param;
1888 int subsampling = SUBSAMPLE_YUV420;
1889 int fourcc = VA_FOURCC_IMC3;
1891 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1893 if (pic_param->num_components == 1)
1894 subsampling = SUBSAMPLE_YUV400;
1895 else if (pic_param->num_components == 3) {
1896 int h1 = pic_param->components[0].h_sampling_factor;
1897 int h2 = pic_param->components[1].h_sampling_factor;
1898 int h3 = pic_param->components[2].h_sampling_factor;
1899 int v1 = pic_param->components[0].v_sampling_factor;
1900 int v2 = pic_param->components[1].v_sampling_factor;
1901 int v3 = pic_param->components[2].v_sampling_factor;
1903 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1904 v1 == 2 && v2 == 1 && v3 == 1) {
1905 subsampling = SUBSAMPLE_YUV420;
1906 fourcc = VA_FOURCC_IMC3;
1907 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1908 v1 == 1 && v2 == 1 && v3 == 1) {
1909 subsampling = SUBSAMPLE_YUV422H;
1910 fourcc = VA_FOURCC_422H;
1911 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1912 v1 == 1 && v2 == 1 && v3 == 1) {
1913 subsampling = SUBSAMPLE_YUV444;
1914 fourcc = VA_FOURCC_444P;
1915 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1916 v1 == 1 && v2 == 1 && v3 == 1) {
1917 subsampling = SUBSAMPLE_YUV411;
1918 fourcc = VA_FOURCC_411P;
1919 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1920 v1 == 2 && v2 == 1 && v3 == 1) {
1921 subsampling = SUBSAMPLE_YUV422V;
1922 fourcc = VA_FOURCC_422V;
1923 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1924 v1 == 2 && v2 == 2 && v3 == 2) {
1925 subsampling = SUBSAMPLE_YUV422H;
1926 fourcc = VA_FOURCC_422H;
1927 } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1928 v1 == 2 && v2 == 1 && v3 == 1) {
1929 subsampling = SUBSAMPLE_YUV422V;
1930 fourcc = VA_FOURCC_422V;
1938 /* Current decoded picture */
1939 obj_surface = decode_state->render_object;
1940 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1942 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1943 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1944 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1945 gen7_mfd_context->pre_deblocking_output.valid = 1;
1947 gen7_mfd_context->post_deblocking_output.bo = NULL;
1948 gen7_mfd_context->post_deblocking_output.valid = 0;
1950 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1951 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1953 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1954 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1956 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1957 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1959 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1960 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1962 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1963 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Map VA rotation indices (0/90/180/270 degrees) to the rotation field of
 * MFX_JPEG_PIC_STATE; only index 0 (no rotation) is used in this file. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
1974 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1975 struct decode_state *decode_state,
1976 struct gen7_mfd_context *gen7_mfd_context)
1978 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1979 VAPictureParameterBufferJPEGBaseline *pic_param;
1980 int chroma_type = GEN7_YUV420;
1981 int frame_width_in_blks;
1982 int frame_height_in_blks;
1984 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1985 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1987 if (pic_param->num_components == 1)
1988 chroma_type = GEN7_YUV400;
1989 else if (pic_param->num_components == 3) {
1990 int h1 = pic_param->components[0].h_sampling_factor;
1991 int h2 = pic_param->components[1].h_sampling_factor;
1992 int h3 = pic_param->components[2].h_sampling_factor;
1993 int v1 = pic_param->components[0].v_sampling_factor;
1994 int v2 = pic_param->components[1].v_sampling_factor;
1995 int v3 = pic_param->components[2].v_sampling_factor;
1997 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1998 v1 == 2 && v2 == 1 && v3 == 1)
1999 chroma_type = GEN7_YUV420;
2000 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2001 v1 == 1 && v2 == 1 && v3 == 1)
2002 chroma_type = GEN7_YUV422H_2Y;
2003 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2004 v1 == 1 && v2 == 1 && v3 == 1)
2005 chroma_type = GEN7_YUV444;
2006 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2007 v1 == 1 && v2 == 1 && v3 == 1)
2008 chroma_type = GEN7_YUV411;
2009 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2010 v1 == 2 && v2 == 1 && v3 == 1)
2011 chroma_type = GEN7_YUV422V_2Y;
2012 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2013 v1 == 2 && v2 == 2 && v3 == 2)
2014 chroma_type = GEN7_YUV422H_4Y;
2015 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2016 v1 == 2 && v2 == 1 && v3 == 1)
2017 chroma_type = GEN7_YUV422V_4Y;
2022 if (chroma_type == GEN7_YUV400 ||
2023 chroma_type == GEN7_YUV444 ||
2024 chroma_type == GEN7_YUV422V_2Y) {
2025 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2026 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2027 } else if (chroma_type == GEN7_YUV411) {
2028 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2029 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2031 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2032 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2035 BEGIN_BCS_BATCH(batch, 3);
2036 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2037 OUT_BCS_BATCH(batch,
2038 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2039 (chroma_type << 0));
2040 OUT_BCS_BATCH(batch,
2041 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2042 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2043 ADVANCE_BCS_BATCH(batch);
/* Map huffman table slot (0/1) to the hardware table id.
 * NOTE(review): the two entries were lost in extraction and restored from the
 * upstream driver — verify against the original file. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFMAN_TABLE_ID_Y,
    MFX_HUFFMAN_TABLE_ID_UV
};

/*
 * Load up to num_tables JPEG huffman tables into the MFX engine, one
 * MFX_JPEG_HUFF_TABLE_STATE command (53 dwords) per table that the app
 * flagged via load_huffman_table[]. No-op when no huffman table buffer
 * was supplied.
 */
static void
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        if (!huffman_table->load_huffman_table[index])
            continue;
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        /* DC: 12 code-length counts + 12 values; AC: 16 counts + 162 values
         * padded to 164 bytes so the payload totals 51 dwords. */
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
/* Map the 1-based normalized JPEG component id (1=Y, 2=Cb, 3=Cr, 4=alpha)
 * to the MFX quantizer-matrix type; index 0 is never used because qm_state
 * skips ids outside 1..4.
 * NOTE(review): the index-0 placeholder was lost in extraction and restored
 * from the upstream driver — verify against the original file. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,                                      /* unused */
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
/*
 * Program the JPEG quantizer matrices: for every picture component whose
 * selected quantiser table was loaded by the app, convert the table from
 * zigzag to raster order and emit it via gen8_mfd_qm_state(). Component ids
 * are normalized so the first component maps to 1 (Y).
 */
static void
gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VAIQMatrixBufferJPEGBaseline *iq_matrix;
    int index;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    assert(pic_param->num_components <= 3);

    for (index = 0; index < pic_param->num_components; index++) {
        /* normalize component_id so the first component becomes 1 */
        int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
        int qm_type;
        unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
        unsigned char raster_qm[64];
        int j;

        /* only ids 1..4 have a hardware qm slot (see va_to_gen7_jpeg_qm) */
        if (id > 4 || id < 1)
            continue;

        if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
            continue;

        qm_type = va_to_gen7_jpeg_qm[id];

        /* VA supplies the table in zigzag scan order; hardware wants raster */
        for (j = 0; j < 64; j++)
            raster_qm[zigzag_direct[j]] = qm[j];

        gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
    }
}
/*
 * Emit one MFD_JPEG_BSD_OBJECT command for a single JPEG scan: builds the
 * scan-component mask from the slice's component selectors (normalized
 * against the first picture component) and programs the scan's data size,
 * offset, MCU position, MCU count and restart interval.
 * next_slice_param is currently unused by the command itself.
 */
static void
gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferJPEGBaseline *pic_param,
                         VASliceParameterBufferJPEGBaseline *slice_param,
                         VASliceParameterBufferJPEGBaseline *next_slice_param,
                         dri_bo *slice_data_bo,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int scan_component_mask = 0;
    int i;

    assert(slice_param->num_components > 0);
    assert(slice_param->num_components < 4);
    assert(slice_param->num_components <= pic_param->num_components);

    for (i = 0; i < slice_param->num_components; i++) {
        switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
        case 1:
            scan_component_mask |= (1 << 0);
            break;
        case 2:
            scan_component_mask |= (1 << 1);
            break;
        case 3:
            scan_component_mask |= (1 << 2);
            break;
        default:
            assert(0);
            break;
        }
    }

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  slice_param->slice_horizontal_position << 16 |
                  slice_param->slice_vertical_position << 0);
    OUT_BCS_BATCH(batch,
                  ((slice_param->num_components != 1) << 30) |  /* interleaved */
                  (scan_component_mask << 27) |                 /* scan components */
                  (0 << 26) |   /* disable interrupt allowed */
                  (slice_param->num_mcus << 0));                /* MCU count */
    OUT_BCS_BATCH(batch,
                  (slice_param->restart_interval << 0));        /* RestartInterval */
    ADVANCE_BCS_BATCH(batch);
}
2180 /* Workaround for JPEG decoding on Ivybridge */
2184 i965_CreateSurfaces(VADriverContextP ctx,
2189 VASurfaceID *surfaces);
2194 unsigned char data[32];
2196 int data_bit_offset;
2198 } gen7_jpeg_wa_clip = {
2202 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2203 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/*
 * One-time setup for the JPEG decode workaround: (re)create a small dummy
 * NV12 surface sized from gen7_jpeg_wa_clip and upload the canned AVC
 * bitstream it will decode. The surface/bo handles are cached in the
 * context and reused across frames.
 */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        /* NOTE(review): alloc name/size args were lost in extraction and
         * restored from the upstream driver — verify against the original. */
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
/*
 * MFX_PIPE_MODE_SELECT for the workaround pass: configure the engine for
 * AVC VLD decode (the dummy clip is AVC) with pre-deblocking output and
 * stream-out disabled, before the real pipeline is switched to JPEG.
 */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFX_SURFACE_STATE for the workaround's NV12 dummy surface: planar 4:2:0,
 * interleaved chroma, Y-major tiled, with dimensions/pitch/Cb offset taken
 * from the cached jpeg_wa_surface_object.
 */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFX_PIPE_BUF_ADDR_STATE (61 dwords) for the workaround pass: only the
 * pre-deblocking destination (the dummy surface) and a throwaway intra
 * row-store buffer are real; every other address field is zero. The intra
 * buffer is allocated and released within this call — the kernel keeps it
 * alive until the batch retires.
 */
static void
gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    /* NOTE(review): alloc name/size restored from the upstream driver —
     * verify against the original file. */
    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* DW 1-3: pre-deblocking output -> dummy surface */
    OUT_BCS_RELOC(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* reference frame addresses, all unused for the dummy I-slice */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);

    /* the DW52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW56-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(intra_bo);
}
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE for the workaround pass: allocate throwaway
 * BSD/MPC and MPR row-store buffers sized for a 120-MB-wide frame, point
 * the command at them and drop the local references (the kernel keeps the
 * buffers alive until the batch retires).
 */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    /* NOTE(review): alloc name restored from the upstream driver. */
    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
/* Intentionally empty: the dummy AVC workaround clip uses the default (flat)
 * quantization matrices, so no MFX_QM_STATE commands are needed. */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{
}
/*
 * MFX_AVC_IMG_STATE for the workaround pass: a 1x1-macroblock, non-MBAFF
 * 4:2:0 CABAC frame matching the canned clip.
 * NOTE(review): several dropped bit-field lines below were restored from the
 * upstream driver — verify the DW3/DW4 fields against the original file.
 */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFX_AVC_DIRECTMODE_STATE (71 dwords) for the workaround pass: the dummy
 * I-slice uses no direct-mode references, so every surface address and POC
 * entry is zero.
 */
static void
gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC list, all zero for the dummy clip */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE for the workaround pass: point the bitstream
 * base at the uploaded canned AVC clip; all other base addresses are unused
 * in VLD mode and left zero.
 */
static void
gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFD_AVC_BSD_OBJECT for the workaround pass: decode the whole canned clip
 * as a single last slice, with size and starting bit offset taken from
 * gen7_jpeg_wa_clip.
 * NOTE(review): the dropped DW3/DW4 flag lines below were restored as zeros
 * from the upstream driver — verify against the original file.
 */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
                  (0 << 5) |
                  (0 << 4) |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
/*
 * MFX_AVC_SLICE_STATE for the workaround pass: a single I-slice covering
 * the 1x1-MB dummy frame, deblocking disabled, QP from gen7_jpeg_wa_clip.
 * NOTE(review): the dropped bit-field lines in DW2/DW3 were restored as
 * zeros from the upstream driver — verify against the original file.
 */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
/*
 * Run the full JPEG decode workaround: decode a tiny canned AVC clip to a
 * dummy surface so the MFX engine is in a known-good state before the
 * pipeline is reprogrammed for the real JPEG decode. Emits the complete
 * AVC state + BSD object sequence into the current BCS batch.
 */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
/*
 * Top-level JPEG baseline decode: runs the AVC workaround, programs the
 * JPEG pipeline state, then walks the slice (scan) parameters twice —
 * first to find the highest DC/AC huffman table selector actually used
 * (so only the needed tables are loaded), then to emit one BSD object per
 * scan.
 */
static void
gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j, max_selector = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    /* Currently only support Baseline DCT */
    gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);

    /* pass 1: find the highest huffman table selector referenced */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            int component;

            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            for (component = 0; component < slice_param->num_components; component++) {
                if (max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;
            }

            slice_param++;
        }
    }

    /* baseline JPEG allows only table selectors 0 and 1 */
    assert(max_selector < 2);
    gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);

    /* pass 2: emit one MFD_JPEG_BSD_OBJECT per scan */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
/* VP8 DC dequantization table: maps the 7-bit clamped quantization index
 * to the DC quantizer step size (VP8 spec / RFC 6386, section 14.1). */
static const int vp8_dc_qlookup[128] = {
    4,   5,   6,   7,   8,   9,   10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
    18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
    29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
    44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
    59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
    75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
    91,  93,  95,  96,  98,  100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
/* VP8 AC dequantization table: maps the 7-bit clamped quantization index
 * to the AC quantizer step size (VP8 spec / RFC 6386, section 14.1). */
static const int vp8_ac_qlookup[128] = {
    4,   5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
    20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
    36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
    52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
    78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98,  100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
2744 static inline unsigned int vp8_clip_quantization_index(int index)
/*
 * Per-frame setup for VP8 decoding: update the reference frame store,
 * (re)allocate the NV12 render surface, select pre- vs post-deblocking
 * output based on loop_filter_disable, and allocate the AVC-style
 * row-store scratch buffers sized by the frame width in macroblocks.
 */
static void
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* exactly one of pre-/post-deblocking output is valid, chosen by the
     * frame's loop_filter_disable flag */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
2829 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2830 struct decode_state *decode_state,
2831 struct gen7_mfd_context *gen7_mfd_context)
2833 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2834 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2835 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2836 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2837 dri_bo *probs_bo = decode_state->probability_data->bo;
2839 unsigned int quantization_value[4][6];
2841 log2num = (int)log2(slice_param->num_of_partitions - 1);
2843 BEGIN_BCS_BATCH(batch, 38);
2844 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2845 OUT_BCS_BATCH(batch,
2846 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2847 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2848 OUT_BCS_BATCH(batch,
2850 pic_param->pic_fields.bits.sharpness_level << 16 |
2851 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2852 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2853 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2854 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
/* NOTE(review): tail of the MFX_VP8_PIC_STATE batch emission; the command
 * header and earlier DWORDs are above this chunk, so this fragment cannot be
 * read in isolation.  The listing below has elided lines (gaps in the
 * embedded numbering) — confirm against the full source before editing. */
/* Remaining per-frame control bits of the picture-state DWORD:
 * segmentation map update/enable, streamin/streamout disabled, frame type
 * (bit 5 is 1 for inter frames), loop-filter type, and the two
 * version-derived motion-compensation mode bits. */
2855 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2856 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2857 0 << 7 | /* segmentation id streamin disabled */
2858 0 << 6 | /* segmentation id streamout disabled */
2859 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2860 pic_param->pic_fields.bits.filter_type << 4 |
2861 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2862 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
/* Loop-filter level for each of the four segments, packed one byte per
 * segment (segment 0 in the low byte). */
2864 OUT_BCS_BATCH(batch,
2865 pic_param->loop_filter_level[3] << 24 |
2866 pic_param->loop_filter_level[2] << 16 |
2867 pic_param->loop_filter_level[1] << 8 |
2868 pic_param->loop_filter_level[0] << 0);
2870 /* Quantizer Value for 4 segments, DW4-DW15 */
2871 for (i = 0; i < 4; i++) {
/* Translate the VP8 quantization indices into actual quantizer values via
 * the AC/DC lookup tables (RFC 6386 §14.1), applying the spec-mandated
 * scaling for the Y2 (second-order) plane. */
2872 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2873 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2874 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2875 /* 101581>>16 is equivalent to 155/100 */
2876 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2877 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2878 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
/* Spec clamps: Y2AC has a floor of 8, UVDC a ceiling of 132 (RFC 6386). */
2880 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2881 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
/* Three DWORDs per segment: (Y1AC|Y1DC), (UVAC|UVDC), (Y2AC|Y2DC). */
2883 OUT_BCS_BATCH(batch,
2884 quantization_value[i][0] << 16 | /* Y1AC */
2885 quantization_value[i][1] << 0); /* Y1DC */
2886 OUT_BCS_BATCH(batch,
2887 quantization_value[i][5] << 16 | /* UVAC */
2888 quantization_value[i][4] << 0); /* UVDC */
2889 OUT_BCS_BATCH(batch,
2890 quantization_value[i][3] << 16 | /* Y2AC */
2891 quantization_value[i][2] << 0); /* Y2DC */
2894 /* CoeffProbability table for non-key frame, DW16-DW18 */
/* probs_bo holds the coefficient-probability table; the hardware reads it
 * from this relocated GPU address. */
2896 OUT_BCS_RELOC(batch, probs_bo,
2897 0, I915_GEM_DOMAIN_INSTRUCTION,
2899 OUT_BCS_BATCH(batch, 0);
2900 OUT_BCS_BATCH(batch, 0);
2902 OUT_BCS_BATCH(batch, 0);
2903 OUT_BCS_BATCH(batch, 0);
2904 OUT_BCS_BATCH(batch, 0);
/* Segment-id tree probabilities (three node probabilities, byte-packed). */
2907 OUT_BCS_BATCH(batch,
2908 pic_param->mb_segment_tree_probs[2] << 16 |
2909 pic_param->mb_segment_tree_probs[1] << 8 |
2910 pic_param->mb_segment_tree_probs[0] << 0);
/* Per-frame mode probabilities: skip, intra, last-ref, golden-ref. */
2912 OUT_BCS_BATCH(batch,
2913 pic_param->prob_skip_false << 24 |
2914 pic_param->prob_intra << 16 |
2915 pic_param->prob_last << 8 |
2916 pic_param->prob_gf << 0);
/* Intra Y-mode probabilities (4 entries) and UV-mode probabilities (3). */
2918 OUT_BCS_BATCH(batch,
2919 pic_param->y_mode_probs[3] << 24 |
2920 pic_param->y_mode_probs[2] << 16 |
2921 pic_param->y_mode_probs[1] << 8 |
2922 pic_param->y_mode_probs[0] << 0);
2924 OUT_BCS_BATCH(batch,
2925 pic_param->uv_mode_probs[2] << 16 |
2926 pic_param->uv_mode_probs[1] << 8 |
2927 pic_param->uv_mode_probs[0] << 0);
2929 /* MV update value, DW23-DW32 */
/* Two components (row/col) x 19 MV probabilities, packed four per DWORD;
 * the 20th slot of the last DWORD is padded with zero. */
2930 for (i = 0; i < 2; i++) {
2931 for (j = 0; j < 20; j += 4) {
2932 OUT_BCS_BATCH(batch,
2933 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2934 pic_param->mv_probs[i][j + 2] << 16 |
2935 pic_param->mv_probs[i][j + 1] << 8 |
2936 pic_param->mv_probs[i][j + 0] << 0);
/* Signed 7-bit loop-filter deltas per reference frame and per coding mode;
 * masked to 7 bits as the hardware field expects sign-magnitude-style
 * truncation of the signed char value. */
2940 OUT_BCS_BATCH(batch,
2941 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2942 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2943 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2944 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2946 OUT_BCS_BATCH(batch,
2947 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2948 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2949 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2950 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2952 /* segmentation id stream base address, DW35-DW37 */
/* Zero because segmentation-id streamin/streamout are disabled above. */
2953 OUT_BCS_BATCH(batch, 0);
2954 OUT_BCS_BATCH(batch, 0);
2955 OUT_BCS_BATCH(batch, 0);
2956 ADVANCE_BCS_BATCH(batch);
/* Emit the MFD_VP8_BSD_OBJECT command describing the VP8 frame's bitstream
 * layout to the bitstream decoder: partition-0 bool-coder state, then the
 * byte offset and size of each of the up-to-8 token partitions.
 * NOTE(review): this listing has elided lines (gaps in the embedded
 * numbering): the `static void` line, opening braces, the locals
 * `i`/`log2num`, and the body of the used_bits==8 branch are not visible
 * here — consult the full source before modifying. */
2960 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2961 VAPictureParameterBufferVP8 *pic_param,
2962 VASliceParameterBufferVP8 *slice_param,
2963 dri_bo *slice_data_bo,
2964 struct gen7_mfd_context *gen7_mfd_context)
2966 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Byte offset of the first token partition: start of slice data plus the
 * frame header rounded up to a whole byte. */
2968 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
/* Bits of the current bool-coder byte already consumed (count is the
 * number of bits still unread, 0..7 asserted below). */
2969 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2970 unsigned int partition_size_0 = slice_param->partition_size[0];
2972 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
/* count==0 means the whole byte was consumed; the elided branch body
 * advances the offset and resets used_bits so partition 0 shrinks by one. */
2973 if (used_bits == 8) {
2976 partition_size_0 -= 1;
/* num_of_partitions counts partition 0 plus 1..8 token partitions. */
2979 assert(slice_param->num_of_partitions >= 2);
2980 assert(slice_param->num_of_partitions <= 9);
/* log2 of the token-partition count, as encoded in the frame header. */
2982 log2num = (int)log2(slice_param->num_of_partitions - 1);
2984 BEGIN_BCS_BATCH(batch, 22);
2985 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
/* DW1/DW2: saved bool-coder (CPBAC) state so the hardware can resume
 * entropy decoding exactly where the driver's header parse stopped. */
2986 OUT_BCS_BATCH(batch,
2987 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2988 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2990 (slice_param->macroblock_offset & 0x7));
2991 OUT_BCS_BATCH(batch,
2992 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2995 OUT_BCS_BATCH(batch, partition_size_0);
2996 OUT_BCS_BATCH(batch, offset);
2997 //partition sizes in bytes are present after the above first partition when there are more than one token partition
/* Skip past partition 0 and the 3-byte size fields of the extra token
 * partitions to reach the first token partition's data. */
2998 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
/* Eight (size, offset) slots are always emitted; unused ones are zeroed. */
2999 for (i = 1; i < 9; i++) {
3000 if (i < slice_param->num_of_partitions) {
3001 OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
3002 OUT_BCS_BATCH(batch, offset);
3004 OUT_BCS_BATCH(batch, 0);
3005 OUT_BCS_BATCH(batch, 0);
3008 offset += slice_param->partition_size[i];
3011 OUT_BCS_BATCH(batch,
3012 1 << 31 | /* concealment method */
3015 ADVANCE_BCS_BATCH(batch);
/* Decode one complete VP8 frame: validate the decode_state buffers, then
 * emit the full MFX command sequence (pipe mode, surfaces, buffer
 * addresses, picture state, BSD object) into an atomic BCS batch and flush
 * it to the hardware.  VP8 carries exactly one "slice" per frame, hence the
 * strict single-slice-param/single-slice-data validation below.
 * NOTE(review): some lines are elided in this listing (return statements,
 * braces) — see the full source. */
3019 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3020 struct decode_state *decode_state,
3021 struct gen7_mfd_context *gen7_mfd_context)
3023 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3024 VAPictureParameterBufferVP8 *pic_param;
3025 VASliceParameterBufferVP8 *slice_param;
3026 dri_bo *slice_data_bo;
3028 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3029 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3031 /* one slice per frame */
/* Reject anything other than exactly one slice param with one element,
 * one slice data bo, and a probability-data buffer; warn once and bail
 * (the early return is on an elided line). */
3032 if (decode_state->num_slice_params != 1 ||
3033 (!decode_state->slice_params ||
3034 !decode_state->slice_params[0] ||
3035 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3036 (!decode_state->slice_datas ||
3037 !decode_state->slice_datas[0] ||
3038 !decode_state->slice_datas[0]->bo) ||
3039 !decode_state->probability_data) {
3040 WARN_ONCE("Wrong parameters for VP8 decoding\n");
3045 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3046 slice_data_bo = decode_state->slice_datas[0]->bo;
/* Allocate/refresh per-frame buffers, then build the batch atomically so
 * the whole command sequence reaches the ring as one unit. */
3048 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3049 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3050 intel_batchbuffer_emit_mi_flush(batch);
3051 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3052 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3053 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3054 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3055 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3056 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3057 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3058 intel_batchbuffer_end_atomic(batch);
3059 intel_batchbuffer_flush(batch);
/* Top-level hw_context::run entry point: sanity-check the input, then
 * dispatch to the per-codec decode routine based on the VA profile.
 * NOTE(review): the `break;` lines between the switch cases, the default
 * case, and the final `return vaStatus;` are elided in this listing (gaps
 * in the embedded numbering) — consult the full source. */
3063 gen8_mfd_decode_picture(VADriverContextP ctx,
3065 union codec_state *codec_state,
3066 struct hw_context *hw_context)
3069 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3070 struct decode_state *decode_state = &codec_state->decode;
3073 assert(gen7_mfd_context);
3075 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3077 if (vaStatus != VA_STATUS_SUCCESS)
/* Reset the MPEG-2 slice-position workaround state before each picture. */
3080 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3083 case VAProfileMPEG2Simple:
3084 case VAProfileMPEG2Main:
3085 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3088 case VAProfileH264ConstrainedBaseline:
3089 case VAProfileH264Main:
3090 case VAProfileH264High:
3091 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3094 case VAProfileVC1Simple:
3095 case VAProfileVC1Main:
3096 case VAProfileVC1Advanced:
3097 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3100 case VAProfileJPEGBaseline:
3101 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3104 case VAProfileVP8Version0_3:
3105 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3113 vaStatus = VA_STATUS_SUCCESS;
/* hw_context::destroy callback: drop every GEM buffer reference held by
 * the decoder context, free the batchbuffer, and free the context itself.
 * Pointers are NULLed after unreference to guard against use-after-free;
 * dri_bo_unreference() accepts NULL, so no guards are needed. */
3120 gen8_mfd_context_destroy(void *hw_context)
3122 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3124 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3125 gen7_mfd_context->post_deblocking_output.bo = NULL;
3127 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3128 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3130 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3131 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3133 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3134 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3136 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3137 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3139 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3140 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3142 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3143 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/* NOTE(review): jpeg_wa_slice_data_bo is unreferenced but not NULLed,
 * unlike the fields above — harmless since the context is freed below. */
3145 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3147 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3148 free(gen7_mfd_context);
/* MPEG-2-specific context init: mark all four cached IQ-matrix load flags
 * as "unset" (-1) so the first picture forces a quantiser-matrix upload. */
3151 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3152 struct gen7_mfd_context *gen7_mfd_context)
3154 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3155 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3156 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3157 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/* Allocate and initialize a Gen8 decoder hw_context for the given config:
 * wire up the destroy/run callbacks, create the render-ring batchbuffer,
 * invalidate the reference-surface table, and run any codec-specific init.
 * Returns the context as the generic struct hw_context *.
 * NOTE(review): this listing elides some lines (gaps in the embedded
 * numbering), including what appears to be the check/assert on the calloc
 * result before first use — confirm against the full source. */
3161 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3163 struct intel_driver_data *intel = intel_driver_data(ctx);
3164 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3167 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3168 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3169 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
/* No references yet: every frame-store slot starts invalid. */
3171 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3172 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3173 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3176 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
/* Only MPEG-2 and H.264 need extra per-codec state; other profiles use
 * the zeroed defaults from calloc. */
3178 switch (obj_config->profile) {
3179 case VAProfileMPEG2Simple:
3180 case VAProfileMPEG2Main:
3181 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3184 case VAProfileH264ConstrainedBaseline:
3185 case VAProfileH264Main:
3186 case VAProfileH264High:
3187 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3192 return (struct hw_context *)gen7_mfd_context;