2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
/* True when the GPU's PCI revision indicates B0 stepping or later silicon. */
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order: maps scan position (0..63) to the raster index inside
 * an 8x8 coefficient block. Used to reorder quantization matrices for the
 * hardware. NOTE(review): the closing "};" of this initializer is elided
 * from this listing (numbering jumps 59 -> 63). */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/* Lazily attach AVC private data (GenAvcSurface) to the render surface and
 * allocate its direct-MV (DMV) write/read buffers: one for the top
 * field/frame, plus a separate bottom-field buffer when field coding is used
 * without direct_8x8_inference. Sizes are width_in_mbs * height_in_mbs * 128
 * bytes. NOTE(review): this listing elides several original lines (e.g. the
 * function's storage class, opening/closing braces, and the dri_bo_alloc
 * alignment arguments), so the span below is not the complete body. */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface for AVC: allocate the private-data record. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 assert((obj_surface->size & 0x3f) == 0);
78 obj_surface->private_data = gen7_avc_surface;
/* A distinct bottom-field DMV buffer is needed only for field pictures
 * decoded without direct_8x8_inference (H.264 direct-mode derivation). */
81 gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
82 !pic_param->seq_fields.bits.direct_8x8_inference_flag);
84 if (gen7_avc_surface->dmv_top == NULL) {
85 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86 "direct mv w/r buffer",
87 width_in_mbs * height_in_mbs * 128,
89 assert(gen7_avc_surface->dmv_top);
92 if (gen7_avc_surface->dmv_bottom_flag &&
93 gen7_avc_surface->dmv_bottom == NULL) {
94 gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
95 "direct mv w/r buffer",
96 width_in_mbs * height_in_mbs * 128,
98 assert(gen7_avc_surface->dmv_bottom);
/* Emit MFX_PIPE_MODE_SELECT (5 dwords): configures the MFX engine for VLD
 * decode of the selected codec and routes output through the pre- or
 * post-deblocking path depending on which is valid in the context.
 * NOTE(review): the listing elides some lines here (e.g. the
 * standard_select parameter declaration and two OUT_BCS_BATCH openers),
 * so the span is not the complete body. */
103 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
104 struct decode_state *decode_state,
106 struct gen7_mfd_context *gen7_mfd_context)
108 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these codecs are supported by this Gen8 decode path. */
110 assert(standard_select == MFX_FORMAT_MPEG2 ||
111 standard_select == MFX_FORMAT_AVC ||
112 standard_select == MFX_FORMAT_VC1 ||
113 standard_select == MFX_FORMAT_JPEG ||
114 standard_select == MFX_FORMAT_VP8);
116 BEGIN_BCS_BATCH(batch, 5);
117 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
119 (MFX_LONG_MODE << 17) | /* Currently only support long format */
120 (MFD_MODE_VLD << 15) | /* VLD mode */
121 (0 << 10) | /* disable Stream-Out */
122 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
123 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
124 (0 << 5) | /* not in stitch mode */
125 (MFX_CODEC_DECODE << 4) | /* decoding mode */
126 (standard_select << 0));
/* Error-handling dword: do not terminate decode on AVC bitstream errors. */
128 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
129 (0 << 3) | /* terminate if AVC mbdata error occurs */
130 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
133 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
134 OUT_BCS_BATCH(batch, 0); /* reserved */
135 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE (6 dwords) describing the decode target surface:
 * dimensions, pitch, Y-major tiling, planar 4:2:0 layout, and the Y offsets
 * of the Cb and Cr planes. Chroma is interleaved (NV12) for all codecs
 * except JPEG. NOTE(review): listing elides a few lines (parameter list,
 * some OUT_BCS_BATCH openers). */
139 gen8_mfd_surface_state(VADriverContextP ctx,
140 struct decode_state *decode_state,
142 struct gen7_mfd_context *gen7_mfd_context)
144 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
145 struct object_surface *obj_surface = decode_state->render_object;
146 unsigned int y_cb_offset;
147 unsigned int y_cr_offset;
151 y_cb_offset = obj_surface->y_cb_offset;
152 y_cr_offset = obj_surface->y_cr_offset;
154 BEGIN_BCS_BATCH(batch, 6);
155 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
156 OUT_BCS_BATCH(batch, 0);
/* Dword 2: surface height/width are encoded minus one. */
158 ((obj_surface->orig_height - 1) << 18) |
159 ((obj_surface->orig_width - 1) << 4));
161 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
162 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
163 (0 << 22) | /* surface object control state, ignored */
164 ((obj_surface->width - 1) << 3) | /* pitch */
165 (0 << 2) | /* must be 0 */
166 (1 << 1) | /* must be tiled */
167 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
169 (0 << 16) | /* X offset for U(Cb), must be 0 */
170 (y_cb_offset << 0)); /* Y offset for U(Cb) */
172 (0 << 16) | /* X offset for V(Cr), must be 0 */
173 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
174 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): relocations for the pre/post
 * deblocking outputs, intra and deblocking-filter row-store scratch buffers,
 * and the 16 reference picture surfaces; unused slots are zeroed. Each
 * address on Gen8 is a 64-bit pair followed (for buffers) by a memory
 * attributes dword. NOTE(review): listing elides lines throughout (the
 * else-branches pairing each OUT_BCS_RELOC, relocation deltas, and loop
 * braces), so conditional/else structure is not fully visible here. */
178 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
179 struct decode_state *decode_state,
181 struct gen7_mfd_context *gen7_mfd_context)
183 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
186 BEGIN_BCS_BATCH(batch, 61);
187 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188 /* Pre-deblock 1-3 */
189 if (gen7_mfd_context->pre_deblocking_output.valid)
190 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
191 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194 OUT_BCS_BATCH(batch, 0);
196 OUT_BCS_BATCH(batch, 0);
197 OUT_BCS_BATCH(batch, 0);
198 /* Post-debloing 4-6 */
199 if (gen7_mfd_context->post_deblocking_output.valid)
200 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
201 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204 OUT_BCS_BATCH(batch, 0);
206 OUT_BCS_BATCH(batch, 0);
207 OUT_BCS_BATCH(batch, 0);
209 /* uncompressed-video & stream out 7-12 */
210 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
212 OUT_BCS_BATCH(batch, 0);
213 OUT_BCS_BATCH(batch, 0);
214 OUT_BCS_BATCH(batch, 0);
215 OUT_BCS_BATCH(batch, 0);
217 /* intra row-store scratch 13-15 */
218 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
219 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
220 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
223 OUT_BCS_BATCH(batch, 0);
225 OUT_BCS_BATCH(batch, 0);
226 OUT_BCS_BATCH(batch, 0);
227 /* deblocking-filter-row-store 16-18 */
228 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
229 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
230 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
233 OUT_BCS_BATCH(batch, 0);
234 OUT_BCS_BATCH(batch, 0);
235 OUT_BCS_BATCH(batch, 0);
/* Reference picture addresses: one reloc (read-only domain) per valid
 * entry in the frame-store table; invalid slots emit zero. */
238 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
239 struct object_surface *obj_surface;
241 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
242 gen7_mfd_context->reference_surface[i].obj_surface &&
243 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
244 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
246 OUT_BCS_RELOC(batch, obj_surface->bo,
247 I915_GEM_DOMAIN_INSTRUCTION, 0,
250 OUT_BCS_BATCH(batch, 0);
253 OUT_BCS_BATCH(batch, 0);
256 /* reference property 51 */
257 OUT_BCS_BATCH(batch, 0);
259 /* Macroblock status & ILDB 52-57 */
260 OUT_BCS_BATCH(batch, 0);
261 OUT_BCS_BATCH(batch, 0);
262 OUT_BCS_BATCH(batch, 0);
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
267 /* the second Macroblock status 58-60 */
268 OUT_BCS_BATCH(batch, 0);
269 OUT_BCS_BATCH(batch, 0);
270 OUT_BCS_BATCH(batch, 0);
272 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): points the hardware at the
 * slice bitstream buffer and sets its upper bound; the MV, IT-COFF, IT-DBLK
 * and PAK-BSE indirect objects are unused for VLD decode and zeroed.
 * NOTE(review): listing elides a few lines (e.g. the standard_select
 * parameter). */
276 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
277 dri_bo *slice_data_bo,
279 struct gen7_mfd_context *gen7_mfd_context)
281 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
283 BEGIN_BCS_BATCH(batch, 26);
284 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
286 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
287 OUT_BCS_BATCH(batch, 0);
288 OUT_BCS_BATCH(batch, 0);
289 /* Upper bound 4-5 */
290 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
291 OUT_BCS_BATCH(batch, 0);
293 /* MFX indirect MV 6-10 */
294 OUT_BCS_BATCH(batch, 0);
295 OUT_BCS_BATCH(batch, 0);
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
300 /* MFX IT_COFF 11-15 */
301 OUT_BCS_BATCH(batch, 0);
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
307 /* MFX IT_DBLK 16-20 */
308 OUT_BCS_BATCH(batch, 0);
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
314 /* MFX PAK_BSE object for encoder 21-25 */
315 OUT_BCS_BATCH(batch, 0);
316 OUT_BCS_BATCH(batch, 0);
317 OUT_BCS_BATCH(batch, 0);
318 OUT_BCS_BATCH(batch, 0);
319 OUT_BCS_BATCH(batch, 0);
321 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row-store, MPR
 * row-store, and (VC-1) bitplane read buffers; invalid buffers emit zero.
 * NOTE(review): listing elides the else-branches that pair with each
 * conditional OUT_BCS_RELOC, plus relocation deltas. */
325 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
326 struct decode_state *decode_state,
328 struct gen7_mfd_context *gen7_mfd_context)
330 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
332 BEGIN_BCS_BATCH(batch, 10);
333 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
335 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
336 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
337 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
340 OUT_BCS_BATCH(batch, 0);
342 OUT_BCS_BATCH(batch, 0);
343 OUT_BCS_BATCH(batch, 0);
344 /* MPR Row Store Scratch buffer 4-6 */
345 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
346 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
347 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
350 OUT_BCS_BATCH(batch, 0);
352 OUT_BCS_BATCH(batch, 0);
353 OUT_BCS_BATCH(batch, 0);
/* Bitplane read buffer (used by VC-1; read-only reloc). */
356 if (gen7_mfd_context->bitplane_read_buffer.valid)
357 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
358 I915_GEM_DOMAIN_INSTRUCTION, 0,
361 OUT_BCS_BATCH(batch, 0);
362 OUT_BCS_BATCH(batch, 0);
363 OUT_BCS_BATCH(batch, 0);
364 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_QM_STATE (18 dwords): upload one quantization matrix (qm_type
 * selects which) to the hardware. The matrix is staged through a 64-byte
 * local buffer because the command always consumes 16 dwords.
 * NOTE(review): when qm_length < 64 the tail of qm_buffer is emitted
 * uninitialized — confirm callers always pass full-length matrices or
 * consider zero-initializing qm_buffer. Listing elides the qm_type/qm/
 * qm_length parameter lines. */
368 gen8_mfd_qm_state(VADriverContextP ctx,
372 struct gen7_mfd_context *gen7_mfd_context)
374 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
375 unsigned int qm_buffer[16];
377 assert(qm_length <= 16 * 4);
378 memcpy(qm_buffer, qm, qm_length);
380 BEGIN_BCS_BATCH(batch, 18);
381 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
382 OUT_BCS_BATCH(batch, qm_type << 0);
383 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
384 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE (17 dwords): per-picture AVC decode parameters
 * derived from the VA picture parameter buffer — frame size in MBs, QP
 * offsets, weighted prediction, entropy coding mode, MBAFF/field flags.
 * Validates (via asserts) that the stream is 4:2:0 or monochrome and that
 * field/frame flags are self-consistent. NOTE(review): listing elides
 * lines (img_struct declaration and its assignments, several OUT_BCS_BATCH
 * openers), so some dword groupings below are incomplete. */
388 gen8_mfd_avc_img_state(VADriverContextP ctx,
389 struct decode_state *decode_state,
390 struct gen7_mfd_context *gen7_mfd_context)
392 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
394 int mbaff_frame_flag;
395 unsigned int width_in_mbs, height_in_mbs;
396 VAPictureParameterBufferH264 *pic_param;
398 assert(decode_state->pic_param && decode_state->pic_param->buffer);
399 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
400 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the image structure (frame/top/bottom field) from CurrPic flags;
 * the assignments are elided from this listing. */
402 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
404 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
409 if ((img_struct & 0x1) == 0x1) {
410 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
412 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
415 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
416 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
417 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
419 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF frame: MB-adaptive coding enabled and the picture is a frame. */
422 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
423 !pic_param->pic_fields.bits.field_pic_flag);
425 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
426 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
428 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
429 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
430 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
431 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
433 BEGIN_BCS_BATCH(batch, 17);
434 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
436 (width_in_mbs * height_in_mbs - 1));
438 ((height_in_mbs - 1) << 16) |
439 ((width_in_mbs - 1) << 0));
441 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
442 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
443 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
444 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
445 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
446 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
449 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
450 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
451 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
452 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
453 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
454 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
455 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
456 (mbaff_frame_flag << 1) |
457 (pic_param->pic_fields.bits.field_pic_flag << 0));
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 OUT_BCS_BATCH(batch, 0);
465 OUT_BCS_BATCH(batch, 0);
466 OUT_BCS_BATCH(batch, 0);
467 OUT_BCS_BATCH(batch, 0);
468 OUT_BCS_BATCH(batch, 0);
469 OUT_BCS_BATCH(batch, 0);
470 ADVANCE_BCS_BATCH(batch);
/* Upload the AVC scaling lists: 4x4 intra (lists 0-2) and inter (lists 3-5),
 * plus the two 8x8 lists when transform_8x8_mode is on. Falls back to the
 * context's default (flat) IQ matrix when the app supplied none. */
474 gen8_mfd_avc_qm_state(VADriverContextP ctx,
475 struct decode_state *decode_state,
476 struct gen7_mfd_context *gen7_mfd_context)
478 VAIQMatrixBufferH264 *iq_matrix;
479 VAPictureParameterBufferH264 *pic_param;
481 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
482 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
484 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
486 assert(decode_state->pic_param && decode_state->pic_param->buffer);
487 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
489 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
490 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
492 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
493 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
494 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Emit MFD_AVC_PICID_STATE (10 dwords) with picture-ID remapping disabled
 * (dword 1 = 1); the remap table dwords are all zero. */
499 gen8_mfd_avc_picid_state(VADriverContextP ctx,
500 struct decode_state *decode_state,
501 struct gen7_mfd_context *gen7_mfd_context)
503 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
505 BEGIN_BCS_BATCH(batch, 10);
506 OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
507 OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
508 OUT_BCS_BATCH(batch, 0);
509 OUT_BCS_BATCH(batch, 0);
510 OUT_BCS_BATCH(batch, 0);
511 OUT_BCS_BATCH(batch, 0);
512 OUT_BCS_BATCH(batch, 0);
513 OUT_BCS_BATCH(batch, 0);
514 OUT_BCS_BATCH(batch, 0);
515 OUT_BCS_BATCH(batch, 0);
516 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffer addresses for
 * each reference surface and for the current picture, followed by the
 * top/bottom picture order counts (POCs) for references and CurrPic.
 * NOTE(review): listing elides many lines (loop braces, else-branches,
 * bottom-field DMV selection, reloc deltas, and the POC-matching logic
 * between frame-store entries and ReferenceFrames), so the control flow
 * shown here is incomplete. */
520 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
521 struct decode_state *decode_state,
522 VAPictureParameterBufferH264 *pic_param,
523 VASliceParameterBufferH264 *slice_param,
524 struct gen7_mfd_context *gen7_mfd_context)
526 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527 struct object_surface *obj_surface;
528 GenAvcSurface *gen7_avc_surface;
529 VAPictureH264 *va_pic;
532 BEGIN_BCS_BATCH(batch, 71);
533 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
535 /* reference surfaces 0..15 */
536 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538 gen7_mfd_context->reference_surface[i].obj_surface &&
539 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
541 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542 gen7_avc_surface = obj_surface->private_data;
544 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545 I915_GEM_DOMAIN_INSTRUCTION, 0,
547 OUT_BCS_BATCH(batch, 0);
549 OUT_BCS_BATCH(batch, 0);
550 OUT_BCS_BATCH(batch, 0);
554 OUT_BCS_BATCH(batch, 0);
556 /* the current decoding frame/field */
557 va_pic = &pic_param->CurrPic;
558 obj_surface = decode_state->render_object;
559 assert(obj_surface->bo && obj_surface->private_data);
560 gen7_avc_surface = obj_surface->private_data;
/* Current picture's DMV buffer is written by the hardware (write domain). */
562 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
566 OUT_BCS_BATCH(batch, 0);
567 OUT_BCS_BATCH(batch, 0);
/* POC list: for each valid frame-store entry, find its matching entry in
 * ReferenceFrames and emit its top/bottom field order counts. */
570 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
571 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
574 assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
576 for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
577 va_pic = &pic_param->ReferenceFrames[j];
579 if (va_pic->flags & VA_PICTURE_H264_INVALID)
582 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
589 assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
591 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
592 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
594 OUT_BCS_BATCH(batch, 0);
595 OUT_BCS_BATCH(batch, 0);
/* Final pair: POCs of the picture being decoded. */
599 va_pic = &pic_param->CurrPic;
600 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
601 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
603 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_SLICE_STATE (11 dwords): normalized slice type, active
 * reference counts, weight denominators, deblocking/CABAC/QP parameters,
 * and the start/end macroblock positions of this slice (end derived from
 * next_slice_param, or picture end for the last slice). SI/SP types are
 * folded into I/P respectively. NOTE(review): listing elides a few lines
 * (slice_type declaration, else-branch keywords, some OUT_BCS_BATCH
 * openers). */
607 gen8_mfd_avc_slice_state(VADriverContextP ctx,
608 VAPictureParameterBufferH264 *pic_param,
609 VASliceParameterBufferH264 *slice_param,
610 VASliceParameterBufferH264 *next_slice_param,
611 struct gen7_mfd_context *gen7_mfd_context)
613 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
614 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
615 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
616 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
617 int num_ref_idx_l0, num_ref_idx_l1;
618 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
619 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
620 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Normalize the VA slice type to the hardware's I/P/B encoding. */
623 if (slice_param->slice_type == SLICE_TYPE_I ||
624 slice_param->slice_type == SLICE_TYPE_SI) {
625 slice_type = SLICE_TYPE_I;
626 } else if (slice_param->slice_type == SLICE_TYPE_P ||
627 slice_param->slice_type == SLICE_TYPE_SP) {
628 slice_type = SLICE_TYPE_P;
630 assert(slice_param->slice_type == SLICE_TYPE_B);
631 slice_type = SLICE_TYPE_B;
/* Reference list sizes: I uses none, P uses L0 only, B uses both. */
634 if (slice_type == SLICE_TYPE_I) {
635 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
636 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
639 } else if (slice_type == SLICE_TYPE_P) {
640 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
641 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
644 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
645 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* In MBAFF frames addresses count MB pairs, so scale by 2 (<< 1). */
648 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
649 slice_hor_pos = first_mb_in_slice % width_in_mbs;
650 slice_ver_pos = first_mb_in_slice / width_in_mbs;
652 if (next_slice_param) {
653 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
654 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
655 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
657 next_slice_hor_pos = 0;
658 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
661 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663 OUT_BCS_BATCH(batch, slice_type);
665 (num_ref_idx_l1 << 24) |
666 (num_ref_idx_l0 << 16) |
667 (slice_param->chroma_log2_weight_denom << 8) |
668 (slice_param->luma_log2_weight_denom << 0));
670 (slice_param->direct_spatial_mv_pred_flag << 29) |
671 (slice_param->disable_deblocking_filter_idc << 27) |
672 (slice_param->cabac_init_idc << 24) |
673 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
677 (slice_ver_pos << 24) |
678 (slice_hor_pos << 16) |
679 (first_mb_in_slice << 0));
681 (next_slice_ver_pos << 16) |
682 (next_slice_hor_pos << 0));
684 (next_slice_param == NULL) << 19); /* last slice flag */
685 OUT_BCS_BATCH(batch, 0);
686 OUT_BCS_BATCH(batch, 0);
687 OUT_BCS_BATCH(batch, 0);
688 OUT_BCS_BATCH(batch, 0);
689 ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: delegate MFX_AVC_REF_IDX_STATE emission to the shared Gen6+
 * helper, passing this context's batch and frame-store table. NOTE(review):
 * the slice_param argument line is elided from this listing. */
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694 VAPictureParameterBufferH264 *pic_param,
695 VASliceParameterBufferH264 *slice_param,
696 struct gen7_mfd_context *gen7_mfd_context)
698 gen6_send_avc_ref_idx_state(
699 gen7_mfd_context->base.batch,
701 gen7_mfd_context->reference_surface
/* Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords) when explicit weighted
 * prediction is in effect: one table (L0) for weighted P slices, two tables
 * (L0 then L1) for B slices with weighted_bipred_idc == 1. Each table packs
 * 32 entries of {luma weight, luma offset, Cb weight, Cb offset, Cr weight,
 * Cr offset} as 16-bit values. NOTE(review): the listing elides the
 * branch lines selecting the L0 vs. L1 fill loops and the loop braces. */
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707 VAPictureParameterBufferH264 *pic_param,
708 VASliceParameterBufferH264 *slice_param,
709 struct gen7_mfd_context *gen7_mfd_context)
711 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712 int i, j, num_weight_offset_table = 0;
713 short weightoffsets[32 * 6];
715 if ((slice_param->slice_type == SLICE_TYPE_P ||
716 slice_param->slice_type == SLICE_TYPE_SP) &&
717 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718 num_weight_offset_table = 1;
721 if ((slice_param->slice_type == SLICE_TYPE_B) &&
722 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723 num_weight_offset_table = 2;
726 for (i = 0; i < num_weight_offset_table; i++) {
727 BEGIN_BCS_BATCH(batch, 98);
728 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729 OUT_BCS_BATCH(batch, i); /* 0 = list L0, 1 = list L1 */
/* Fill from the L0 weights/offsets (first pass)... */
732 for (j = 0; j < 32; j++) {
733 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
/* ...or from the L1 weights/offsets (second pass for B slices). */
741 for (j = 0; j < 32; j++) {
742 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
751 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752 ADVANCE_BCS_BATCH(batch);
/* Emit MFD_AVC_BSD_OBJECT (6 dwords): kick decode of one slice. Points the
 * hardware at the slice bytes (size + offset within the indirect bitstream
 * buffer) and gives the bit offset of the first macroblock after the slice
 * header, plus the last-slice flag. NOTE(review): the listing elides lines
 * inside the dword expressions (e.g. between 774 and 782), so the exact
 * field layout here is incomplete. */
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758 VAPictureParameterBufferH264 *pic_param,
759 VASliceParameterBufferH264 *slice_param,
760 dri_bo *slice_data_bo,
761 VASliceParameterBufferH264 *next_slice_param,
762 struct gen7_mfd_context *gen7_mfd_context)
764 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Bit offset of the first MB, parsed from the slice header in the bo;
 * CABAC vs. CAVLC affects where the header ends. */
765 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
767 pic_param->pic_fields.bits.entropy_coding_mode_flag);
769 /* the input bitsteam format on GEN7 differs from GEN6 */
770 BEGIN_BCS_BATCH(batch, 6);
771 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
773 (slice_param->slice_data_size));
774 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
782 ((slice_data_bit_offset >> 3) << 16) |
786 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787 (slice_data_bit_offset & 0x7));
788 OUT_BCS_BATCH(batch, 0);
789 ADVANCE_BCS_BATCH(batch);
/* One-time AVC setup for a decode context: populate the fallback IQ matrix
 * with the H.264 default (flat) scaling lists, used when the application
 * does not supply a VAIQMatrixBufferH264. */
793 gen8_mfd_avc_context_init(
794 VADriverContextP ctx,
795 struct gen7_mfd_context *gen7_mfd_context
798 /* Initialize flat scaling lists */
799 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/* Per-picture AVC decode setup: decide whether in-loop deblocking (ILDB) is
 * needed by scanning all slices, refresh the frame-store table, allocate the
 * render target bo (NV12), pre-fill chroma with 0x80 for monochrome streams,
 * attach DMV buffers, and (re)allocate the row-store scratch buffers sized
 * from the picture width. Output goes through the post-deblocking path when
 * ILDB is enabled, otherwise the pre-deblocking path. NOTE(review): the
 * listing elides lines throughout (the enable_avc_ildb assignment, loop
 * braces, dri_bo_alloc name/alignment arguments), so bodies below are
 * incomplete. */
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804 struct decode_state *decode_state,
805 struct gen7_mfd_context *gen7_mfd_context)
807 VAPictureParameterBufferH264 *pic_param;
808 VASliceParameterBufferH264 *slice_param;
809 struct i965_driver_data *i965 = i965_driver_data(ctx);
810 struct object_surface *obj_surface;
812 int i, j, enable_avc_ildb = 0;
813 unsigned int width_in_mbs, height_in_mbs;
/* ILDB is required as soon as any slice leaves deblocking enabled
 * (disable_deblocking_filter_idc != 1); stop scanning once found. */
815 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
819 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821 assert((slice_param->slice_type == SLICE_TYPE_I) ||
822 (slice_param->slice_type == SLICE_TYPE_SI) ||
823 (slice_param->slice_type == SLICE_TYPE_P) ||
824 (slice_param->slice_type == SLICE_TYPE_SP) ||
825 (slice_param->slice_type == SLICE_TYPE_B));
827 if (slice_param->disable_deblocking_filter_idc != 1) {
836 assert(decode_state->pic_param && decode_state->pic_param->buffer);
837 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838 intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
839 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
840 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
841 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
842 assert(height_in_mbs > 0 && height_in_mbs <= 256);
844 /* Current decoded picture */
845 obj_surface = decode_state->render_object;
846 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
847 obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
848 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
850 /* initial uv component for YUV400 case */
851 if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
852 unsigned int uv_offset = obj_surface->width * obj_surface->height;
853 unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
/* Write neutral chroma (0x80) so the NV12 UV plane renders as grey. */
855 drm_intel_gem_bo_map_gtt(obj_surface->bo);
856 memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
857 drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
860 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Route output: exactly one of post-/pre-deblocking is marked valid,
 * both referencing the render target bo. */
862 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
863 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
864 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
865 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
867 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
868 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
869 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
870 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
872 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
873 bo = dri_bo_alloc(i965->intel.bufmgr,
878 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
879 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
881 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
882 bo = dri_bo_alloc(i965->intel.bufmgr,
883 "deblocking filter row store",
884 width_in_mbs * 64 * 4,
887 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
888 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
890 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
891 bo = dri_bo_alloc(i965->intel.bufmgr,
893 width_in_mbs * 64 * 2,
896 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
897 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
899 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
900 bo = dri_bo_alloc(i965->intel.bufmgr,
902 width_in_mbs * 64 * 2,
905 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
906 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* Bitplane reads are a VC-1 feature; never used for AVC. */
908 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Top-level AVC decode entry: run per-picture init, then emit one atomic
 * BCS batch containing the common pipeline state followed by per-slice
 * state (directmode, ref-idx, weights, slice state, BSD object) for every
 * slice in every slice-parameter buffer, and flush. NOTE(review): listing
 * elides lines (i/j declarations, else keywords, the slice_param++ advance
 * at loop end, closing braces). */
912 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
913 struct decode_state *decode_state,
914 struct gen7_mfd_context *gen7_mfd_context)
916 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
917 VAPictureParameterBufferH264 *pic_param;
918 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
919 dri_bo *slice_data_bo;
922 assert(decode_state->pic_param && decode_state->pic_param->buffer);
923 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
924 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
/* All commands for the picture are emitted atomically into one batch. */
926 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
927 intel_batchbuffer_emit_mi_flush(batch);
928 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
929 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
930 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
931 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
932 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
933 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
934 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
936 for (j = 0; j < decode_state->num_slice_params; j++) {
937 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
938 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
939 slice_data_bo = decode_state->slice_datas[j]->bo;
940 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
/* Look ahead to the next slice group (or NULL at the end) so each
 * slice can learn where the following slice starts. */
942 if (j == decode_state->num_slice_params - 1)
943 next_slice_group_param = NULL;
945 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
947 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
948 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
949 assert((slice_param->slice_type == SLICE_TYPE_I) ||
950 (slice_param->slice_type == SLICE_TYPE_SI) ||
951 (slice_param->slice_type == SLICE_TYPE_P) ||
952 (slice_param->slice_type == SLICE_TYPE_SP) ||
953 (slice_param->slice_type == SLICE_TYPE_B));
955 if (i < decode_state->slice_params[j]->num_elements - 1)
956 next_slice_param = slice_param + 1;
958 next_slice_param = next_slice_group_param;
960 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
961 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
962 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
963 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
964 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
969 intel_batchbuffer_end_atomic(batch);
970 intel_batchbuffer_flush(batch);
/*
 * Per-picture initialisation for MPEG-2 decoding.
 *
 * Binds the current render target as the pre-deblocking output surface,
 * publishes the reference frames to the context, and (re)allocates the
 * BSD/MPC row-store scratch buffer whose size depends on the frame width
 * in macroblocks.  Scratch buffers that the MPEG-2 pipeline does not use
 * are flagged invalid.
 *
 * NOTE(review): this extract is elided (gaps in the embedded numbering);
 * the declaration of the local "bo" and some call arguments are not
 * visible here — confirm against the full source.
 */
974 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
975 struct decode_state *decode_state,
976 struct gen7_mfd_context *gen7_mfd_context)
978 VAPictureParameterBufferMPEG2 *pic_param;
979 struct i965_driver_data *i965 = i965_driver_data(ctx);
980 struct object_surface *obj_surface;
982 unsigned int width_in_mbs;
/* The picture parameter buffer is mandatory for every frame. */
984 assert(decode_state->pic_param && decode_state->pic_param->buffer);
985 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
986 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
/* Refresh the reference-surface list used by the hardware for this frame. */
988 mpeg2_set_reference_surfaces(
990 gen7_mfd_context->reference_surface,
995 /* Current decoded picture */
996 obj_surface = decode_state->render_object;
997 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* MPEG-2 writes the pre-deblocking output; hold a reference on the surface BO. */
999 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1000 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1001 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1002 gen7_mfd_context->pre_deblocking_output.valid = 1;
/* Row-store scratch space for the BSD/MPC unit, sized from the picture width. */
1004 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1005 bo = dri_bo_alloc(i965->intel.bufmgr,
1006 "bsd mpc row store",
1010 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1011 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* The buffers below are not used by the MPEG-2 decode path. */
1013 gen7_mfd_context->post_deblocking_output.valid = 0;
1014 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1015 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1016 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1017 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit the MFX_MPEG2_PIC_STATE command (13 dwords) describing the current
 * MPEG-2 picture: f_codes, picture coding extension flags, coding type and
 * the frame size in macroblocks.  Trailing dwords are zeroed.
 */
1021 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1022 struct decode_state *decode_state,
1023 struct gen7_mfd_context *gen7_mfd_context)
1025 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1026 VAPictureParameterBufferMPEG2 *pic_param;
1027 unsigned int slice_concealment_disable_bit = 0;
1029 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1030 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
/* NOTE(review): the condition guarding this assignment is elided in this
 * extract — confirm against the full source before relying on it. */
1032 slice_concealment_disable_bit = 1;
1034 BEGIN_BCS_BATCH(batch, 13);
1035 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* Dword 1: the 16-bit f_code packs four 4-bit fields; they are unpacked
 * here into the hardware layout together with the coding-extension bits. */
1036 OUT_BCS_BATCH(batch,
1037 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1038 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1039 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1040 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1041 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1042 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1043 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1044 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1045 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1046 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1047 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1048 pic_param->picture_coding_extension.bits.alternate_scan << 6);
/* Dword 2: picture coding type (I/P/B). */
1049 OUT_BCS_BATCH(batch,
1050 pic_param->picture_coding_type << 9);
/* Dword 3: slice-concealment control plus frame size in MBs, minus one. */
1051 OUT_BCS_BATCH(batch,
1052 (slice_concealment_disable_bit << 31) |
1053 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1054 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* Dwords 4-12: reserved / unused, must be zero. */
1055 OUT_BCS_BATCH(batch, 0);
1056 OUT_BCS_BATCH(batch, 0);
1057 OUT_BCS_BATCH(batch, 0);
1058 OUT_BCS_BATCH(batch, 0);
1059 OUT_BCS_BATCH(batch, 0);
1060 OUT_BCS_BATCH(batch, 0);
1061 OUT_BCS_BATCH(batch, 0);
1062 OUT_BCS_BATCH(batch, 0);
1063 OUT_BCS_BATCH(batch, 0);
1064 ADVANCE_BCS_BATCH(batch);
/*
 * Update the context's cached MPEG-2 quantiser matrices from the incoming
 * VAIQMatrixBufferMPEG2 (if any) and commit them to hardware.
 *
 * The incoming matrices are in zig-zag scan order; zigzag_direct[] is used
 * to store them in raster order in the cached copy.  A cached load_* field
 * of -1 means "never loaded", in which case the incoming value is taken
 * unconditionally.
 */
1068 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1069 struct decode_state *decode_state,
1070 struct gen7_mfd_context *gen7_mfd_context)
1072 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1075 /* Update internal QM state */
1076 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1077 VAIQMatrixBufferMPEG2 * const iq_matrix =
1078 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1080 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1081 iq_matrix->load_intra_quantiser_matrix) {
1082 gen_iq_matrix->load_intra_quantiser_matrix =
1083 iq_matrix->load_intra_quantiser_matrix;
1084 if (iq_matrix->load_intra_quantiser_matrix) {
/* De-zigzag the intra matrix into the cached raster-order copy. */
1085 for (j = 0; j < 64; j++)
1086 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1087 iq_matrix->intra_quantiser_matrix[j];
1091 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1092 iq_matrix->load_non_intra_quantiser_matrix) {
1093 gen_iq_matrix->load_non_intra_quantiser_matrix =
1094 iq_matrix->load_non_intra_quantiser_matrix;
1095 if (iq_matrix->load_non_intra_quantiser_matrix) {
/* Same de-zigzag treatment for the non-intra matrix. */
1096 for (j = 0; j < 64; j++)
1097 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1098 iq_matrix->non_intra_quantiser_matrix[j];
1103 /* Commit QM state to HW */
/* NOTE(review): the lines selecting intra (i == 0) vs non-intra (i == 1)
 * inside this loop are elided in this extract — confirm the branch
 * structure against the full source. */
1104 for (i = 0; i < 2; i++) {
1105 unsigned char *qm = NULL;
1109 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1110 qm = gen_iq_matrix->intra_quantiser_matrix;
1111 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1114 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1115 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1116 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
/* Emit 64 bytes of quantiser matrix via the shared QM-state helper. */
1123 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_MPEG2_BSD_OBJECT command (5 dwords) for a single slice.
 *
 * The macroblock count is derived from the distance between this slice's
 * start position and the next slice's start position (or the bottom of the
 * picture for the last slice).  For field pictures a driver workaround
 * (wa_mpeg2_slice_vertical_position) halves the reported vertical slice
 * positions.
 *
 * NOTE(review): the line setting is_field_pic = 1 under the field-picture
 * condition is elided in this extract.
 */
1128 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1129 VAPictureParameterBufferMPEG2 *pic_param,
1130 VASliceParameterBufferMPEG2 *slice_param,
1131 VASliceParameterBufferMPEG2 *next_slice_param,
1132 struct gen7_mfd_context *gen7_mfd_context)
1134 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1135 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1136 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1138 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1139 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
/* Apply the vertical-position workaround only when the context flag is set. */
1141 is_field_pic_wa = is_field_pic &&
1142 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1144 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1145 hpos0 = slice_param->slice_horizontal_position;
1147 if (next_slice_param == NULL) {
/* Last slice: extend to the bottom of the (field) picture. */
1148 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1151 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1152 hpos1 = next_slice_param->slice_horizontal_position;
/* Macroblocks covered by this slice, in raster-scan order. */
1155 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1157 BEGIN_BCS_BATCH(batch, 5);
1158 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* macroblock_offset is in bits; >> 3 converts whole bytes already consumed
 * by the slice header, the residual bit offset goes in the last dword. */
1159 OUT_BCS_BATCH(batch,
1160 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1161 OUT_BCS_BATCH(batch,
1162 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1163 OUT_BCS_BATCH(batch,
1167 (next_slice_param == NULL) << 5 |
1168 (next_slice_param == NULL) << 3 |
1169 (slice_param->macroblock_offset & 0x7));
1170 OUT_BCS_BATCH(batch,
1171 (slice_param->quantiser_scale_code << 24) |
1172 (vpos1 << 8 | hpos1));
1173 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 picture decode: runs per-picture init, then builds the
 * BCS batch — common pipe state, picture state, QM state, and one BSD
 * object per slice — and flushes it to hardware.  Mirrors the AVC decode
 * driver loop earlier in this file.
 */
1177 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1178 struct decode_state *decode_state,
1179 struct gen7_mfd_context *gen7_mfd_context)
1181 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1182 VAPictureParameterBufferMPEG2 *pic_param;
1183 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1184 dri_bo *slice_data_bo;
1187 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1188 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1190 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
/* All MFX commands for the frame are emitted atomically on the BCS ring. */
1191 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1192 intel_batchbuffer_emit_mi_flush(batch);
1193 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1194 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1195 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1196 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1197 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1198 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Lazily evaluate the field-slice vertical-position workaround once. */
1200 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1201 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1202 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
/* Outer loop: one slice-parameter buffer per group; inner loop: each slice
 * element.  next_slice_param lets the BSD object know where the slice ends. */
1204 for (j = 0; j < decode_state->num_slice_params; j++) {
1205 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1206 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1207 slice_data_bo = decode_state->slice_datas[j]->bo;
1208 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1210 if (j == decode_state->num_slice_params - 1)
1211 next_slice_group_param = NULL;
1213 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1215 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1216 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1218 if (i < decode_state->slice_params[j]->num_elements - 1)
1219 next_slice_param = slice_param + 1;
1221 next_slice_param = next_slice_group_param;
1223 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1228 intel_batchbuffer_end_atomic(batch);
1229 intel_batchbuffer_flush(batch);
/* Maps the VA-API VC-1 picture_type (I/P/B/BI/skipped) to the GEN7
 * hardware picture-type encoding.  NOTE(review): the intermediate
 * initializer entries are elided in this extract. */
1232 static const int va_to_gen7_vc1_pic_type[5] = {
1236 GEN7_VC1_BI_PICTURE,
/* Maps the VA-API VC-1 mv_mode to the GEN7 unified MV-mode encoding.
 * NOTE(review): the first and last initializer entries are elided here. */
1240 static const int va_to_gen7_vc1_mv[4] = {
1242 2, /* 1-MV half-pel */
1243 3, /* 1-MV half-pef bilinear */
/* B-picture temporal scale factors (8.8 fixed point, out of 256), indexed
 * by VAPictureParameterBufferVC1.b_picture_fraction; used to derive the
 * backward reference distance (brfd). */
1247 static const int b_picture_scale_factor[21] = {
1248 128, 85, 170, 64, 192,
1249 51, 102, 153, 204, 43,
1250 215, 37, 74, 111, 148,
1251 185, 222, 32, 96, 160,
/* Maps conditional_overlap_flag to the GEN7 CONDOVER field.
 * NOTE(review): the initializer entries are elided in this extract. */
1255 static const int va_to_gen7_vc1_condover[3] = {
/* Maps sequence_fields.bits.profile (simple/main/reserved/advanced) to the
 * GEN7 profile encoding. */
1261 static const int va_to_gen7_vc1_profile[4] = {
1262 GEN7_VC1_SIMPLE_PROFILE,
1263 GEN7_VC1_MAIN_PROFILE,
1264 GEN7_VC1_RESERVED_PROFILE,
1265 GEN7_VC1_ADVANCED_PROFILE
/*
 * Destructor callback for the per-surface VC-1 private data (installed as
 * obj_surface->free_private_data): releases the direct-MV buffer and frees
 * the struct.  NOTE(review): the early return and the clearing of *data
 * are elided in this extract.
 */
1269 gen8_mfd_free_vc1_surface(void **data)
1271 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1273 if (!gen7_vc1_surface)
1276 dri_bo_unreference(gen7_vc1_surface->dmv);
1277 free(gen7_vc1_surface);
/*
 * Lazily attach VC-1 private data to a surface: a gen7_vc1_surface struct
 * recording the picture type, plus a direct-MV write/read buffer sized at
 * 64 bytes per macroblock.  The destructor is installed so the surface
 * code can release the private data later.
 */
1282 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1283 VAPictureParameterBufferVC1 *pic_param,
1284 struct object_surface *obj_surface)
1286 struct i965_driver_data *i965 = i965_driver_data(ctx);
1287 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1288 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1289 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1291 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
/* Allocate the private struct only once per surface. */
1293 if (!gen7_vc1_surface) {
1294 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1295 assert((obj_surface->size & 0x3f) == 0);
1296 obj_surface->private_data = gen7_vc1_surface;
/* Remember the picture type; B-picture direct mode reads it back later. */
1299 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
/* 64 bytes of direct-MV state per macroblock. */
1301 if (gen7_vc1_surface->dmv == NULL) {
1302 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303 "direct mv w/r buffer",
1304 width_in_mbs * height_in_mbs * 64,
/*
 * Per-picture initialisation for VC-1 decoding.
 *
 * Sets up the render target (as post- or pre-deblocking output depending
 * on whether the in-loop filter is enabled), the per-surface direct-MV
 * buffer, the row-store scratch buffers, and — when the picture carries
 * bitplane data — repacks the VA bitplane buffer (two 4-bit macroblock
 * entries per byte) into the layout the hardware reads.
 *
 * NOTE(review): this extract is elided; several local declarations (bo,
 * width_in_mbs, picture_type, the bitplane loop locals) and some branch
 * keywords are not visible here.
 */
1310 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1311 struct decode_state *decode_state,
1312 struct gen7_mfd_context *gen7_mfd_context)
1314 VAPictureParameterBufferVC1 *pic_param;
1315 struct i965_driver_data *i965 = i965_driver_data(ctx);
1316 struct object_surface *obj_surface;
1321 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1322 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1323 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1324 picture_type = pic_param->picture_fields.bits.picture_type;
1326 intel_update_vc1_frame_store_index(ctx,
1329 gen7_mfd_context->reference_surface);
1331 /* Current decoded picture */
1332 obj_surface = decode_state->render_object;
1333 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1334 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* With the loop filter on, hardware writes the post-deblocking output;
 * otherwise the pre-deblocking output.  Exactly one of the two is valid. */
1336 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1337 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1338 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1339 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1341 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1342 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1343 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1344 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
/* Scratch buffers sized from the picture width in macroblocks. */
1346 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1347 bo = dri_bo_alloc(i965->intel.bufmgr,
1352 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1353 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1355 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1356 bo = dri_bo_alloc(i965->intel.bufmgr,
1357 "deblocking filter row store",
1358 width_in_mbs * 7 * 64,
1361 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1362 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1364 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1365 bo = dri_bo_alloc(i965->intel.bufmgr,
1366 "bsd mpc row store",
1370 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1371 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1373 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
/* Bitplane buffer is only needed when the picture parameters carry one. */
1375 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1376 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1378 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1379 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1380 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1381 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1383 uint8_t *src = NULL, *dst = NULL;
1385 assert(decode_state->bit_plane->buffer);
1386 src = decode_state->bit_plane->buffer;
1388 bo = dri_bo_alloc(i965->intel.bufmgr,
1390 bitplane_width * height_in_mbs,
1393 gen7_mfd_context->bitplane_read_buffer.bo = bo;
/* Repack: the VA buffer stores one 4-bit entry per macroblock, two per
 * byte; this loop re-nibbles them row by row into the hardware layout. */
1395 dri_bo_map(bo, True);
1396 assert(bo->virtual);
1399 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1400 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1401 int src_index, dst_index;
1405 src_index = (src_h * width_in_mbs + src_w) / 2;
1406 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1407 src_value = ((src[src_index] >> src_shift) & 0xf);
/* NOTE(review): the statement executed for skipped pictures here is
 * elided in this extract — confirm against the full source. */
1409 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1413 dst_index = src_w / 2;
1414 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd trailing macroblock column: shift the last nibble into place. */
1418 dst[src_w / 2] >>= 4;
1420 dst += bitplane_width;
1425 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Emit the MFD_VC1_LONG_PIC_STATE command (6 dwords) for the current
 * picture.  Before emitting, this derives all the hardware fields from
 * the VA picture parameters:
 *   - alt_pquant_config / alt_pquant_edge_mask from DQUANT/DQPROFILE
 *   - unified_mv_mode via va_to_gen7_vc1_mv (using mv_mode2 when the
 *     primary mode is intensity compensation)
 *   - B-picture scale factor / backward reference distance (brfd)
 *   - picture type remap (advanced-profile I is treated as BI)
 *   - AC coding-set index selection by picture type
 *   - forced 8x8 transform when variable-sized transforms are disabled
 *     (VC-1 spec 8.3.6.2.1, also a MFX_VC1_PIC_STATE requirement)
 *   - direct-MV surface validity from the backward reference's type
 *   - conditional-overlap (condover) and interpolation mode
 *
 * NOTE(review): this extract is elided; several locals (profile, fcm,
 * brfd, trans_ac_y, condover, picture_type), case labels of the dqprofile
 * switch and some else branches are not visible here.
 */
1429 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1430 struct decode_state *decode_state,
1431 struct gen7_mfd_context *gen7_mfd_context)
1433 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1434 VAPictureParameterBufferVC1 *pic_param;
1435 struct object_surface *obj_surface;
1436 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1437 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1438 int unified_mv_mode;
1439 int ref_field_pic_polarity = 0;
1440 int scale_factor = 0;
1442 int dmv_surface_valid = 0;
1448 int interpolation_mode = 0;
1450 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1451 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1453 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1454 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1455 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1456 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1457 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1458 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1459 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1460 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Derive alternate-pquant config/edge mask from DQUANT mode (VC-1 VOPDQUANT). */
1463 alt_pquant_config = 0;
1464 alt_pquant_edge_mask = 0;
1465 } else if (dquant == 2) {
1466 alt_pquant_config = 1;
1467 alt_pquant_edge_mask = 0xf;
1469 assert(dquant == 1);
1470 if (dquantfrm == 0) {
1471 alt_pquant_config = 0;
1472 alt_pquant_edge_mask = 0;
1475 assert(dquantfrm == 1);
1476 alt_pquant_config = 1;
1478 switch (dqprofile) {
1480 if (dqbilevel == 0) {
1481 alt_pquant_config = 2;
1482 alt_pquant_edge_mask = 0;
1484 assert(dqbilevel == 1);
1485 alt_pquant_config = 3;
1486 alt_pquant_edge_mask = 0;
1491 alt_pquant_edge_mask = 0xf;
1496 alt_pquant_edge_mask = 0x9;
1498 alt_pquant_edge_mask = (0x3 << dqdbedge);
1503 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* When the primary mode is intensity compensation, the effective MV mode
 * is carried in mv_mode2. */
1512 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1513 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1514 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1516 assert(pic_param->mv_fields.bits.mv_mode < 4);
1517 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1520 if (pic_param->sequence_fields.bits.interlace == 1 &&
1521 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1522 /* FIXME: calculate reference field picture polarity */
1524 ref_field_pic_polarity = 0;
1527 if (pic_param->b_picture_fraction < 21)
1528 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1530 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* Advanced-profile I pictures are programmed as BI on this hardware. */
1532 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1533 picture_type == GEN7_VC1_I_PICTURE)
1534 picture_type = GEN7_VC1_BI_PICTURE;
1536 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1537 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1539 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1542 * 8.3.6.2.1 Transform Type Selection
1543 * If variable-sized transform coding is not enabled,
1544 * then the 8x8 transform shall be used for all blocks.
1545 * it is also MFX_VC1_PIC_STATE requirement.
1547 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1548 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1549 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* B pictures read direct MVs from the backward reference; only valid if
 * that reference is itself a P picture (not I/BI). */
1553 if (picture_type == GEN7_VC1_B_PICTURE) {
1554 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1556 obj_surface = decode_state->reference_objects[1];
1559 gen7_vc1_surface = obj_surface->private_data;
1561 if (!gen7_vc1_surface ||
1562 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1563 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1564 dmv_surface_valid = 0;
1566 dmv_surface_valid = 1;
1569 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1571 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1572 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1574 if (pic_param->picture_fields.bits.top_field_first)
/* Backward reference distance for B pictures, scaled by the B fraction. */
1580 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1581 brfd = pic_param->reference_fields.bits.reference_distance;
1582 brfd = (scale_factor * brfd) >> 8;
1583 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
/* Conditional overlap smoothing (condover) depends on profile, picture
 * type and quantizer scale; elided branches set the condover local. */
1590 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1591 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1592 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1596 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1597 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1600 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1601 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1602 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1604 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1605 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1611 assert(pic_param->conditional_overlap_flag < 3);
1612 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
/* Sub-pel interpolation filter selection from the effective MV mode. */
1614 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1615 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1616 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1617 interpolation_mode = 9; /* Half-pel bilinear */
1618 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1619 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1620 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1621 interpolation_mode = 1; /* Half-pel bicubic */
1623 interpolation_mode = 0; /* Quarter-pel bicubic */
1625 BEGIN_BCS_BATCH(batch, 6);
1626 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
/* Dword 1: coded size in macroblocks, minus one. */
1627 OUT_BCS_BATCH(batch,
1628 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1629 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1630 OUT_BCS_BATCH(batch,
1631 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1632 dmv_surface_valid << 15 |
1633 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1634 pic_param->rounding_control << 13 |
1635 pic_param->sequence_fields.bits.syncmarker << 12 |
1636 interpolation_mode << 8 |
1637 0 << 7 | /* FIXME: scale up or down ??? */
1638 pic_param->range_reduction_frame << 6 |
1639 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1641 !pic_param->picture_fields.bits.is_first_field << 3 |
1642 (pic_param->sequence_fields.bits.profile == 3) << 0);
1643 OUT_BCS_BATCH(batch,
1644 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1645 picture_type << 26 |
1648 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1650 OUT_BCS_BATCH(batch,
1651 unified_mv_mode << 28 |
1652 pic_param->mv_fields.bits.four_mv_switch << 27 |
1653 pic_param->fast_uvmc_flag << 26 |
1654 ref_field_pic_polarity << 25 |
1655 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1656 pic_param->reference_fields.bits.reference_distance << 20 |
1657 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1658 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1659 pic_param->mv_fields.bits.extended_mv_range << 8 |
1660 alt_pquant_edge_mask << 4 |
1661 alt_pquant_config << 2 |
1662 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1663 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* Dword 5: bitplane presence (note: per-flag bits are inverted — a set
 * bit means "raw mode", i.e. the bitplane is NOT coded) plus VLC table
 * selectors. */
1664 OUT_BCS_BATCH(batch,
1665 !!pic_param->bitplane_present.value << 31 |
1666 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1667 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1668 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1669 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1670 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1671 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1672 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1673 pic_param->mv_fields.bits.mv_table << 20 |
1674 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1675 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1676 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1677 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1678 pic_param->mb_mode_table << 8 |
1680 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1681 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1682 pic_param->cbp_table << 0);
1683 ADVANCE_BCS_BATCH(batch);
1687 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1688 struct decode_state *decode_state,
1689 struct gen7_mfd_context *gen7_mfd_context)
1691 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1692 VAPictureParameterBufferVC1 *pic_param;
1693 int intensitycomp_single;
1695 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1696 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1698 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1699 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1700 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1702 BEGIN_BCS_BATCH(batch, 6);
1703 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1704 OUT_BCS_BATCH(batch,
1705 0 << 14 | /* FIXME: double ??? */
1707 intensitycomp_single << 10 |
1708 intensitycomp_single << 8 |
1709 0 << 4 | /* FIXME: interlace mode */
1711 OUT_BCS_BATCH(batch,
1712 pic_param->luma_shift << 16 |
1713 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1714 OUT_BCS_BATCH(batch, 0);
1715 OUT_BCS_BATCH(batch, 0);
1716 OUT_BCS_BATCH(batch, 0);
1717 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFX_VC1_DIRECTMODE_STATE command (7 dwords): the direct-MV
 * write buffer of the current render target and the direct-MV read buffer
 * of the backward reference (reference_objects[1]).  Either relocation is
 * replaced with a zero dword when the corresponding buffer is absent.
 */
1721 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1722 struct decode_state *decode_state,
1723 struct gen7_mfd_context *gen7_mfd_context)
1725 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1726 struct object_surface *obj_surface;
1727 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
/* Current picture: its per-surface DMV buffer is the write target. */
1729 obj_surface = decode_state->render_object;
1731 if (obj_surface && obj_surface->private_data) {
1732 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
/* Backward reference: its DMV buffer is read for direct-mode prediction. */
1735 obj_surface = decode_state->reference_objects[1];
1737 if (obj_surface && obj_surface->private_data) {
1738 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1741 BEGIN_BCS_BATCH(batch, 7);
1742 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1744 if (dmv_write_buffer)
1745 OUT_BCS_RELOC(batch, dmv_write_buffer,
1746 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1749 OUT_BCS_BATCH(batch, 0);
1751 OUT_BCS_BATCH(batch, 0);
1752 OUT_BCS_BATCH(batch, 0);
1754 if (dmv_read_buffer)
1755 OUT_BCS_RELOC(batch, dmv_read_buffer,
1756 I915_GEM_DOMAIN_INSTRUCTION, 0,
1759 OUT_BCS_BATCH(batch, 0);
1761 OUT_BCS_BATCH(batch, 0);
1762 OUT_BCS_BATCH(batch, 0);
1764 ADVANCE_BCS_BATCH(batch);
/*
 * Adjust a slice-data bit offset for VC-1 start-code emulation prevention:
 * scans the slice-header bytes for the 0x00 0x00 0x03 escape pattern and
 * recomputes the offset in terms of the raw byte stream.
 *
 * NOTE(review): the loop-body lines that advance j past escape bytes (and
 * any profile-specific handling) are elided in this extract — confirm
 * against the full source.
 */
1768 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1770 int out_slice_data_bit_offset;
1771 int slice_header_size = in_slice_data_bit_offset / 8;
1775 out_slice_data_bit_offset = in_slice_data_bit_offset;
1777 for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1778 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
/* Rebuild the bit offset from the adjusted byte index, keeping the
 * sub-byte remainder of the original offset. */
1783 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1786 return out_slice_data_bit_offset;
/*
 * Emit one MFD_VC1_BSD_OBJECT command (5 dwords) for a single VC-1 slice.
 *
 * The slice data is mapped so the macroblock bit offset can be corrected
 * for start-code emulation-prevention bytes before programming the
 * hardware.  The slice's vertical extent runs from its own start row to
 * the next slice's start row (or the bottom of the picture for the last
 * slice).
 */
1790 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1791 VAPictureParameterBufferVC1 *pic_param,
1792 VASliceParameterBufferVC1 *slice_param,
1793 VASliceParameterBufferVC1 *next_slice_param,
1794 dri_bo *slice_data_bo,
1795 struct gen7_mfd_context *gen7_mfd_context)
1797 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1798 int next_slice_start_vert_pos;
1799 int macroblock_offset;
1800 uint8_t *slice_data = NULL;
/* Map the slice BO read-only to fix up the bit offset on the CPU. */
1802 dri_bo_map(slice_data_bo, 0);
1803 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1804 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1805 slice_param->macroblock_offset,
1806 pic_param->sequence_fields.bits.profile);
1807 dri_bo_unmap(slice_data_bo);
1809 if (next_slice_param)
1810 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1812 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1814 BEGIN_BCS_BATCH(batch, 5);
1815 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
/* macroblock_offset is in bits; >> 3 yields header bytes, the residual
 * bit offset goes in the final dword. */
1816 OUT_BCS_BATCH(batch,
1817 slice_param->slice_data_size - (macroblock_offset >> 3));
1818 OUT_BCS_BATCH(batch,
1819 slice_param->slice_data_offset + (macroblock_offset >> 3));
1820 OUT_BCS_BATCH(batch,
1821 slice_param->slice_vertical_position << 16 |
1822 next_slice_start_vert_pos << 0);
1823 OUT_BCS_BATCH(batch,
1824 (macroblock_offset & 0x7));
1825 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 picture decode: runs per-picture init, then builds the
 * BCS batch — pipe mode/surface/buffer state, picture state, prediction
 * pipe state, direct-mode state, and one BSD object per slice — and
 * flushes it to hardware.  Same driver-loop shape as the MPEG-2 and AVC
 * paths in this file.
 */
1829 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1830 struct decode_state *decode_state,
1831 struct gen7_mfd_context *gen7_mfd_context)
1833 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1834 VAPictureParameterBufferVC1 *pic_param;
1835 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1836 dri_bo *slice_data_bo;
1839 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1840 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1842 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
/* All MFX commands for the frame are emitted atomically on the BCS ring. */
1843 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1844 intel_batchbuffer_emit_mi_flush(batch);
1845 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1846 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1847 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1848 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1849 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1850 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1851 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: one slice-parameter buffer per group; inner loop: each
 * slice element, with next_slice_param marking where the slice ends. */
1853 for (j = 0; j < decode_state->num_slice_params; j++) {
1854 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1855 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1856 slice_data_bo = decode_state->slice_datas[j]->bo;
1857 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1859 if (j == decode_state->num_slice_params - 1)
1860 next_slice_group_param = NULL;
1862 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1864 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1865 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1867 if (i < decode_state->slice_params[j]->num_elements - 1)
1868 next_slice_param = slice_param + 1;
1870 next_slice_param = next_slice_group_param;
1872 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1877 intel_batchbuffer_end_atomic(batch);
1878 intel_batchbuffer_flush(batch);
1882 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1883 struct decode_state *decode_state,
1884 struct gen7_mfd_context *gen7_mfd_context)
1886 struct object_surface *obj_surface;
1887 VAPictureParameterBufferJPEGBaseline *pic_param;
1888 int subsampling = SUBSAMPLE_YUV420;
1889 int fourcc = VA_FOURCC('I', 'M', 'C', '3');
1891 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1893 if (pic_param->num_components == 1)
1894 subsampling = SUBSAMPLE_YUV400;
1895 else if (pic_param->num_components == 3) {
1896 int h1 = pic_param->components[0].h_sampling_factor;
1897 int h2 = pic_param->components[1].h_sampling_factor;
1898 int h3 = pic_param->components[2].h_sampling_factor;
1899 int v1 = pic_param->components[0].v_sampling_factor;
1900 int v2 = pic_param->components[1].v_sampling_factor;
1901 int v3 = pic_param->components[2].v_sampling_factor;
1903 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1904 v1 == 2 && v2 == 1 && v3 == 1) {
1905 subsampling = SUBSAMPLE_YUV420;
1906 fourcc = VA_FOURCC('I', 'M', 'C', '3');
1907 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1908 v1 == 1 && v2 == 1 && v3 == 1) {
1909 subsampling = SUBSAMPLE_YUV422H;
1910 fourcc = VA_FOURCC('4', '2', '2', 'H');
1911 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1912 v1 == 1 && v2 == 1 && v3 == 1) {
1913 subsampling = SUBSAMPLE_YUV444;
1914 fourcc = VA_FOURCC('4', '4', '4', 'P');
1915 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1916 v1 == 1 && v2 == 1 && v3 == 1) {
1917 subsampling = SUBSAMPLE_YUV411;
1918 fourcc = VA_FOURCC('4', '1', '1', 'P');
1919 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1920 v1 == 2 && v2 == 1 && v3 == 1) {
1921 subsampling = SUBSAMPLE_YUV422V;
1922 fourcc = VA_FOURCC('4', '2', '2', 'V');
1923 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1924 v1 == 2 && v2 == 2 && v3 == 2) {
1925 subsampling = SUBSAMPLE_YUV422H;
1926 fourcc = VA_FOURCC('4', '2', '2', 'H');
1927 } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1928 v1 == 2 && v2 == 1 && v3 == 1) {
1929 subsampling = SUBSAMPLE_YUV422V;
1930 fourcc = VA_FOURCC('4', '2', '2', 'V');
1938 /* Current decoded picture */
1939 obj_surface = decode_state->render_object;
1940 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1942 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1943 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1944 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1945 gen7_mfd_context->pre_deblocking_output.valid = 1;
1947 gen7_mfd_context->post_deblocking_output.bo = NULL;
1948 gen7_mfd_context->post_deblocking_output.valid = 0;
1950 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1951 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1953 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1954 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1956 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1957 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1959 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1960 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1962 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1963 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Maps a VA rotation index (0/90/180/270 degrees) to the MFX_JPEG_PIC_STATE
 * rotation field encoding. Only entry [0] (no rotation) is used below.
 * NOTE(review): the closing "};" of this initializer is elided in this view. */
1966 static const int va_to_gen7_jpeg_rotation[4] = {
1967 GEN7_JPEG_ROTATION_0,
1968 GEN7_JPEG_ROTATION_90,
1969 GEN7_JPEG_ROTATION_180,
1970 GEN7_JPEG_ROTATION_270
1974 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1975 struct decode_state *decode_state,
1976 struct gen7_mfd_context *gen7_mfd_context)
1978 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1979 VAPictureParameterBufferJPEGBaseline *pic_param;
1980 int chroma_type = GEN7_YUV420;
1981 int frame_width_in_blks;
1982 int frame_height_in_blks;
1984 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1985 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1987 if (pic_param->num_components == 1)
1988 chroma_type = GEN7_YUV400;
1989 else if (pic_param->num_components == 3) {
1990 int h1 = pic_param->components[0].h_sampling_factor;
1991 int h2 = pic_param->components[1].h_sampling_factor;
1992 int h3 = pic_param->components[2].h_sampling_factor;
1993 int v1 = pic_param->components[0].v_sampling_factor;
1994 int v2 = pic_param->components[1].v_sampling_factor;
1995 int v3 = pic_param->components[2].v_sampling_factor;
1997 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1998 v1 == 2 && v2 == 1 && v3 == 1)
1999 chroma_type = GEN7_YUV420;
2000 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2001 v1 == 1 && v2 == 1 && v3 == 1)
2002 chroma_type = GEN7_YUV422H_2Y;
2003 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2004 v1 == 1 && v2 == 1 && v3 == 1)
2005 chroma_type = GEN7_YUV444;
2006 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2007 v1 == 1 && v2 == 1 && v3 == 1)
2008 chroma_type = GEN7_YUV411;
2009 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2010 v1 == 2 && v2 == 1 && v3 == 1)
2011 chroma_type = GEN7_YUV422V_2Y;
2012 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2013 v1 == 2 && v2 == 2 && v3 == 2)
2014 chroma_type = GEN7_YUV422H_4Y;
2015 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2016 v1 == 2 && v2 == 1 && v3 == 1)
2017 chroma_type = GEN7_YUV422V_4Y;
2022 if (chroma_type == GEN7_YUV400 ||
2023 chroma_type == GEN7_YUV444 ||
2024 chroma_type == GEN7_YUV422V_2Y) {
2025 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2026 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2027 } else if (chroma_type == GEN7_YUV411) {
2028 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2029 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2031 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2032 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2035 BEGIN_BCS_BATCH(batch, 3);
2036 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2037 OUT_BCS_BATCH(batch,
2038 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2039 (chroma_type << 0));
2040 OUT_BCS_BATCH(batch,
2041 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2042 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2043 ADVANCE_BCS_BATCH(batch);
2046 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Emit one MFX_JPEG_HUFF_TABLE_STATE (53 DWs) per Huffman table that the
 * application asked to load; tables not flagged in load_huffman_table are
 * skipped. NOTE(review): the function header, early-return and the loop's
 * "continue" are elided in this view. */
2052 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2053 struct decode_state *decode_state,
2054 struct gen7_mfd_context *gen7_mfd_context,
2057 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2058 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2061 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2064 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2066 for (index = 0; index < num_tables; index++) {
2067 int id = va_to_gen7_jpeg_hufftable[index];
2068 if (!huffman_table->load_huffman_table[index])
2070 BEGIN_BCS_BATCH(batch, 53);
2071 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2072 OUT_BCS_BATCH(batch, id);
/* 12 + 12 + 16 + 164 bytes = 51 payload DWs. NOTE(review): ac_values is
 * declared [162] in va_dec_jpeg.h; the 164 presumably covers the 2 trailing
 * pad bytes of the struct — confirm against the header before changing. */
2073 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2074 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2075 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2076 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2077 ADVANCE_BCS_BATCH(batch);
/* Maps a 1-based JPEG component id (Y=1, Cb=2, Cr=3, alpha=4) to the MFX
 * quantizer-matrix type for MFX_QM_STATE. NOTE(review): entry [0] (the
 * unused placeholder for id 0) is elided in this view. */
2081 static const int va_to_gen7_jpeg_qm[5] = {
2083 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2084 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2085 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2086 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/* Upload the quantization matrix of each picture component. The VA buffer
 * stores matrices in zig-zag scan order; they are converted to raster order
 * before being handed to gen8_mfd_qm_state. NOTE(review): the function
 * header, early-return and the loop's "continue" lines are elided here. */
2090 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2091 struct decode_state *decode_state,
2092 struct gen7_mfd_context *gen7_mfd_context)
2094 VAPictureParameterBufferJPEGBaseline *pic_param;
2095 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2098 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2101 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2102 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2104 assert(pic_param->num_components <= 3);
2106 for (index = 0; index < pic_param->num_components; index++) {
/* Normalize component ids so the first component maps to 1 (Y). */
2107 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2109 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2110 unsigned char raster_qm[64];
/* Skip components whose normalized id falls outside va_to_gen7_jpeg_qm. */
2113 if (id > 4 || id < 1)
2116 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2119 qm_type = va_to_gen7_jpeg_qm[id];
/* De-zigzag: VA supplies the 64 coefficients in zig-zag scan order. */
2121 for (j = 0; j < 64; j++)
2122 raster_qm[zigzag_direct[j]] = qm[j];
2124 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/* Emit one MFD_JPEG_BSD_OBJECT describing a single scan: its byte range in
 * the slice-data BO, its MCU start position, the set of components it
 * covers, the MCU count and the restart interval. NOTE(review): the switch
 * case labels and defaults are elided in this view. */
2129 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2130 VAPictureParameterBufferJPEGBaseline *pic_param,
2131 VASliceParameterBufferJPEGBaseline *slice_param,
2132 VASliceParameterBufferJPEGBaseline *next_slice_param,
2133 dri_bo *slice_data_bo,
2134 struct gen7_mfd_context *gen7_mfd_context)
2136 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2137 int scan_component_mask = 0;
2140 assert(slice_param->num_components > 0);
2141 assert(slice_param->num_components < 4);
2142 assert(slice_param->num_components <= pic_param->num_components);
/* Build a 3-bit mask of which components (Y/Cb/Cr) this scan contains,
 * normalizing selector ids against the first picture component. */
2144 for (i = 0; i < slice_param->num_components; i++) {
2145 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2147 scan_component_mask |= (1 << 0);
2150 scan_component_mask |= (1 << 1);
2153 scan_component_mask |= (1 << 2);
2161 BEGIN_BCS_BATCH(batch, 6);
2162 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2163 OUT_BCS_BATCH(batch,
2164 slice_param->slice_data_size);
2165 OUT_BCS_BATCH(batch,
2166 slice_param->slice_data_offset);
2167 OUT_BCS_BATCH(batch,
2168 slice_param->slice_horizontal_position << 16 |
2169 slice_param->slice_vertical_position << 0);
2170 OUT_BCS_BATCH(batch,
2171 ((slice_param->num_components != 1) << 30) | /* interleaved */
2172 (scan_component_mask << 27) | /* scan components */
2173 (0 << 26) | /* disable interrupt allowed */
2174 (slice_param->num_mcus << 0)); /* MCU count */
2175 OUT_BCS_BATCH(batch,
2176 (slice_param->restart_interval << 0)); /* RestartInterval */
2177 ADVANCE_BCS_BATCH(batch);
2180 /* Workaround for JPEG decoding on Ivybridge */
/* Forward declarations of the driver's surface create/destroy entry points,
 * used by gen8_jpeg_wa_init below. NOTE(review): parts of both prototypes
 * are elided in this view. */
2184 i965_DestroySurfaces(VADriverContextP ctx,
2185 VASurfaceID *surface_list,
2188 i965_CreateSurfaces(VADriverContextP ctx,
2193 VASurfaceID *surfaces);
/* Tiny pre-canned AVC clip decoded before every JPEG picture to put the MFX
 * engine in a known state (the actual workaround). NOTE(review): the struct
 * definition header and several initializer fields are elided here. */
2198 unsigned char data[32];
2200 int data_bit_offset;
2202 } gen7_jpeg_wa_clip = {
2206 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2207 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/* Allocate (once) the scratch surface and the slice-data BO used by the
 * Ivybridge JPEG workaround clip; recreates the surface if one already
 * exists from a previous run. */
2215 gen8_jpeg_wa_init(VADriverContextP ctx,
2216 struct gen7_mfd_context *gen7_mfd_context)
2218 struct i965_driver_data *i965 = i965_driver_data(ctx);
2220 struct object_surface *obj_surface;
/* Drop any stale workaround surface before creating a new one. */
2222 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2223 i965_DestroySurfaces(ctx,
2224 &gen7_mfd_context->jpeg_wa_surface_id,
2227 status = i965_CreateSurfaces(ctx,
2228 gen7_jpeg_wa_clip.width,
2229 gen7_jpeg_wa_clip.height,
2230 VA_RT_FORMAT_YUV420,
2232 &gen7_mfd_context->jpeg_wa_surface_id);
2233 assert(status == VA_STATUS_SUCCESS);
2235 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2236 assert(obj_surface);
2237 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2238 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* The clip bitstream is uploaded only once and reused for every picture. */
2240 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2241 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2245 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2247 gen7_jpeg_wa_clip.data_size,
2248 gen7_jpeg_wa_clip.data);
/* MFX_PIPE_MODE_SELECT for the workaround clip: AVC VLD decode, long
 * format, pre-deblocking output only (deblocker and stream-out disabled). */
2253 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2254 struct gen7_mfd_context *gen7_mfd_context)
2256 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2258 BEGIN_BCS_BATCH(batch, 5);
2259 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2260 OUT_BCS_BATCH(batch,
2261 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2262 (MFD_MODE_VLD << 15) | /* VLD mode */
2263 (0 << 10) | /* disable Stream-Out */
2264 (0 << 9) | /* Post Deblocking Output */
2265 (1 << 8) | /* Pre Deblocking Output */
2266 (0 << 5) | /* not in stitch mode */
2267 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2268 (MFX_FORMAT_AVC << 0));
2269 OUT_BCS_BATCH(batch,
2270 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2271 (0 << 3) | /* terminate if AVC mbdata error occurs */
2272 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2275 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2276 OUT_BCS_BATCH(batch, 0); /* reserved */
2277 ADVANCE_BCS_BATCH(batch);
/* MFX_SURFACE_STATE describing the NV12 workaround surface allocated in
 * gen8_jpeg_wa_init (Y-tiled, planar 4:2:0, interleaved chroma). */
2281 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2282 struct gen7_mfd_context *gen7_mfd_context)
2284 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2285 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2287 BEGIN_BCS_BATCH(batch, 6);
2288 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2289 OUT_BCS_BATCH(batch, 0);
2290 OUT_BCS_BATCH(batch,
2291 ((obj_surface->orig_width - 1) << 18) |
2292 ((obj_surface->orig_height - 1) << 4));
2293 OUT_BCS_BATCH(batch,
2294 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2295 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2296 (0 << 22) | /* surface object control state, ignored */
2297 ((obj_surface->width - 1) << 3) | /* pitch */
2298 (0 << 2) | /* must be 0 */
2299 (1 << 1) | /* must be tiled */
2300 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2301 OUT_BCS_BATCH(batch,
2302 (0 << 16) | /* X offset for U(Cb), must be 0 */
2303 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2304 OUT_BCS_BATCH(batch,
2305 (0 << 16) | /* X offset for V(Cr), must be 0 */
2306 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2307 ADVANCE_BCS_BATCH(batch);
/* MFX_PIPE_BUF_ADDR_STATE for the workaround clip. Only the pre-deblocking
 * output (DW1) and a throwaway intra row-store buffer are real; every other
 * address slot is zeroed. The intra BO is allocated here and unreferenced
 * at the end — the kernel keeps it alive until the batch retires. */
2311 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2312 struct gen7_mfd_context *gen7_mfd_context)
2314 struct i965_driver_data *i965 = i965_driver_data(ctx);
2315 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2316 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2320 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2325 BEGIN_BCS_BATCH(batch, 61);
2326 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2327 OUT_BCS_RELOC(batch,
2329 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2331 OUT_BCS_BATCH(batch, 0);
2332 OUT_BCS_BATCH(batch, 0);
2335 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2336 OUT_BCS_BATCH(batch, 0);
2337 OUT_BCS_BATCH(batch, 0);
2339 /* uncompressed-video & stream out 7-12 */
2340 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2341 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2342 OUT_BCS_BATCH(batch, 0);
2343 OUT_BCS_BATCH(batch, 0);
2344 OUT_BCS_BATCH(batch, 0);
2345 OUT_BCS_BATCH(batch, 0);
2347 /* the DW 13-15 is for intra row store scratch */
2348 OUT_BCS_RELOC(batch,
2350 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2352 OUT_BCS_BATCH(batch, 0);
2353 OUT_BCS_BATCH(batch, 0);
2355 /* the DW 16-18 is for deblocking filter */
2356 OUT_BCS_BATCH(batch, 0);
2357 OUT_BCS_BATCH(batch, 0);
2358 OUT_BCS_BATCH(batch, 0);
/* no reference surfaces are needed for the I-only workaround clip */
2361 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2362 OUT_BCS_BATCH(batch, 0);
2363 OUT_BCS_BATCH(batch, 0);
2365 OUT_BCS_BATCH(batch, 0);
2367 /* the DW52-54 is for mb status address */
2368 OUT_BCS_BATCH(batch, 0);
2369 OUT_BCS_BATCH(batch, 0);
2370 OUT_BCS_BATCH(batch, 0);
2371 /* the DW56-60 is for ILDB & second ILDB address */
2372 OUT_BCS_BATCH(batch, 0);
2373 OUT_BCS_BATCH(batch, 0);
2374 OUT_BCS_BATCH(batch, 0);
2375 OUT_BCS_BATCH(batch, 0);
2376 OUT_BCS_BATCH(batch, 0);
2377 OUT_BCS_BATCH(batch, 0);
2379 ADVANCE_BCS_BATCH(batch);
2381 dri_bo_unreference(intra_bo);
/* MFX_BSP_BUF_BASE_ADDR_STATE for the workaround clip: temporary BSD/MPC
 * and MPR row-store buffers, unreferenced right after emission (the kernel
 * holds them until the batch completes). */
2385 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2386 struct gen7_mfd_context *gen7_mfd_context)
2388 struct i965_driver_data *i965 = i965_driver_data(ctx);
2389 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2390 dri_bo *bsd_mpc_bo, *mpr_bo;
2392 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2393 "bsd mpc row store",
2394 11520, /* 1.5 * 120 * 64 */
2397 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2399 7680, /* 1. 0 * 120 * 64 */
2402 BEGIN_BCS_BATCH(batch, 10);
2403 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2405 OUT_BCS_RELOC(batch,
2407 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2410 OUT_BCS_BATCH(batch, 0);
2411 OUT_BCS_BATCH(batch, 0);
2413 OUT_BCS_RELOC(batch,
2415 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2417 OUT_BCS_BATCH(batch, 0);
2418 OUT_BCS_BATCH(batch, 0);
2420 OUT_BCS_BATCH(batch, 0);
2421 OUT_BCS_BATCH(batch, 0);
2422 OUT_BCS_BATCH(batch, 0);
2424 ADVANCE_BCS_BATCH(batch);
2426 dri_bo_unreference(bsd_mpc_bo);
2427 dri_bo_unreference(mpr_bo);
/* AVC QM state for the workaround clip. NOTE(review): the function body is
 * elided in this view — presumably empty or emits default (flat) matrices;
 * verify against the full source. */
2431 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2432 struct gen7_mfd_context *gen7_mfd_context)
/* MFX_AVC_IMG_STATE for the 1x1-macroblock workaround clip: 4:2:0, CABAC,
 * no MBAFF, all remaining fields zero. */
2438 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2439 struct gen7_mfd_context *gen7_mfd_context)
2441 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2443 int mbaff_frame_flag = 0;
2444 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2446 BEGIN_BCS_BATCH(batch, 16);
2447 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2448 OUT_BCS_BATCH(batch,
2449 width_in_mbs * height_in_mbs);
2450 OUT_BCS_BATCH(batch,
2451 ((height_in_mbs - 1) << 16) |
2452 ((width_in_mbs - 1) << 0));
2453 OUT_BCS_BATCH(batch,
2458 (0 << 12) | /* differ from GEN6 */
2461 OUT_BCS_BATCH(batch,
2462 (1 << 10) | /* 4:2:0 */
2463 (1 << 7) | /* CABAC */
2469 (mbaff_frame_flag << 1) |
2471 OUT_BCS_BATCH(batch, 0);
2472 OUT_BCS_BATCH(batch, 0);
2473 OUT_BCS_BATCH(batch, 0);
2474 OUT_BCS_BATCH(batch, 0);
2475 OUT_BCS_BATCH(batch, 0);
2476 OUT_BCS_BATCH(batch, 0);
2477 OUT_BCS_BATCH(batch, 0);
2478 OUT_BCS_BATCH(batch, 0);
2479 OUT_BCS_BATCH(batch, 0);
2480 OUT_BCS_BATCH(batch, 0);
2481 OUT_BCS_BATCH(batch, 0);
2482 ADVANCE_BCS_BATCH(batch);
/* MFX_AVC_DIRECTMODE_STATE for the workaround clip — all-zero reference and
 * POC entries, since the clip is a single I slice with no references. */
2486 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2487 struct gen7_mfd_context *gen7_mfd_context)
2489 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2492 BEGIN_BCS_BATCH(batch, 71);
2493 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2495 /* reference surfaces 0..15 */
2496 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2497 OUT_BCS_BATCH(batch, 0); /* top */
2498 OUT_BCS_BATCH(batch, 0); /* bottom */
2501 OUT_BCS_BATCH(batch, 0);
2503 /* the current decoding frame/field */
2504 OUT_BCS_BATCH(batch, 0); /* top */
2505 OUT_BCS_BATCH(batch, 0);
2506 OUT_BCS_BATCH(batch, 0);
/* POC list — zeroed for the same reason */
2509 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2510 OUT_BCS_BATCH(batch, 0);
2511 OUT_BCS_BATCH(batch, 0);
2514 OUT_BCS_BATCH(batch, 0);
2515 OUT_BCS_BATCH(batch, 0);
2517 ADVANCE_BCS_BATCH(batch);
/* MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream fetch at the
 * workaround clip's slice-data BO. */
2521 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2522 struct gen7_mfd_context *gen7_mfd_context)
2524 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2526 BEGIN_BCS_BATCH(batch, 11);
2527 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2528 OUT_BCS_RELOC(batch,
2529 gen7_mfd_context->jpeg_wa_slice_data_bo,
2530 I915_GEM_DOMAIN_INSTRUCTION, 0,
2532 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2533 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2534 OUT_BCS_BATCH(batch, 0);
2535 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2536 OUT_BCS_BATCH(batch, 0);
2537 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2538 OUT_BCS_BATCH(batch, 0);
2539 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2540 OUT_BCS_BATCH(batch, 0);
2541 ADVANCE_BCS_BATCH(batch);
/* MFD_AVC_BSD_OBJECT submitting the canned workaround slice: whole clip,
 * marked as last slice, with the bit offset split into its byte part
 * (DW4 bits 16+) and sub-byte remainder (low 3 bits). */
2545 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2546 struct gen7_mfd_context *gen7_mfd_context)
2548 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2550 /* the input bitsteam format on GEN7 differs from GEN6 */
2551 BEGIN_BCS_BATCH(batch, 6);
2552 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2553 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2554 OUT_BCS_BATCH(batch, 0);
2555 OUT_BCS_BATCH(batch,
2561 OUT_BCS_BATCH(batch,
2562 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2565 (1 << 3) | /* LastSlice Flag */
2566 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2567 OUT_BCS_BATCH(batch, 0);
2568 ADVANCE_BCS_BATCH(batch);
/* MFX_AVC_SLICE_STATE for the workaround clip: a single I slice covering
 * one macroblock row, deblocking disabled, marked as the last slice. */
2572 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2573 struct gen7_mfd_context *gen7_mfd_context)
2575 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2576 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2577 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2578 int first_mb_in_slice = 0;
2579 int slice_type = SLICE_TYPE_I;
2581 BEGIN_BCS_BATCH(batch, 11);
2582 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2583 OUT_BCS_BATCH(batch, slice_type);
2584 OUT_BCS_BATCH(batch,
2585 (num_ref_idx_l1 << 24) |
2586 (num_ref_idx_l0 << 16) |
2589 OUT_BCS_BATCH(batch,
2591 (1 << 27) | /* disable Deblocking */
2593 (gen7_jpeg_wa_clip.qp << 16) |
2596 OUT_BCS_BATCH(batch,
2597 (slice_ver_pos << 24) |
2598 (slice_hor_pos << 16) |
2599 (first_mb_in_slice << 0));
2600 OUT_BCS_BATCH(batch,
2601 (next_slice_ver_pos << 16) |
2602 (next_slice_hor_pos << 0));
2603 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2604 OUT_BCS_BATCH(batch, 0);
2605 OUT_BCS_BATCH(batch, 0);
2606 OUT_BCS_BATCH(batch, 0);
2607 OUT_BCS_BATCH(batch, 0);
2608 ADVANCE_BCS_BATCH(batch);
/* Run the whole Ivybridge JPEG workaround: decode the canned one-MB AVC
 * clip to flush/reset the MFX engine before the real JPEG picture. The
 * state emission order mirrors a normal AVC decode pass. */
2612 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2613 struct gen7_mfd_context *gen7_mfd_context)
2615 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2616 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2617 intel_batchbuffer_emit_mi_flush(batch);
2618 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2619 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2620 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2621 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2622 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2623 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2624 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2626 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2627 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2628 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/* Top-level JPEG decode: run the IVB workaround clip, emit common MFX
 * state, then walk the slice parameter buffers twice — first to find the
 * highest Huffman table selector actually referenced (so only the needed
 * tables are loaded), then to emit one BSD object per scan. */
2634 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2635 struct decode_state *decode_state,
2636 struct gen7_mfd_context *gen7_mfd_context)
2638 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2639 VAPictureParameterBufferJPEGBaseline *pic_param;
2640 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2641 dri_bo *slice_data_bo;
2642 int i, j, max_selector = 0;
2644 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2645 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2647 /* Currently only support Baseline DCT */
2648 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2649 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2651 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2653 intel_batchbuffer_emit_mi_flush(batch);
2654 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2655 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2656 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2657 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2658 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* First pass: compute max_selector across every scan component. */
2660 for (j = 0; j < decode_state->num_slice_params; j++) {
2661 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2662 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2663 slice_data_bo = decode_state->slice_datas[j]->bo;
2664 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2666 if (j == decode_state->num_slice_params - 1)
2667 next_slice_group_param = NULL;
2669 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2671 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2674 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2676 if (i < decode_state->slice_params[j]->num_elements - 1)
2677 next_slice_param = slice_param + 1;
2679 next_slice_param = next_slice_group_param;
2681 for (component = 0; component < slice_param->num_components; component++) {
2682 if (max_selector < slice_param->components[component].dc_table_selector)
2683 max_selector = slice_param->components[component].dc_table_selector;
2685 if (max_selector < slice_param->components[component].ac_table_selector)
2686 max_selector = slice_param->components[component].ac_table_selector;
2693 assert(max_selector < 2);
2694 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Second pass: emit one MFD_JPEG_BSD_OBJECT per scan. */
2696 for (j = 0; j < decode_state->num_slice_params; j++) {
2697 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2698 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2699 slice_data_bo = decode_state->slice_datas[j]->bo;
2700 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2702 if (j == decode_state->num_slice_params - 1)
2703 next_slice_group_param = NULL;
2705 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2707 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2708 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2710 if (i < decode_state->slice_params[j]->num_elements - 1)
2711 next_slice_param = slice_param + 1;
2713 next_slice_param = next_slice_group_param;
2715 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2720 intel_batchbuffer_end_atomic(batch);
2721 intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup table, indexed by the clipped quantization
 * index (0..127); values match the table in the VP8 spec (RFC 6386). */
2724 static const int vp8_dc_qlookup[128] =
2726 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2727 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2728 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2729 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2730 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2731 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2732 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2733 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup table, indexed by the clipped quantization
 * index (0..127); values match the table in the VP8 spec (RFC 6386). */
2736 static const int vp8_ac_qlookup[128] =
2738 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2739 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2740 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2741 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2742 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2743 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2744 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2745 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2748 static inline unsigned int vp8_clip_quantization_index(int index)
/* Prepare per-picture state for VP8 decoding: allocate the NV12 render
 * surface, route output through the loop filter (post-deblocking) or
 * around it depending on loop_filter_disable, and (re)allocate the
 * AVC-style row-store scratch buffers sized from the frame width. */
2759 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2760 struct decode_state *decode_state,
2761 struct gen7_mfd_context *gen7_mfd_context)
2763 struct object_surface *obj_surface;
2764 struct i965_driver_data *i965 = i965_driver_data(ctx);
2766 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2767 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2768 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2770 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2771 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2773 /* Current decoded picture */
2774 obj_surface = decode_state->render_object;
2775 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* Exactly one of the two outputs is valid: post-deblocking when the loop
 * filter runs, pre-deblocking when it is disabled. Both point at the same
 * render surface BO. */
2777 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2778 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2779 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2780 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2782 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2783 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2784 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2785 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2787 /* The same as AVC */
2788 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2789 bo = dri_bo_alloc(i965->intel.bufmgr,
2794 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2795 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2797 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2798 bo = dri_bo_alloc(i965->intel.bufmgr,
2799 "deblocking filter row store",
2800 width_in_mbs * 64 * 4,
2803 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2804 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2806 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2807 bo = dri_bo_alloc(i965->intel.bufmgr,
2808 "bsd mpc row store",
2809 width_in_mbs * 64 * 2,
2812 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2813 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2815 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2816 bo = dri_bo_alloc(i965->intel.bufmgr,
2818 width_in_mbs * 64 * 2,
2821 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2822 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2824 gen7_mfd_context->bitplane_read_buffer.valid = 0;
2828 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2829 struct decode_state *decode_state,
2830 struct gen7_mfd_context *gen7_mfd_context)
2832 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2833 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2834 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2835 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2836 dri_bo *probs_bo = decode_state->probability_data->bo;
2838 unsigned int quantization_value[4][6];
2840 log2num = (int)log2(slice_param->num_of_partitions - 1);
2842 BEGIN_BCS_BATCH(batch, 38);
2843 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2844 OUT_BCS_BATCH(batch,
2845 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2846 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2847 OUT_BCS_BATCH(batch,
2849 pic_param->pic_fields.bits.sharpness_level << 16 |
2850 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2851 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2852 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2853 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2854 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2855 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2856 0 << 7 | /* segmentation id streamin disabled */
2857 0 << 6 | /* segmentation id streamout disabled */
2858 pic_param->pic_fields.bits.key_frame << 5 |
2859 pic_param->pic_fields.bits.filter_type << 4 |
2860 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2861 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2863 OUT_BCS_BATCH(batch,
2864 pic_param->loop_filter_level[3] << 24 |
2865 pic_param->loop_filter_level[2] << 16 |
2866 pic_param->loop_filter_level[1] << 8 |
2867 pic_param->loop_filter_level[0] << 0);
2869 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2870 for (i = 0; i < 4; i++) {
2871 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2872 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2873 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2874 /* 101581>>16 is equivalent to 155/100 */
2875 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2876 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2877 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2879 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2880 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2882 OUT_BCS_BATCH(batch,
2883 quantization_value[i][0] << 16 | /* Y1AC */
2884 quantization_value[i][1] << 0); /* Y1DC */
2885 OUT_BCS_BATCH(batch,
2886 quantization_value[i][5] << 16 | /* UVAC */
2887 quantization_value[i][4] << 0); /* UVDC */
2888 OUT_BCS_BATCH(batch,
2889 quantization_value[i][3] << 16 | /* Y2AC */
2890 quantization_value[i][2] << 0); /* Y2DC */
2893 /* CoeffProbability table for non-key frame, DW16-DW18 */
2895 OUT_BCS_RELOC(batch, probs_bo,
2896 0, I915_GEM_DOMAIN_INSTRUCTION,
2898 OUT_BCS_BATCH(batch, 0);
2899 OUT_BCS_BATCH(batch, 0);
2901 OUT_BCS_BATCH(batch, 0);
2902 OUT_BCS_BATCH(batch, 0);
2903 OUT_BCS_BATCH(batch, 0);
2906 OUT_BCS_BATCH(batch,
2907 pic_param->mb_segment_tree_probs[2] << 16 |
2908 pic_param->mb_segment_tree_probs[1] << 8 |
2909 pic_param->mb_segment_tree_probs[0] << 0);
2911 OUT_BCS_BATCH(batch,
2912 pic_param->prob_skip_false << 24 |
2913 pic_param->prob_intra << 16 |
2914 pic_param->prob_last << 8 |
2915 pic_param->prob_gf << 0);
2917 OUT_BCS_BATCH(batch,
2918 pic_param->y_mode_probs[3] << 24 |
2919 pic_param->y_mode_probs[2] << 16 |
2920 pic_param->y_mode_probs[1] << 8 |
2921 pic_param->y_mode_probs[0] << 0);
2923 OUT_BCS_BATCH(batch,
2924 pic_param->uv_mode_probs[2] << 16 |
2925 pic_param->uv_mode_probs[1] << 8 |
2926 pic_param->uv_mode_probs[0] << 0);
2928 /* MV update value, DW23-DW32 */
2929 for (i = 0; i < 2; i++) {
2930 for (j = 0; j < 20; j += 4) {
2931 OUT_BCS_BATCH(batch,
2932 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2933 pic_param->mv_probs[i][j + 2] << 16 |
2934 pic_param->mv_probs[i][j + 1] << 8 |
2935 pic_param->mv_probs[i][j + 0] << 0);
2939 OUT_BCS_BATCH(batch,
2940 pic_param->loop_filter_deltas_ref_frame[3] << 24 |
2941 pic_param->loop_filter_deltas_ref_frame[2] << 16 |
2942 pic_param->loop_filter_deltas_ref_frame[1] << 8 |
2943 pic_param->loop_filter_deltas_ref_frame[0] << 0);
2945 OUT_BCS_BATCH(batch,
2946 pic_param->loop_filter_deltas_mode[3] << 24 |
2947 pic_param->loop_filter_deltas_mode[2] << 16 |
2948 pic_param->loop_filter_deltas_mode[1] << 8 |
2949 pic_param->loop_filter_deltas_mode[0] << 0);
2951 /* segmentation id stream base address, DW35-DW37 */
2952 OUT_BCS_BATCH(batch, 0);
2953 OUT_BCS_BATCH(batch, 0);
2954 OUT_BCS_BATCH(batch, 0);
2955 ADVANCE_BCS_BATCH(batch);
2959 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2960 VAPictureParameterBufferVP8 *pic_param,
2961 VASliceParameterBufferVP8 *slice_param,
2962 dri_bo *slice_data_bo,
2963 struct gen7_mfd_context *gen7_mfd_context)
2965 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2967 unsigned int offset = slice_param->slice_data_offset;
2969 assert(slice_param->num_of_partitions >= 2);
2970 assert(slice_param->num_of_partitions <= 9);
2972 log2num = (int)log2(slice_param->num_of_partitions - 1);
2974 BEGIN_BCS_BATCH(batch, 22);
2975 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2976 OUT_BCS_BATCH(batch,
2977 pic_param->bool_coder_ctx.count << 16 | /* Partition 0 CPBAC Entropy Count */
2978 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2980 (slice_param->macroblock_offset & 0x7));
2981 OUT_BCS_BATCH(batch,
2982 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2985 for (i = 0; i < 9; i++) {
2986 if (i < slice_param->num_of_partitions) {
2987 OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2988 OUT_BCS_BATCH(batch, offset);
2990 OUT_BCS_BATCH(batch, 0);
2991 OUT_BCS_BATCH(batch, 0);
2994 offset += slice_param->partition_size[i];
2997 OUT_BCS_BATCH(batch,
2998 1 << 31 | /* concealment method */
3001 ADVANCE_BCS_BATCH(batch);
3005 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3006 struct decode_state *decode_state,
3007 struct gen7_mfd_context *gen7_mfd_context)
3009 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3010 VAPictureParameterBufferVP8 *pic_param;
3011 VASliceParameterBufferVP8 *slice_param;
3012 dri_bo *slice_data_bo;
3014 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3015 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3017 /* one slice per frame */
3018 assert(decode_state->num_slice_params == 1);
3019 assert(decode_state->slice_params[0]->num_elements == 1);
3020 assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
3021 assert(decode_state->slice_datas[0]->bo);
3023 assert(decode_state->probability_data);
3025 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3026 slice_data_bo = decode_state->slice_datas[0]->bo;
3028 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3029 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3030 intel_batchbuffer_emit_mi_flush(batch);
3031 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3032 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3033 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3034 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3035 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3036 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3037 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3038 intel_batchbuffer_end_atomic(batch);
3039 intel_batchbuffer_flush(batch);
3043 gen8_mfd_decode_picture(VADriverContextP ctx,
3045 union codec_state *codec_state,
3046 struct hw_context *hw_context)
3049 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3050 struct decode_state *decode_state = &codec_state->decode;
3053 assert(gen7_mfd_context);
3055 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3057 if (vaStatus != VA_STATUS_SUCCESS)
3060 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3063 case VAProfileMPEG2Simple:
3064 case VAProfileMPEG2Main:
3065 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3068 case VAProfileH264ConstrainedBaseline:
3069 case VAProfileH264Main:
3070 case VAProfileH264High:
3071 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3074 case VAProfileVC1Simple:
3075 case VAProfileVC1Main:
3076 case VAProfileVC1Advanced:
3077 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3080 case VAProfileJPEGBaseline:
3081 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3084 case VAProfileVP8Version0_3:
3085 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3093 vaStatus = VA_STATUS_SUCCESS;
3100 gen8_mfd_context_destroy(void *hw_context)
3102 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3104 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3105 gen7_mfd_context->post_deblocking_output.bo = NULL;
3107 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3108 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3110 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3111 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3113 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3114 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3116 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3117 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3119 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3120 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3122 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3123 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3125 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3127 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3128 free(gen7_mfd_context);
3131 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3132 struct gen7_mfd_context *gen7_mfd_context)
3134 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3135 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3136 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3137 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3141 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3143 struct intel_driver_data *intel = intel_driver_data(ctx);
3144 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3147 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3148 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3149 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3151 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3152 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3153 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3156 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3158 switch (obj_config->profile) {
3159 case VAProfileMPEG2Simple:
3160 case VAProfileMPEG2Main:
3161 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3164 case VAProfileH264ConstrainedBaseline:
3165 case VAProfileH264Main:
3166 case VAProfileH264High:
3167 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3172 return (struct hw_context *)gen7_mfd_context;