2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order for an 8x8 block: entry i is the raster position of the
 * i-th coefficient in scan order.  Used to reorder quantization matrices
 * before loading them into the MFX unit.
 * NOTE(review): the embedded line numbers jump (59 -> 63 below), so the
 * initializer's closing "};" has been elided from this listing. */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/* Lazily attach AVC per-surface private data (GenAvcSurface) to obj_surface
 * and allocate the direct-mode motion-vector (DMV) scratch buffers the MFX
 * unit reads/writes during AVC decode.
 *
 * - Sizes the DMV buffers from the picture dimensions in macroblocks
 *   (width_in_mbs * height_in_mbs * 128 bytes each).
 * - A separate bottom-field DMV buffer is needed only for field pictures
 *   that do not use direct_8x8_inference (dmv_bottom_flag).
 * - Allocations are kept across calls: dri_bo_alloc only runs when the
 *   corresponding pointer is still NULL.
 *
 * NOTE(review): this excerpt has elided lines (e.g. 88, 97 missing), so the
 * dri_bo_alloc alignment argument and closing braces are not visible here. */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface for AVC: allocate the private-data struct. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 assert((obj_surface->size & 0x3f) == 0);
78 obj_surface->private_data = gen7_avc_surface;
/* Bottom-field DMV buffer only needed for field pics without 8x8 inference. */
81 gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
82 !pic_param->seq_fields.bits.direct_8x8_inference_flag);
84 if (gen7_avc_surface->dmv_top == NULL) {
85 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86 "direct mv w/r buffer",
87 width_in_mbs * height_in_mbs * 128,
89 assert(gen7_avc_surface->dmv_top);
92 if (gen7_avc_surface->dmv_bottom_flag &&
93 gen7_avc_surface->dmv_bottom == NULL) {
94 gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
95 "direct mv w/r buffer",
96 width_in_mbs * height_in_mbs * 128,
98 assert(gen7_avc_surface->dmv_bottom);
/* Emit MFX_PIPE_MODE_SELECT (5 dwords): configures the MFX engine for
 * VLD decode of the selected codec (standard_select) in long-format mode,
 * and routes output through the pre-/post-deblocking surface depending on
 * which one the context marked valid.
 * NOTE(review): lines are elided between dwords (e.g. 118, 127, 131-132
 * missing), so some OUT_BCS_BATCH openings are not visible in this view. */
103 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
104 struct decode_state *decode_state,
106 struct gen7_mfd_context *gen7_mfd_context)
108 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these codecs are supported by this decoder path. */
110 assert(standard_select == MFX_FORMAT_MPEG2 ||
111 standard_select == MFX_FORMAT_AVC ||
112 standard_select == MFX_FORMAT_VC1 ||
113 standard_select == MFX_FORMAT_JPEG ||
114 standard_select == MFX_FORMAT_VP8);
116 BEGIN_BCS_BATCH(batch, 5);
117 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
119 (MFX_LONG_MODE << 17) | /* Currently only support long format */
120 (MFD_MODE_VLD << 15) | /* VLD mode */
121 (0 << 10) | /* disable Stream-Out */
122 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
123 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
124 (0 << 5) | /* not in stitch mode */
125 (MFX_CODEC_DECODE << 4) | /* decoding mode */
126 (standard_select << 0));
/* Error-handling dword: do not terminate decode on AVC stream errors. */
128 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
129 (0 << 3) | /* terminate if AVC mbdata error occurs */
130 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
133 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
134 OUT_BCS_BATCH(batch, 0); /* reserved */
135 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE (6 dwords) describing the render target surface:
 * dimensions, pitch, Y-tiling, and the Y offsets of the Cb/Cr planes.
 * Chroma is interleaved (NV12) for every codec except JPEG.
 * NOTE(review): several OUT_BCS_BATCH openings are elided in this listing
 * (lines 157, 160, 168, 171 missing). */
139 gen8_mfd_surface_state(VADriverContextP ctx,
140 struct decode_state *decode_state,
142 struct gen7_mfd_context *gen7_mfd_context)
144 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
145 struct object_surface *obj_surface = decode_state->render_object;
146 unsigned int y_cb_offset;
147 unsigned int y_cr_offset;
151 y_cb_offset = obj_surface->y_cb_offset;
152 y_cr_offset = obj_surface->y_cr_offset;
154 BEGIN_BCS_BATCH(batch, 6);
155 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
156 OUT_BCS_BATCH(batch, 0);
/* Surface extent: height in bits 30:18, width in bits 17:4 (both minus 1). */
158 ((obj_surface->orig_height - 1) << 18) |
159 ((obj_surface->orig_width - 1) << 4));
161 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
162 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
163 (0 << 22) | /* surface object control state, ignored */
164 ((obj_surface->width - 1) << 3) | /* pitch */
165 (0 << 2) | /* must be 0 */
166 (1 << 1) | /* must be tiled */
167 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
169 (0 << 16) | /* X offset for U(Cb), must be 0 */
170 (y_cb_offset << 0)); /* Y offset for U(Cb) */
172 (0 << 16) | /* X offset for V(Cr), must be 0 */
173 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
174 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): all surface/scratch buffer
 * addresses for the decode — pre/post-deblocking outputs, intra and
 * deblocking-filter row-store scratch buffers, and the reference picture
 * list.  Invalid/absent buffers are emitted as zero dwords so the command
 * length stays fixed.
 * NOTE(review): this excerpt elides many lines (the 64-bit address upper
 * dwords, several `else` branches and loop braces are not visible), so the
 * per-entry dword accounting cannot be fully verified from this view. */
178 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
179 struct decode_state *decode_state,
181 struct gen7_mfd_context *gen7_mfd_context)
183 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
186 BEGIN_BCS_BATCH(batch, 61);
187 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188 /* Pre-deblock 1-3 */
189 if (gen7_mfd_context->pre_deblocking_output.valid)
190 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
191 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194 OUT_BCS_BATCH(batch, 0);
196 OUT_BCS_BATCH(batch, 0);
197 OUT_BCS_BATCH(batch, 0);
198 /* Post-debloing 4-6 */
199 if (gen7_mfd_context->post_deblocking_output.valid)
200 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
201 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
204 OUT_BCS_BATCH(batch, 0);
206 OUT_BCS_BATCH(batch, 0);
207 OUT_BCS_BATCH(batch, 0);
209 /* uncompressed-video & stream out 7-12 */
210 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
212 OUT_BCS_BATCH(batch, 0);
213 OUT_BCS_BATCH(batch, 0);
214 OUT_BCS_BATCH(batch, 0);
215 OUT_BCS_BATCH(batch, 0);
217 /* intra row-store scratch 13-15 */
218 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
219 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
220 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
223 OUT_BCS_BATCH(batch, 0);
225 OUT_BCS_BATCH(batch, 0);
226 OUT_BCS_BATCH(batch, 0);
227 /* deblocking-filter-row-store 16-18 */
228 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
229 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
230 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
233 OUT_BCS_BATCH(batch, 0);
234 OUT_BCS_BATCH(batch, 0);
235 OUT_BCS_BATCH(batch, 0);
/* Reference pictures: one address per frame-store slot; only slots with a
 * live surface + bo are relocated, empty slots become zero dwords. */
238 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
239 struct object_surface *obj_surface;
241 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
242 gen7_mfd_context->reference_surface[i].obj_surface &&
243 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
244 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
246 OUT_BCS_RELOC(batch, obj_surface->bo,
247 I915_GEM_DOMAIN_INSTRUCTION, 0,
250 OUT_BCS_BATCH(batch, 0);
253 OUT_BCS_BATCH(batch, 0);
256 /* reference property 51 */
257 OUT_BCS_BATCH(batch, 0);
259 /* Macroblock status & ILDB 52-57 */
260 OUT_BCS_BATCH(batch, 0);
261 OUT_BCS_BATCH(batch, 0);
262 OUT_BCS_BATCH(batch, 0);
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
267 /* the second Macroblock status 58-60 */
268 OUT_BCS_BATCH(batch, 0);
269 OUT_BCS_BATCH(batch, 0);
270 OUT_BCS_BATCH(batch, 0);
272 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): points the MFX bitstream
 * fetcher at the slice data buffer (slice_data_bo) with a 2 GB upper bound.
 * The MV, IT_COFF, IT_DBLK and PAK_BSE sections are zeroed — they are only
 * used for IT mode / encoding, not VLD decode. */
276 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
277 dri_bo *slice_data_bo,
279 struct gen7_mfd_context *gen7_mfd_context)
281 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
283 BEGIN_BCS_BATCH(batch, 26);
284 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
286 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
287 OUT_BCS_BATCH(batch, 0);
288 OUT_BCS_BATCH(batch, 0);
289 /* Upper bound 4-5 */
290 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
291 OUT_BCS_BATCH(batch, 0);
293 /* MFX indirect MV 6-10 */
294 OUT_BCS_BATCH(batch, 0);
295 OUT_BCS_BATCH(batch, 0);
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
300 /* MFX IT_COFF 11-15 */
301 OUT_BCS_BATCH(batch, 0);
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
307 /* MFX IT_DBLK 16-20 */
308 OUT_BCS_BATCH(batch, 0);
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
314 /* MFX PAK_BSE object for encoder 21-25 */
315 OUT_BCS_BATCH(batch, 0);
316 OUT_BCS_BATCH(batch, 0);
317 OUT_BCS_BATCH(batch, 0);
318 OUT_BCS_BATCH(batch, 0);
319 OUT_BCS_BATCH(batch, 0);
321 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): addresses of the BSD/MPC
 * row-store, MPR row-store, and VC-1 bitplane-read scratch buffers.
 * Buffers whose .valid flag is clear are emitted as zero dwords instead.
 * NOTE(review): `else` branches and the relocation delta arguments are
 * elided in this listing (e.g. lines 338-339, 348-349 missing). */
325 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
326 struct decode_state *decode_state,
328 struct gen7_mfd_context *gen7_mfd_context)
330 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
332 BEGIN_BCS_BATCH(batch, 10);
333 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
335 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
336 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
337 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
340 OUT_BCS_BATCH(batch, 0);
342 OUT_BCS_BATCH(batch, 0);
343 OUT_BCS_BATCH(batch, 0);
344 /* MPR Row Store Scratch buffer 4-6 */
345 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
346 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
347 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
350 OUT_BCS_BATCH(batch, 0);
352 OUT_BCS_BATCH(batch, 0);
353 OUT_BCS_BATCH(batch, 0);
/* Bitplane read buffer (used by VC-1); read-only from the GPU side. */
356 if (gen7_mfd_context->bitplane_read_buffer.valid)
357 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
358 I915_GEM_DOMAIN_INSTRUCTION, 0,
361 OUT_BCS_BATCH(batch, 0);
362 OUT_BCS_BATCH(batch, 0);
363 OUT_BCS_BATCH(batch, 0);
364 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_QM_STATE (18 dwords): loads one quantization matrix of up to
 * 64 bytes (qm_length) for the given qm_type.  The matrix is staged through
 * a zeroed(?) 64-byte local buffer so the command always carries exactly
 * 16 dwords of payload regardless of qm_length.
 * NOTE(review): parameter lines 369-371 (qm_type, qm, qm_length) and the
 * qm_buffer zero-initialization are elided from this listing — confirm
 * qm_buffer is cleared before memcpy in the full source. */
368 gen8_mfd_qm_state(VADriverContextP ctx,
372 struct gen7_mfd_context *gen7_mfd_context)
374 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
375 unsigned int qm_buffer[16];
377 assert(qm_length <= 16 * 4);
378 memcpy(qm_buffer, qm, qm_length);
380 BEGIN_BCS_BATCH(batch, 18);
381 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
382 OUT_BCS_BATCH(batch, qm_type << 0);
383 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
384 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE (17 dwords): per-picture AVC parameters derived
 * from the VA picture parameter buffer — frame size in MBs, QP offsets,
 * prediction flags, entropy coding mode, and field/MBAFF structure.
 * Also sanity-checks stream constraints the MFX unit requires (4:2:0 or
 * monochrome only, frame_mbs_only implies no MBAFF/field pics).
 * NOTE(review): the img_struct computation (lines 401-413) is partially
 * elided here — its assignments between the flag tests are not visible. */
388 gen8_mfd_avc_img_state(VADriverContextP ctx,
389 struct decode_state *decode_state,
390 struct gen7_mfd_context *gen7_mfd_context)
392 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
394 int mbaff_frame_flag;
395 unsigned int width_in_mbs, height_in_mbs;
396 VAPictureParameterBufferH264 *pic_param;
398 assert(decode_state->pic_param && decode_state->pic_param->buffer);
399 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
400 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive picture structure (frame / top field / bottom field). */
402 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
404 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
/* Field pictures must have field_pic_flag set, frames must not. */
409 if ((img_struct & 0x1) == 0x1) {
410 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
412 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
415 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
416 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
417 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
419 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF: adaptive frame/field coding within a frame picture. */
422 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
423 !pic_param->pic_fields.bits.field_pic_flag);
425 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
426 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
428 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
429 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
430 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
431 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
433 BEGIN_BCS_BATCH(batch, 17);
434 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
436 width_in_mbs * height_in_mbs);
438 ((height_in_mbs - 1) << 16) |
439 ((width_in_mbs - 1) << 0));
441 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
442 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
443 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
444 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
445 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
446 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
449 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
450 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
451 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
452 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
453 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
454 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
455 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
456 (mbaff_frame_flag << 1) |
457 (pic_param->pic_fields.bits.field_pic_flag << 0));
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 OUT_BCS_BATCH(batch, 0);
465 OUT_BCS_BATCH(batch, 0);
466 OUT_BCS_BATCH(batch, 0);
467 OUT_BCS_BATCH(batch, 0);
468 OUT_BCS_BATCH(batch, 0);
469 OUT_BCS_BATCH(batch, 0);
470 ADVANCE_BCS_BATCH(batch);
/* Load the AVC inverse-quantization matrices into the MFX unit via
 * gen8_mfd_qm_state: 4x4 intra (lists 0-2) and inter (lists 3-5), plus the
 * two 8x8 lists when transform_8x8_mode is enabled.  Falls back to the
 * context's default (flat) matrices when the app supplied no IQ buffer. */
474 gen8_mfd_avc_qm_state(VADriverContextP ctx,
475 struct decode_state *decode_state,
476 struct gen7_mfd_context *gen7_mfd_context)
478 VAIQMatrixBufferH264 *iq_matrix;
479 VAPictureParameterBufferH264 *pic_param;
481 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
482 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
484 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
486 assert(decode_state->pic_param && decode_state->pic_param->buffer);
487 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
489 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
490 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
/* 8x8 scaling lists are only present/meaningful with 8x8 transform mode. */
492 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
493 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
494 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Emit MFD_AVC_PICID_STATE (10 dwords) with picture-ID remapping disabled
 * (dword 1 = 1); the remaining remap-table dwords are therefore zero. */
499 gen8_mfd_avc_picid_state(VADriverContextP ctx,
500 struct decode_state *decode_state,
501 struct gen7_mfd_context *gen7_mfd_context)
503 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
505 BEGIN_BCS_BATCH(batch, 10);
506 OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
507 OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
508 OUT_BCS_BATCH(batch, 0);
509 OUT_BCS_BATCH(batch, 0);
510 OUT_BCS_BATCH(batch, 0);
511 OUT_BCS_BATCH(batch, 0);
512 OUT_BCS_BATCH(batch, 0);
513 OUT_BCS_BATCH(batch, 0);
514 OUT_BCS_BATCH(batch, 0);
515 OUT_BCS_BATCH(batch, 0);
516 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffer addresses for
 * each reference surface and for the current picture, followed by the
 * top/bottom picture-order counts (POCs) for references and current pic.
 * Empty reference slots are padded with zero dwords.
 * NOTE(review): this listing elides the per-entry dword padding, `else`
 * branches, and the frame_store index handling (e.g. lines 572-573, 583-588
 * missing), so the exact dword layout cannot be verified from this view. */
520 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
521 struct decode_state *decode_state,
522 VAPictureParameterBufferH264 *pic_param,
523 VASliceParameterBufferH264 *slice_param,
524 struct gen7_mfd_context *gen7_mfd_context)
526 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527 struct object_surface *obj_surface;
528 GenAvcSurface *gen7_avc_surface;
529 VAPictureH264 *va_pic;
532 BEGIN_BCS_BATCH(batch, 71);
533 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
535 /* reference surfaces 0..15 */
536 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538 gen7_mfd_context->reference_surface[i].obj_surface &&
539 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
541 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542 gen7_avc_surface = obj_surface->private_data;
/* Reference DMV buffer is read-only for the GPU (write domain 0). */
544 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545 I915_GEM_DOMAIN_INSTRUCTION, 0,
547 OUT_BCS_BATCH(batch, 0);
549 OUT_BCS_BATCH(batch, 0);
550 OUT_BCS_BATCH(batch, 0);
554 OUT_BCS_BATCH(batch, 0);
556 /* the current decoding frame/field */
557 va_pic = &pic_param->CurrPic;
558 obj_surface = decode_state->render_object;
559 assert(obj_surface->bo && obj_surface->private_data);
560 gen7_avc_surface = obj_surface->private_data;
/* Current picture's DMV buffer is read AND written by the MFX unit. */
562 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
566 OUT_BCS_BATCH(batch, 0);
567 OUT_BCS_BATCH(batch, 0);
/* POC list: for each frame-store slot, find its matching entry in
 * ReferenceFrames and emit the top/bottom field order counts. */
570 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
571 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
574 assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
576 for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
577 va_pic = &pic_param->ReferenceFrames[j];
579 if (va_pic->flags & VA_PICTURE_H264_INVALID)
582 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
589 assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
591 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
592 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
594 OUT_BCS_BATCH(batch, 0);
595 OUT_BCS_BATCH(batch, 0);
/* Finally, the current picture's POCs. */
599 va_pic = &pic_param->CurrPic;
600 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
601 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
603 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice type
 * (SI->I, SP->P), active reference counts, weight denominators, QP and
 * deblocking offsets, and the slice's start / next-slice-start positions in
 * macroblock coordinates.  The last slice of the picture is flagged when
 * next_slice_param is NULL. */
607 gen8_mfd_avc_slice_state(VADriverContextP ctx,
608 VAPictureParameterBufferH264 *pic_param,
609 VASliceParameterBufferH264 *slice_param,
610 VASliceParameterBufferH264 *next_slice_param,
611 struct gen7_mfd_context *gen7_mfd_context)
613 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
614 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
615 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
616 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
617 int num_ref_idx_l0, num_ref_idx_l1;
618 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
619 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
620 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Collapse the five H.264 slice types onto the three MFX categories. */
623 if (slice_param->slice_type == SLICE_TYPE_I ||
624 slice_param->slice_type == SLICE_TYPE_SI) {
625 slice_type = SLICE_TYPE_I;
626 } else if (slice_param->slice_type == SLICE_TYPE_P ||
627 slice_param->slice_type == SLICE_TYPE_SP) {
628 slice_type = SLICE_TYPE_P;
630 assert(slice_param->slice_type == SLICE_TYPE_B);
631 slice_type = SLICE_TYPE_B;
/* Reference list sizes: I uses none, P uses L0, B uses L0 and L1. */
634 if (slice_type == SLICE_TYPE_I) {
635 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
636 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
639 } else if (slice_type == SLICE_TYPE_P) {
640 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
641 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
644 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
645 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* In MBAFF pictures MB addresses count MB pairs, hence the << 1. */
648 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
649 slice_hor_pos = first_mb_in_slice % width_in_mbs;
650 slice_ver_pos = first_mb_in_slice / width_in_mbs;
652 if (next_slice_param) {
653 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
654 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
655 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
/* Last slice: "next" position is one row past the picture (per field). */
657 next_slice_hor_pos = 0;
658 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
661 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663 OUT_BCS_BATCH(batch, slice_type);
665 (num_ref_idx_l1 << 24) |
666 (num_ref_idx_l0 << 16) |
667 (slice_param->chroma_log2_weight_denom << 8) |
668 (slice_param->luma_log2_weight_denom << 0));
670 (slice_param->direct_spatial_mv_pred_flag << 29) |
671 (slice_param->disable_deblocking_filter_idc << 27) |
672 (slice_param->cabac_init_idc << 24) |
673 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
677 (slice_ver_pos << 24) |
678 (slice_hor_pos << 16) |
679 (first_mb_in_slice << 0));
681 (next_slice_ver_pos << 16) |
682 (next_slice_hor_pos << 0));
684 (next_slice_param == NULL) << 19); /* last slice flag */
685 OUT_BCS_BATCH(batch, 0);
686 OUT_BCS_BATCH(batch, 0);
687 OUT_BCS_BATCH(batch, 0);
688 OUT_BCS_BATCH(batch, 0);
689 ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: reference-index list emission is unchanged from GEN6, so
 * delegate to gen6_send_avc_ref_idx_state with this context's batch and
 * frame-store table.  (Middle argument lines 700/702-703 are elided in
 * this listing.) */
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694 VAPictureParameterBufferH264 *pic_param,
695 VASliceParameterBufferH264 *slice_param,
696 struct gen7_mfd_context *gen7_mfd_context)
698 gen6_send_avc_ref_idx_state(
699 gen7_mfd_context->base.batch,
701 gen7_mfd_context->reference_surface
/* Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords) when explicit weighted
 * prediction is in use: one table (L0) for weighted P/SP slices, two tables
 * (L0 then L1) for B slices with weighted_bipred_idc == 1.  Each table packs
 * 32 entries of {luma weight, luma offset, Cb weight, Cb offset, Cr weight,
 * Cr offset} as 16-bit values.
 * NOTE(review): the `if (i == 0)` / `else` selectors around the two packing
 * loops (lines 731, 740) are elided from this listing. */
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707 VAPictureParameterBufferH264 *pic_param,
708 VASliceParameterBufferH264 *slice_param,
709 struct gen7_mfd_context *gen7_mfd_context)
711 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712 int i, j, num_weight_offset_table = 0;
713 short weightoffsets[32 * 6];
715 if ((slice_param->slice_type == SLICE_TYPE_P ||
716 slice_param->slice_type == SLICE_TYPE_SP) &&
717 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718 num_weight_offset_table = 1;
721 if ((slice_param->slice_type == SLICE_TYPE_B) &&
722 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723 num_weight_offset_table = 2;
726 for (i = 0; i < num_weight_offset_table; i++) {
727 BEGIN_BCS_BATCH(batch, 98);
728 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729 OUT_BCS_BATCH(batch, i); /* 0 = list L0, 1 = list L1 */
/* Pack the L0 weight/offset table. */
732 for (j = 0; j < 32; j++) {
733 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
/* Pack the L1 weight/offset table (second pass, B slices only). */
741 for (j = 0; j < 32; j++) {
742 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
751 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752 ADVANCE_BCS_BATCH(batch);
/* Emit MFD_AVC_BSD_OBJECT (6 dwords): kicks decode of one slice from the
 * indirect bitstream buffer — slice data size/offset plus the bit offset of
 * the first macroblock (computed by avc_get_first_mb_bit_offset, which
 * accounts for CABAC vs CAVLC alignment).  Flags the last slice so the MFX
 * unit can finish the picture.
 * NOTE(review): dwords 772 and 775-785 are partially elided here (the
 * OUT_BCS_BATCH openings and several flag bits are not visible). */
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758 VAPictureParameterBufferH264 *pic_param,
759 VASliceParameterBufferH264 *slice_param,
760 dri_bo *slice_data_bo,
761 VASliceParameterBufferH264 *next_slice_param,
762 struct gen7_mfd_context *gen7_mfd_context)
764 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
767 pic_param->pic_fields.bits.entropy_coding_mode_flag);
769 /* the input bitsteam format on GEN7 differs from GEN6 */
770 BEGIN_BCS_BATCH(batch, 6);
771 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
773 (slice_param->slice_data_size));
774 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
782 ((slice_data_bit_offset >> 3) << 16) |
786 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787 (slice_data_bit_offset & 0x7));
788 OUT_BCS_BATCH(batch, 0);
789 ADVANCE_BCS_BATCH(batch);
/* One-time AVC context setup: seed the context's fallback IQ matrices with
 * the spec-default (flat) scaling lists, used when the application supplies
 * no VAIQMatrixBufferH264. */
793 gen8_mfd_avc_context_init(
794 VADriverContextP ctx,
795 struct gen7_mfd_context *gen7_mfd_context
798 /* Initialize flat scaling lists */
799 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/* Per-frame AVC decode setup:
 *  - scans all slices to decide whether in-loop deblocking (ILDB) is on
 *    (any slice with disable_deblocking_filter_idc != 1);
 *  - refreshes the frame-store index from the picture parameters;
 *  - (re)allocates the render surface bo as NV12, clears chroma to 0x80 for
 *    monochrome streams, and attaches the per-surface DMV buffers;
 *  - routes output to post-deblocking (ILDB on) or pre-deblocking (ILDB
 *    off), and (re)allocates the four row-store scratch buffers sized by
 *    picture width in MBs.
 * NOTE(review): multiple lines are elided (loop-advance statements 828-833,
 * alloc name/size args 874-877, 892, 901, some braces), so buffer sizes and
 * loop structure cannot be fully confirmed from this view. */
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804 struct decode_state *decode_state,
805 struct gen7_mfd_context *gen7_mfd_context)
807 VAPictureParameterBufferH264 *pic_param;
808 VASliceParameterBufferH264 *slice_param;
809 struct i965_driver_data *i965 = i965_driver_data(ctx);
810 struct object_surface *obj_surface;
812 int i, j, enable_avc_ildb = 0;
813 unsigned int width_in_mbs, height_in_mbs;
/* Stop scanning as soon as one slice enables deblocking. */
815 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
819 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821 assert((slice_param->slice_type == SLICE_TYPE_I) ||
822 (slice_param->slice_type == SLICE_TYPE_SI) ||
823 (slice_param->slice_type == SLICE_TYPE_P) ||
824 (slice_param->slice_type == SLICE_TYPE_SP) ||
825 (slice_param->slice_type == SLICE_TYPE_B));
827 if (slice_param->disable_deblocking_filter_idc != 1) {
836 assert(decode_state->pic_param && decode_state->pic_param->buffer);
837 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838 intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
839 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
840 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
841 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
842 assert(height_in_mbs > 0 && height_in_mbs <= 256);
844 /* Current decoded picture */
845 obj_surface = decode_state->render_object;
846 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
847 obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
848 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
850 /* initial uv component for YUV400 case */
851 if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
852 unsigned int uv_offset = obj_surface->width * obj_surface->height;
853 unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
/* 0x80 = neutral chroma in NV12; map via GTT to honor tiling. */
855 drm_intel_gem_bo_map_gtt(obj_surface->bo);
856 memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
857 drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
860 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Exactly one of post-/pre-deblocking output is valid, keyed on ILDB. */
862 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
863 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
864 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
865 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
867 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
868 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
869 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
870 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
872 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
873 bo = dri_bo_alloc(i965->intel.bufmgr,
878 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
879 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
881 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
882 bo = dri_bo_alloc(i965->intel.bufmgr,
883 "deblocking filter row store",
884 width_in_mbs * 64 * 4,
887 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
888 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
890 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
891 bo = dri_bo_alloc(i965->intel.bufmgr,
893 width_in_mbs * 64 * 2,
896 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
897 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
899 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
900 bo = dri_bo_alloc(i965->intel.bufmgr,
902 width_in_mbs * 64 * 2,
905 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
906 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* Bitplane buffer is VC-1 only; never used for AVC. */
908 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Top-level AVC picture decode: runs per-frame init, then emits the full
 * MFX command sequence atomically — pipeline/surface/buffer/QM/IMG/PICID
 * state once per picture, followed by per-slice directmode / ref-idx /
 * weight-offset / slice-state / BSD-object commands — and flushes the batch.
 * next_slice_param points at the following slice within the same parameter
 * buffer, or at the first slice of the next buffer, or NULL for the last
 * slice (used for the last-slice flag and next-slice MB positions).
 * NOTE(review): the inner-loop slice_param advance (lines 965-967) is
 * elided from this listing. */
912 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
913 struct decode_state *decode_state,
914 struct gen7_mfd_context *gen7_mfd_context)
916 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
917 VAPictureParameterBufferH264 *pic_param;
918 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
919 dri_bo *slice_data_bo;
922 assert(decode_state->pic_param && decode_state->pic_param->buffer);
923 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
924 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
926 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
927 intel_batchbuffer_emit_mi_flush(batch);
928 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
929 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
930 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
931 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
932 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
933 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
934 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
936 for (j = 0; j < decode_state->num_slice_params; j++) {
937 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
938 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
939 slice_data_bo = decode_state->slice_datas[j]->bo;
940 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
942 if (j == decode_state->num_slice_params - 1)
943 next_slice_group_param = NULL;
945 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
947 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
948 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
949 assert((slice_param->slice_type == SLICE_TYPE_I) ||
950 (slice_param->slice_type == SLICE_TYPE_SI) ||
951 (slice_param->slice_type == SLICE_TYPE_P) ||
952 (slice_param->slice_type == SLICE_TYPE_SP) ||
953 (slice_param->slice_type == SLICE_TYPE_B));
955 if (i < decode_state->slice_params[j]->num_elements - 1)
956 next_slice_param = slice_param + 1;
958 next_slice_param = next_slice_group_param;
960 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
961 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
962 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
963 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
964 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
969 intel_batchbuffer_end_atomic(batch);
970 intel_batchbuffer_flush(batch);
/*
 * Per-picture decode setup for MPEG-2:
 *  - resolves the reference frame store from the picture parameters,
 *  - binds the render target surface (NV12) as the pre-deblocking output,
 *  - (re)allocates the BSD/MPC row-store scratch buffer sized from the
 *    picture width in macroblocks,
 *  - marks all buffers unused by the MPEG-2 pipeline as invalid.
 */
974 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
975 struct decode_state *decode_state,
976 struct gen7_mfd_context *gen7_mfd_context)
978 VAPictureParameterBufferMPEG2 *pic_param;
979 struct i965_driver_data *i965 = i965_driver_data(ctx);
980 struct object_surface *obj_surface;
982 unsigned int width_in_mbs;
984 assert(decode_state->pic_param && decode_state->pic_param->buffer);
985 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
/* Macroblocks are 16x16; round the coded width up to a whole MB. */
986 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
988 mpeg2_set_reference_surfaces(
990 gen7_mfd_context->reference_surface,
995 /* Current decoded picture */
996 obj_surface = decode_state->render_object;
997 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* MPEG-2 has no in-loop deblocking: decode straight into the render
 * target via the pre-deblocking output. Take a reference because the
 * context keeps the bo beyond this call. */
999 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1000 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1001 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1002 gen7_mfd_context->pre_deblocking_output.valid = 1;
1004 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1005 bo = dri_bo_alloc(i965->intel.bufmgr,
1006 "bsd mpc row store",
1010 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1011 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* The remaining scratch/bitplane buffers are not used for MPEG-2. */
1013 gen7_mfd_context->post_deblocking_output.valid = 0;
1014 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1015 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1016 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1017 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit the MFX_MPEG2_PIC_STATE command (13 dwords) describing the current
 * picture: f_codes, picture-coding-extension flags, coding type, and the
 * frame size in macroblocks. The trailing zero dwords are reserved /
 * unused fields of the command.
 */
1021 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1022 struct decode_state *decode_state,
1023 struct gen7_mfd_context *gen7_mfd_context)
1025 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1026 VAPictureParameterBufferMPEG2 *pic_param;
1027 unsigned int slice_concealment_disable_bit = 0;
1029 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1030 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
/* NOTE(review): this assignment appears unconditional in the visible
 * text — presumably guarded by a workaround check in the elided lines;
 * confirm against the full source. */
1032 slice_concealment_disable_bit = 1;
1034 BEGIN_BCS_BATCH(batch, 13);
1035 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* DW1: f_code nibbles (packed [1][1]..[0][0]) plus the
 * picture_coding_extension bit-fields, laid out per the MFX spec. */
1036 OUT_BCS_BATCH(batch,
1037 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1038 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1039 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1040 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1041 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1042 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1043 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1044 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1045 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1046 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1047 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1048 pic_param->picture_coding_extension.bits.alternate_scan << 6);
/* DW2: picture coding type (I/P/B). */
1049 OUT_BCS_BATCH(batch,
1050 pic_param->picture_coding_type << 9);
/* DW3: concealment control and frame dimensions in MBs (minus one). */
1051 OUT_BCS_BATCH(batch,
1052 (slice_concealment_disable_bit << 31) |
1053 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1054 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* DW4..DW12: reserved. */
1055 OUT_BCS_BATCH(batch, 0);
1056 OUT_BCS_BATCH(batch, 0);
1057 OUT_BCS_BATCH(batch, 0);
1058 OUT_BCS_BATCH(batch, 0);
1059 OUT_BCS_BATCH(batch, 0);
1060 OUT_BCS_BATCH(batch, 0);
1061 OUT_BCS_BATCH(batch, 0);
1062 OUT_BCS_BATCH(batch, 0);
1063 OUT_BCS_BATCH(batch, 0);
1064 ADVANCE_BCS_BATCH(batch);
/*
 * Load the MPEG-2 quantization matrices into hardware.
 *
 * Matrices arrive from VA-API in zig-zag scan order; the driver keeps a
 * de-zigzagged (raster-order) copy in the context so the last loaded
 * matrices persist across pictures that do not reload them (a load flag
 * of -1 in the cached state means "never loaded yet"). Both the intra
 * and non-intra matrices are then committed via gen8_mfd_qm_state().
 */
1068 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1069 struct decode_state *decode_state,
1070 struct gen7_mfd_context *gen7_mfd_context)
1072 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1075 /* Update internal QM state */
1076 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1077 VAIQMatrixBufferMPEG2 * const iq_matrix =
1078 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1080 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1081 iq_matrix->load_intra_quantiser_matrix) {
1082 gen_iq_matrix->load_intra_quantiser_matrix =
1083 iq_matrix->load_intra_quantiser_matrix;
1084 if (iq_matrix->load_intra_quantiser_matrix) {
/* De-zigzag: source index j maps to raster position zigzag_direct[j]. */
1085 for (j = 0; j < 64; j++)
1086 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1087 iq_matrix->intra_quantiser_matrix[j];
1091 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1092 iq_matrix->load_non_intra_quantiser_matrix) {
1093 gen_iq_matrix->load_non_intra_quantiser_matrix =
1094 iq_matrix->load_non_intra_quantiser_matrix;
1095 if (iq_matrix->load_non_intra_quantiser_matrix) {
1096 for (j = 0; j < 64; j++)
1097 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1098 iq_matrix->non_intra_quantiser_matrix[j];
1103 /* Commit QM state to HW */
/* Iteration 0 = intra matrix, iteration 1 = non-intra matrix. */
1104 for (i = 0; i < 2; i++) {
1105 unsigned char *qm = NULL;
1109 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1110 qm = gen_iq_matrix->intra_quantiser_matrix;
1111 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1114 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1115 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1116 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1123 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_MPEG2_BSD_OBJECT (5 dwords) for a slice: bitstream offset
 * and size past the slice header, slice start MB position, the derived
 * macroblock count to the next slice (or to end of picture when
 * next_slice_param is NULL), and the quantiser scale code.
 *
 * is_field_pic_wa applies the wa_mpeg2_slice_vertical_position workaround:
 * some streams report field-picture slice rows in frame units, so the
 * vertical positions are halved when the workaround is active.
 */
1128 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1129 VAPictureParameterBufferMPEG2 *pic_param,
1130 VASliceParameterBufferMPEG2 *slice_param,
1131 VASliceParameterBufferMPEG2 *next_slice_param,
1132 struct gen7_mfd_context *gen7_mfd_context)
1134 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1135 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1136 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1138 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1139 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1141 is_field_pic_wa = is_field_pic &&
1142 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1144 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1145 hpos0 = slice_param->slice_horizontal_position;
1147 if (next_slice_param == NULL) {
/* Last slice: extend to the bottom of the (possibly field) picture. */
1148 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1151 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1152 hpos1 = next_slice_param->slice_horizontal_position;
/* Number of macroblocks covered by this slice in raster order. */
1155 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1157 BEGIN_BCS_BATCH(batch, 5);
1158 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* macroblock_offset is in bits; >> 3 converts to whole header bytes. */
1159 OUT_BCS_BATCH(batch,
1160 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1161 OUT_BCS_BATCH(batch,
1162 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1163 OUT_BCS_BATCH(batch,
1167 (next_slice_param == NULL) << 5 |
1168 (next_slice_param == NULL) << 3 |
1169 (slice_param->macroblock_offset & 0x7));
1170 OUT_BCS_BATCH(batch,
1171 (slice_param->quantiser_scale_code << 24) |
1172 (vpos1 << 8 | hpos1));
1173 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 picture decode: runs per-picture init, emits the
 * common MFX pipeline state followed by MPEG-2 picture/QM state, then
 * walks every slice parameter buffer emitting one BSD object per slice.
 * The whole sequence is wrapped in an atomic BCS batch.
 */
1177 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1178 struct decode_state *decode_state,
1179 struct gen7_mfd_context *gen7_mfd_context)
1181 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1182 VAPictureParameterBufferMPEG2 *pic_param;
1183 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1184 dri_bo *slice_data_bo;
1187 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1188 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1190 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1191 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1192 intel_batchbuffer_emit_mi_flush(batch);
1193 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1194 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1195 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1196 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1197 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1198 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Lazily detect the field-picture slice-position workaround once
 * (negative value means "not probed yet"). */
1200 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1201 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1202 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
/* Outer loop: slice parameter buffers; inner loop: slices per buffer. */
1204 for (j = 0; j < decode_state->num_slice_params; j++) {
1205 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1206 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1207 slice_data_bo = decode_state->slice_datas[j]->bo;
1208 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1210 if (j == decode_state->num_slice_params - 1)
1211 next_slice_group_param = NULL;
1213 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1215 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1216 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
/* The next slice is either the next element in this buffer or the
 * first slice of the next buffer (NULL terminates the picture). */
1218 if (i < decode_state->slice_params[j]->num_elements - 1)
1219 next_slice_param = slice_param + 1;
1221 next_slice_param = next_slice_group_param;
1223 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1228 intel_batchbuffer_end_atomic(batch);
1229 intel_batchbuffer_flush(batch);
/* Maps VA-API VC-1 picture type codes to the GEN7 hardware enum. */
1232 static const int va_to_gen7_vc1_pic_type[5] = {
1236 GEN7_VC1_BI_PICTURE,
/* Maps VA-API VC-1 MV modes to the hardware's unified MV mode values. */
1240 static const int va_to_gen7_vc1_mv[4] = {
1242 2, /* 1-MV half-pel */
1243 3, /* 1-MV half-pef bilinear */
/* B-fraction scale factors indexed by b_picture_fraction (VC-1 spec
 * Table, used to derive BRFD / direct-mode scaling). */
1247 static const int b_picture_scale_factor[21] = {
1248 128, 85, 170, 64, 192,
1249 51, 102, 153, 204, 43,
1250 215, 37, 74, 111, 148,
1251 185, 222, 32, 96, 160,
/* Maps VA conditional_overlap_flag to the hardware CONDOVER field. */
1255 static const int va_to_gen7_vc1_condover[3] = {
/* Maps VA-API VC-1 profile indices to the GEN7 profile enum. */
1261 static const int va_to_gen7_vc1_profile[4] = {
1262 GEN7_VC1_SIMPLE_PROFILE,
1263 GEN7_VC1_MAIN_PROFILE,
1264 GEN7_VC1_RESERVED_PROFILE,
1265 GEN7_VC1_ADVANCED_PROFILE
/*
 * Destructor for the per-surface VC-1 private data: releases the direct
 * motion-vector bo and frees the struct. Installed as
 * obj_surface->free_private_data by gen8_mfd_init_vc1_surface().
 */
1269 gen8_mfd_free_vc1_surface(void **data)
1271 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1273 if (!gen7_vc1_surface)
1276 dri_bo_unreference(gen7_vc1_surface->dmv);
1277 free(gen7_vc1_surface);
/*
 * Lazily attach VC-1 private data to a surface: records the picture type
 * and allocates the direct MV read/write buffer (64 bytes per macroblock)
 * used by B-picture direct-mode prediction.
 */
1282 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1283 VAPictureParameterBufferVC1 *pic_param,
1284 struct object_surface *obj_surface)
1286 struct i965_driver_data *i965 = i965_driver_data(ctx);
1287 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1288 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1289 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1291 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1293 if (!gen7_vc1_surface) {
1294 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1295 assert((obj_surface->size & 0x3f) == 0);
1296 obj_surface->private_data = gen7_vc1_surface;
1299 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1301 if (gen7_vc1_surface->dmv == NULL) {
/* 64 bytes of DMV storage per macroblock. */
1302 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303 "direct mv w/r buffer",
1304 width_in_mbs * height_in_mbs * 64,
/*
 * Per-picture decode setup for VC-1:
 *  - updates the frame-store index and attaches VC-1 private data (DMV
 *    buffer) to the render target,
 *  - routes output through post-deblocking when the entrypoint enables
 *    the loop filter, pre-deblocking otherwise (exactly one is valid),
 *  - (re)allocates the intra, deblocking-filter, and BSD/MPC row-store
 *    scratch buffers sized from the picture width in macroblocks,
 *  - when bitplane data is present, repacks the VA-API packed-nibble
 *    bitplane buffer into the hardware layout (two MBs per byte, row
 *    pitch = bitplane_width).
 */
1310 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1311 struct decode_state *decode_state,
1312 struct gen7_mfd_context *gen7_mfd_context)
1314 VAPictureParameterBufferVC1 *pic_param;
1315 struct i965_driver_data *i965 = i965_driver_data(ctx);
1316 struct object_surface *obj_surface;
1321 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1322 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1323 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1324 picture_type = pic_param->picture_fields.bits.picture_type;
1326 intel_update_vc1_frame_store_index(ctx,
1329 gen7_mfd_context->reference_surface);
1331 /* Current decoded picture */
1332 obj_surface = decode_state->render_object;
1333 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1334 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* Loop filter on -> write the post-deblocking output; otherwise the
 * pre-deblocking path is used. Both alias the render target bo. */
1336 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1337 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1338 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1339 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1341 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1342 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1343 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1344 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1346 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1347 bo = dri_bo_alloc(i965->intel.bufmgr,
1352 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1353 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1355 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1356 bo = dri_bo_alloc(i965->intel.bufmgr,
1357 "deblocking filter row store",
1358 width_in_mbs * 7 * 64,
1361 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1362 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1364 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1365 bo = dri_bo_alloc(i965->intel.bufmgr,
1366 "bsd mpc row store",
1370 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1371 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1373 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1375 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1376 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1378 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1379 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1380 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Hardware packs two macroblocks per byte in each bitplane row. */
1381 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1383 uint8_t *src = NULL, *dst = NULL;
1385 assert(decode_state->bit_plane->buffer);
1386 src = decode_state->bit_plane->buffer;
1388 bo = dri_bo_alloc(i965->intel.bufmgr,
1390 bitplane_width * height_in_mbs,
1393 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1395 dri_bo_map(bo, True);
1396 assert(bo->virtual);
1399 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1400 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1401 int src_index, dst_index;
/* VA packs two MBs per source byte: even MB in the high nibble. */
1405 src_index = (src_h * width_in_mbs + src_w) / 2;
1406 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1407 src_value = ((src[src_index] >> src_shift) & 0xf);
1409 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1413 dst_index = src_w / 2;
1414 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd row width: shift the trailing half-filled byte into place. */
1418 dst[src_w / 2] >>= 4;
1420 dst += bitplane_width;
1425 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Emit MFD_VC1_LONG_PIC_STATE (6 dwords) for the current picture.
 *
 * Derives all hardware fields from the VA-API VC-1 picture parameters:
 * alternate-pquant configuration (VC-1 spec DQUANT/DQPROFILE decoding),
 * the unified MV mode, the B-fraction scale factor / BRFD, transform
 * type constraints, condover, interpolation mode, and the DMV surface
 * validity for B-picture direct mode.
 */
1429 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1430 struct decode_state *decode_state,
1431 struct gen7_mfd_context *gen7_mfd_context)
1433 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1434 VAPictureParameterBufferVC1 *pic_param;
1435 struct object_surface *obj_surface;
1436 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1437 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1438 int unified_mv_mode;
1439 int ref_field_pic_polarity = 0;
1440 int scale_factor = 0;
1442 int dmv_surface_valid = 0;
1448 int interpolation_mode = 0;
1450 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1451 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1453 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1454 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1455 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1456 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1457 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1458 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1459 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1460 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Decode DQUANT per VC-1 spec into the hardware's alt-pquant config
 * and per-edge mask (bit per picture edge). */
1463 alt_pquant_config = 0;
1464 alt_pquant_edge_mask = 0;
1465 } else if (dquant == 2) {
1466 alt_pquant_config = 1;
1467 alt_pquant_edge_mask = 0xf;
1469 assert(dquant == 1);
1470 if (dquantfrm == 0) {
1471 alt_pquant_config = 0;
1472 alt_pquant_edge_mask = 0;
1475 assert(dquantfrm == 1);
1476 alt_pquant_config = 1;
1478 switch (dqprofile) {
1480 if (dqbilevel == 0) {
1481 alt_pquant_config = 2;
1482 alt_pquant_edge_mask = 0;
1484 assert(dqbilevel == 1);
1485 alt_pquant_config = 3;
1486 alt_pquant_edge_mask = 0;
1491 alt_pquant_edge_mask = 0xf;
1496 alt_pquant_edge_mask = 0x9;
1498 alt_pquant_edge_mask = (0x3 << dqdbedge);
1503 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* Intensity compensation carries the real MV mode in mv_mode2. */
1512 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1513 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1514 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1516 assert(pic_param->mv_fields.bits.mv_mode < 4);
1517 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1520 if (pic_param->sequence_fields.bits.interlace == 1 &&
1521 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1522 /* FIXME: calculate reference field picture polarity */
1524 ref_field_pic_polarity = 0;
1527 if (pic_param->b_picture_fraction < 21)
1528 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1530 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* NOTE(review): Advanced-profile I pictures are forced to BI here —
 * presumably a hardware requirement; confirm against the PRM. */
1532 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1533 picture_type == GEN7_VC1_I_PICTURE)
1534 picture_type = GEN7_VC1_BI_PICTURE;
1536 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1537 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1539 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1542 * 8.3.6.2.1 Transform Type Selection
1543 * If variable-sized transform coding is not enabled,
1544 * then the 8x8 transform shall be used for all blocks.
1545 * it is also MFX_VC1_PIC_STATE requirement.
1547 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1548 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1549 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* B pictures need a DMV surface from the backward (anchor) reference;
 * it is invalid when that anchor is an I/BI picture (no MVs stored). */
1553 if (picture_type == GEN7_VC1_B_PICTURE) {
1554 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1556 obj_surface = decode_state->reference_objects[1];
1559 gen7_vc1_surface = obj_surface->private_data;
1561 if (!gen7_vc1_surface ||
1562 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1563 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1564 dmv_surface_valid = 0;
1566 dmv_surface_valid = 1;
1569 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1571 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1572 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1574 if (pic_param->picture_fields.bits.top_field_first)
/* BRFD: scaled backward reference frame distance for B pictures. */
1580 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1581 brfd = pic_param->reference_fields.bits.reference_distance;
1582 brfd = (scale_factor * brfd) >> 8;
1583 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
/* Overlap smoothing: simple/main profiles key off PQUANT >= 9;
 * advanced profile uses the explicit condover flag. */
1590 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1591 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1592 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1596 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1597 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1600 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1601 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1602 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1604 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1605 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1611 assert(pic_param->conditional_overlap_flag < 3);
1612 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1614 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1615 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1616 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1617 interpolation_mode = 9; /* Half-pel bilinear */
1618 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1619 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1620 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1621 interpolation_mode = 1; /* Half-pel bicubic */
1623 interpolation_mode = 0; /* Quarter-pel bicubic */
1625 BEGIN_BCS_BATCH(batch, 6);
1626 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
/* DW1: frame size in MBs (minus one). */
1627 OUT_BCS_BATCH(batch,
1628 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1629 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1630 OUT_BCS_BATCH(batch,
1631 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1632 dmv_surface_valid << 15 |
1633 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1634 pic_param->rounding_control << 13 |
1635 pic_param->sequence_fields.bits.syncmarker << 12 |
1636 interpolation_mode << 8 |
1637 0 << 7 | /* FIXME: scale up or down ??? */
1638 pic_param->range_reduction_frame << 6 |
1639 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1641 !pic_param->picture_fields.bits.is_first_field << 3 |
1642 (pic_param->sequence_fields.bits.profile == 3) << 0);
1643 OUT_BCS_BATCH(batch,
1644 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1645 picture_type << 26 |
1648 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1650 OUT_BCS_BATCH(batch,
1651 unified_mv_mode << 28 |
1652 pic_param->mv_fields.bits.four_mv_switch << 27 |
1653 pic_param->fast_uvmc_flag << 26 |
1654 ref_field_pic_polarity << 25 |
1655 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1656 pic_param->reference_fields.bits.reference_distance << 20 |
1657 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1658 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1659 pic_param->mv_fields.bits.extended_mv_range << 8 |
1660 alt_pquant_edge_mask << 4 |
1661 alt_pquant_config << 2 |
1662 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1663 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: inverted bitplane-presence flags plus VLC table selectors. */
1664 OUT_BCS_BATCH(batch,
1665 !!pic_param->bitplane_present.value << 31 |
1666 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1667 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1668 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1669 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1670 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1671 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1672 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1673 pic_param->mv_fields.bits.mv_table << 20 |
1674 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1675 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1676 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1677 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1678 pic_param->mb_mode_table << 8 |
1680 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1681 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1682 pic_param->cbp_table << 0);
1683 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_VC1_PRED_PIPE_STATE (6 dwords): configures the VC-1
 * prediction pipe, chiefly intensity-compensation enables and the
 * luma scale/shift values used for IC reference scaling.
 *
 * Fix: the original emitted the assert + pic_param assignment pair
 * twice back-to-back; the redundant second copy is removed (no
 * behavior change — the statements were byte-identical).
 */
1687 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1688 struct decode_state *decode_state,
1689 struct gen7_mfd_context *gen7_mfd_context)
1691 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1692 VAPictureParameterBufferVC1 *pic_param;
1693 int intensitycomp_single;
1695 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1696 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
/* Single-field intensity compensation is active when the MV mode is IC. */
1700 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1702 BEGIN_BCS_BATCH(batch, 6);
1703 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1704 OUT_BCS_BATCH(batch,
1705 0 << 14 | /* FIXME: double ??? */
1707 intensitycomp_single << 10 |
1708 intensitycomp_single << 8 |
1709 0 << 4 | /* FIXME: interlace mode */
1711 OUT_BCS_BATCH(batch,
1712 pic_param->luma_shift << 16 |
1713 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1714 OUT_BCS_BATCH(batch, 0);
1715 OUT_BCS_BATCH(batch, 0);
1716 OUT_BCS_BATCH(batch, 0);
1717 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_VC1_DIRECTMODE_STATE (7 dwords): the direct MV write buffer
 * comes from the render target's private data; the read buffer from the
 * backward reference (reference_objects[1]). Either may be absent, in
 * which case zero dwords are emitted instead of relocations.
 */
1721 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1722 struct decode_state *decode_state,
1723 struct gen7_mfd_context *gen7_mfd_context)
1725 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1726 struct object_surface *obj_surface;
1727 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1729 obj_surface = decode_state->render_object;
1731 if (obj_surface && obj_surface->private_data) {
1732 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1735 obj_surface = decode_state->reference_objects[1];
1737 if (obj_surface && obj_surface->private_data) {
1738 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1741 BEGIN_BCS_BATCH(batch, 7);
1742 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
/* Write buffer: hardware writes this picture's MVs (read+write domain). */
1744 if (dmv_write_buffer)
1745 OUT_BCS_RELOC(batch, dmv_write_buffer,
1746 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1749 OUT_BCS_BATCH(batch, 0);
1751 OUT_BCS_BATCH(batch, 0);
1752 OUT_BCS_BATCH(batch, 0);
/* Read buffer: anchor picture MVs for B-frame direct mode (read-only). */
1754 if (dmv_read_buffer)
1755 OUT_BCS_RELOC(batch, dmv_read_buffer,
1756 I915_GEM_DOMAIN_INSTRUCTION, 0,
1759 OUT_BCS_BATCH(batch, 0);
1761 OUT_BCS_BATCH(batch, 0);
1762 OUT_BCS_BATCH(batch, 0);
1764 ADVANCE_BCS_BATCH(batch);
/*
 * Adjust a slice's macroblock bit offset for emulation-prevention bytes.
 *
 * Scans the slice header bytes for the VC-1 start-code emulation pattern
 * (00 00 03 0x with x < 4); when found, the returned offset is rebased
 * past the extra byte(s) so the hardware sees the true MB data position.
 * buf points at the raw slice data; in_slice_data_bit_offset is the
 * VA-supplied macroblock_offset in bits.
 */
1768 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1770 int out_slice_data_bit_offset;
1771 int slice_header_size = in_slice_data_bit_offset / 8;
1775 out_slice_data_bit_offset = in_slice_data_bit_offset;
1777 for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1778 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
/* Re-express the offset relative to the de-emulated byte stream. */
1783 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1786 return out_slice_data_bit_offset;
/*
 * Emit one MFD_VC1_BSD_OBJECT (5 dwords) for a slice. The slice data bo
 * is mapped briefly to recompute the macroblock bit offset (start-code
 * emulation handling); the command then carries the byte-aligned data
 * size/offset, the slice's vertical MB range, and the residual bit
 * offset within the first byte.
 */
1790 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1791 VAPictureParameterBufferVC1 *pic_param,
1792 VASliceParameterBufferVC1 *slice_param,
1793 VASliceParameterBufferVC1 *next_slice_param,
1794 dri_bo *slice_data_bo,
1795 struct gen7_mfd_context *gen7_mfd_context)
1797 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1798 int next_slice_start_vert_pos;
1799 int macroblock_offset;
1800 uint8_t *slice_data = NULL;
/* Map read-only just long enough to scan the slice header bytes. */
1802 dri_bo_map(slice_data_bo, 0);
1803 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1804 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1805 slice_param->macroblock_offset,
1806 pic_param->sequence_fields.bits.profile);
1807 dri_bo_unmap(slice_data_bo);
1809 if (next_slice_param)
1810 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1812 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1814 BEGIN_BCS_BATCH(batch, 5);
1815 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
/* macroblock_offset is in bits; >> 3 yields whole header bytes. */
1816 OUT_BCS_BATCH(batch,
1817 slice_param->slice_data_size - (macroblock_offset >> 3));
1818 OUT_BCS_BATCH(batch,
1819 slice_param->slice_data_offset + (macroblock_offset >> 3));
1820 OUT_BCS_BATCH(batch,
1821 slice_param->slice_vertical_position << 16 |
1822 next_slice_start_vert_pos << 0);
1823 OUT_BCS_BATCH(batch,
1824 (macroblock_offset & 0x7));
1825 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 picture decode: runs per-picture init, emits the common
 * MFX pipeline state plus VC-1 picture / prediction-pipe / direct-mode
 * state, then walks every slice parameter buffer emitting one BSD object
 * per slice. The whole sequence is an atomic BCS batch.
 */
1829 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1830 struct decode_state *decode_state,
1831 struct gen7_mfd_context *gen7_mfd_context)
1833 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1834 VAPictureParameterBufferVC1 *pic_param;
1835 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1836 dri_bo *slice_data_bo;
1839 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1840 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1842 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1843 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1844 intel_batchbuffer_emit_mi_flush(batch);
1845 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1846 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1847 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1848 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1849 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1850 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1851 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1853 for (j = 0; j < decode_state->num_slice_params; j++) {
1854 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1855 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1856 slice_data_bo = decode_state->slice_datas[j]->bo;
1857 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1859 if (j == decode_state->num_slice_params - 1)
1860 next_slice_group_param = NULL;
1862 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1864 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1865 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
/* Next slice = next element in this buffer, else first of next buffer. */
1867 if (i < decode_state->slice_params[j]->num_elements - 1)
1868 next_slice_param = slice_param + 1;
1870 next_slice_param = next_slice_group_param;
1872 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1877 intel_batchbuffer_end_atomic(batch);
1878 intel_batchbuffer_flush(batch);
/*
 * Per-picture decode setup for JPEG: derives the chroma subsampling from
 * the component sampling factors (per ITU-T T.81 — the first component's
 * h/v factors relative to the chroma components determine 4:2:0, 4:2:2,
 * 4:4:4, 4:1:1, or 4:4:0), allocates the render target as an IMC1
 * surface with that subsampling, binds it as the pre-deblocking output,
 * and invalidates every buffer the JPEG pipeline does not use.
 *
 * Fix: the 2,2,2 / 2,1,1 branch tested "h2 == 2 && h2 == 2" — an
 * obvious typo for h1 that made the intended pattern unreachable;
 * corrected to test h1.
 */
1882 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1883 struct decode_state *decode_state,
1884 struct gen7_mfd_context *gen7_mfd_context)
1886 struct object_surface *obj_surface;
1887 VAPictureParameterBufferJPEGBaseline *pic_param;
1888 int subsampling = SUBSAMPLE_YUV420;
1890 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1892 if (pic_param->num_components == 1)
1893 subsampling = SUBSAMPLE_YUV400;
1894 else if (pic_param->num_components == 3) {
1895 int h1 = pic_param->components[0].h_sampling_factor;
1896 int h2 = pic_param->components[1].h_sampling_factor;
1897 int h3 = pic_param->components[2].h_sampling_factor;
1898 int v1 = pic_param->components[0].v_sampling_factor;
1899 int v2 = pic_param->components[1].v_sampling_factor;
1900 int v3 = pic_param->components[2].v_sampling_factor;
1902 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1903 v1 == 2 && v2 == 1 && v3 == 1)
1904 subsampling = SUBSAMPLE_YUV420;
1905 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1906 v1 == 1 && v2 == 1 && v3 == 1)
1907 subsampling = SUBSAMPLE_YUV422H;
1908 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1909 v1 == 1 && v2 == 1 && v3 == 1)
1910 subsampling = SUBSAMPLE_YUV444;
1911 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1912 v1 == 1 && v2 == 1 && v3 == 1)
1913 subsampling = SUBSAMPLE_YUV411;
1914 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1915 v1 == 2 && v2 == 1 && v3 == 1)
1916 subsampling = SUBSAMPLE_YUV422V;
1917 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1918 v1 == 2 && v2 == 2 && v3 == 2)
1919 subsampling = SUBSAMPLE_YUV422H;
/* Was "h2 == 2 && h2 == 2" — typo for h1; fixed so the 2,2,2 / 2,1,1
 * sampling pattern is actually matched. */
1920 else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1921 v1 == 2 && v2 == 1 && v3 == 1)
1922 subsampling = SUBSAMPLE_YUV422V;
1929 /* Current decoded picture */
1930 obj_surface = decode_state->render_object;
1931 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
/* JPEG has no deblocking: decode straight into the render target. */
1933 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1934 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1935 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1936 gen7_mfd_context->pre_deblocking_output.valid = 1;
/* None of the scratch/bitplane buffers are used for JPEG. */
1938 gen7_mfd_context->post_deblocking_output.bo = NULL;
1939 gen7_mfd_context->post_deblocking_output.valid = 0;
1941 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1942 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1944 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1945 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1947 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1948 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1950 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1951 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1953 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1954 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Maps a VA rotation index (0/90/180/270 degrees) to the GEN7 MFX JPEG
 * rotation field value; used by gen8_mfd_jpeg_pic_state(). */
1957 static const int va_to_gen7_jpeg_rotation[4] = {
1958 GEN7_JPEG_ROTATION_0,
1959 GEN7_JPEG_ROTATION_90,
1960 GEN7_JPEG_ROTATION_180,
1961 GEN7_JPEG_ROTATION_270
1965 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1966 struct decode_state *decode_state,
1967 struct gen7_mfd_context *gen7_mfd_context)
1969 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1970 VAPictureParameterBufferJPEGBaseline *pic_param;
1971 int chroma_type = GEN7_YUV420;
1972 int frame_width_in_blks;
1973 int frame_height_in_blks;
1975 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1976 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1978 if (pic_param->num_components == 1)
1979 chroma_type = GEN7_YUV400;
1980 else if (pic_param->num_components == 3) {
1981 int h1 = pic_param->components[0].h_sampling_factor;
1982 int h2 = pic_param->components[1].h_sampling_factor;
1983 int h3 = pic_param->components[2].h_sampling_factor;
1984 int v1 = pic_param->components[0].v_sampling_factor;
1985 int v2 = pic_param->components[1].v_sampling_factor;
1986 int v3 = pic_param->components[2].v_sampling_factor;
1988 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1989 v1 == 2 && v2 == 1 && v3 == 1)
1990 chroma_type = GEN7_YUV420;
1991 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1992 v1 == 1 && v2 == 1 && v3 == 1)
1993 chroma_type = GEN7_YUV422H_2Y;
1994 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1995 v1 == 1 && v2 == 1 && v3 == 1)
1996 chroma_type = GEN7_YUV444;
1997 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1998 v1 == 1 && v2 == 1 && v3 == 1)
1999 chroma_type = GEN7_YUV411;
2000 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2001 v1 == 2 && v2 == 1 && v3 == 1)
2002 chroma_type = GEN7_YUV422V_2Y;
2003 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2004 v1 == 2 && v2 == 2 && v3 == 2)
2005 chroma_type = GEN7_YUV422H_4Y;
2006 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2007 v1 == 2 && v2 == 1 && v3 == 1)
2008 chroma_type = GEN7_YUV422V_4Y;
2013 if (chroma_type == GEN7_YUV400 ||
2014 chroma_type == GEN7_YUV444 ||
2015 chroma_type == GEN7_YUV422V_2Y) {
2016 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2017 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2018 } else if (chroma_type == GEN7_YUV411) {
2019 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2020 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2022 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2023 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2026 BEGIN_BCS_BATCH(batch, 3);
2027 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2028 OUT_BCS_BATCH(batch,
2029 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2030 (chroma_type << 0));
2031 OUT_BCS_BATCH(batch,
2032 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2033 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2034 ADVANCE_BCS_BATCH(batch);
/* Maps a VA Huffman table index to the MFX_JPEG_HUFF_TABLE_STATE table id;
 * the initializer body is not visible in this chunk. */
2037 static const int va_to_gen7_jpeg_hufftable[2] = {
/*
 * Load the JPEG Huffman tables into the MFX engine via
 * MFX_JPEG_HUFF_TABLE_STATE (53 DWs: 12 bytes DC code counts, 12 bytes DC
 * values, 16 bytes AC code counts, 164 bytes AC values as programmed).
 * NOTE(review): this chunk elides some lines (e.g. the declarations of
 * `index`/`num_tables` and the early-return/continue statements) — confirm
 * against the full file.
 */
2043 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2044 struct decode_state *decode_state,
2045 struct gen7_mfd_context *gen7_mfd_context,
2048 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2049 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Nothing to do when the app supplied no Huffman table buffer. */
2052 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2055 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2057 for (index = 0; index < num_tables; index++) {
2058 int id = va_to_gen7_jpeg_hufftable[index];
/* Skip tables the app did not mark for (re)loading. */
2059 if (!huffman_table->load_huffman_table[index])
2061 BEGIN_BCS_BATCH(batch, 53);
2062 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2063 OUT_BCS_BATCH(batch, id);
2064 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2065 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2066 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2067 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2068 ADVANCE_BCS_BATCH(batch);
/* Maps a 1-based component id (see gen8_mfd_jpeg_qm_state) to the MFX
 * quantizer matrix type; the entry for index 0 is not visible in this chunk. */
2072 static const int va_to_gen7_jpeg_qm[5] = {
2074 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2075 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2076 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2077 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/*
 * Load the JPEG quantization matrices: map each frame component to an MFX
 * QM type and convert the VA zig-zag ordered table to raster order before
 * handing it to gen8_mfd_qm_state().
 * NOTE(review): this chunk elides some lines (declarations of `index`,
 * `qm_type`, `j` and the early return/continue statements).
 */
2081 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2082 struct decode_state *decode_state,
2083 struct gen7_mfd_context *gen7_mfd_context)
2085 VAPictureParameterBufferJPEGBaseline *pic_param;
2086 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2089 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2092 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2093 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2095 assert(pic_param->num_components <= 3);
2097 for (index = 0; index < pic_param->num_components; index++) {
/* 1-based component id relative to the first component; indexes va_to_gen7_jpeg_qm. */
2098 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2100 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2101 unsigned char raster_qm[64];
/* Reject ids outside the va_to_gen7_jpeg_qm range. */
2104 if (id > 4 || id < 1)
2107 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2110 qm_type = va_to_gen7_jpeg_qm[id];
/* De-zigzag: the VA table is in zig-zag scan order, the HW wants raster order. */
2112 for (j = 0; j < 64; j++)
2113 raster_qm[zigzag_direct[j]] = qm[j];
2115 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/*
 * Emit MFD_JPEG_BSD_OBJECT for one scan: data size/offset, scan position,
 * interleave flag, scan-component mask, MCU count and restart interval.
 * NOTE(review): the switch case labels that build scan_component_mask are
 * elided in this chunk; only the mask-setting statements are visible.
 */
2120 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2121 VAPictureParameterBufferJPEGBaseline *pic_param,
2122 VASliceParameterBufferJPEGBaseline *slice_param,
2123 VASliceParameterBufferJPEGBaseline *next_slice_param,
2124 dri_bo *slice_data_bo,
2125 struct gen7_mfd_context *gen7_mfd_context)
2127 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2128 int scan_component_mask = 0;
2131 assert(slice_param->num_components > 0);
2132 assert(slice_param->num_components < 4);
2133 assert(slice_param->num_components <= pic_param->num_components);
/* Build a bitmask of which components this scan covers, using the same
 * 1-based relative component id as gen8_mfd_jpeg_qm_state. */
2135 for (i = 0; i < slice_param->num_components; i++) {
2136 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2138 scan_component_mask |= (1 << 0);
2141 scan_component_mask |= (1 << 1);
2144 scan_component_mask |= (1 << 2);
2152 BEGIN_BCS_BATCH(batch, 6);
2153 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2154 OUT_BCS_BATCH(batch,
2155 slice_param->slice_data_size);
2156 OUT_BCS_BATCH(batch,
2157 slice_param->slice_data_offset);
2158 OUT_BCS_BATCH(batch,
2159 slice_param->slice_horizontal_position << 16 |
2160 slice_param->slice_vertical_position << 0);
2161 OUT_BCS_BATCH(batch,
2162 ((slice_param->num_components != 1) << 30) | /* interleaved */
2163 (scan_component_mask << 27) | /* scan components */
2164 (0 << 26) | /* disable interrupt allowed */
2165 (slice_param->num_mcus << 0)); /* MCU count */
2166 OUT_BCS_BATCH(batch,
2167 (slice_param->restart_interval << 0)); /* RestartInterval */
2168 ADVANCE_BCS_BATCH(batch);
2171 /* Workaround for JPEG decoding on Ivybridge */
/* Forward declarations of the surface management entry points used by the
 * workaround below (full prototypes are elided in this chunk). */
2175 i965_DestroySurfaces(VADriverContextP ctx,
2176 VASurfaceID *surface_list,
2179 i965_CreateSurfaces(VADriverContextP ctx,
2184 VASurfaceID *surfaces);
/* Canned clip data consumed by the gen8_jpeg_wa_* helpers below (its
 * data_size and data_bit_offset feed MFD_AVC_BSD_OBJECT).  Only part of
 * the struct declaration/initializer is visible in this chunk. */
2189 unsigned char data[32];
2191 int data_bit_offset;
2193 } gen7_jpeg_wa_clip = {
2197 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2198 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/*
 * (Re)create the small NV12 workaround surface and upload the canned clip
 * bytes into a slice-data BO.
 * NOTE(review): several lines are elided in this chunk (e.g. the `status`
 * declaration, some dri_bo_alloc/DestroySurfaces arguments, braces).
 */
2206 gen8_jpeg_wa_init(VADriverContextP ctx,
2207 struct gen7_mfd_context *gen7_mfd_context)
2209 struct i965_driver_data *i965 = i965_driver_data(ctx);
2211 struct object_surface *obj_surface;
/* Drop any previously created workaround surface before creating a new one. */
2213 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2214 i965_DestroySurfaces(ctx,
2215 &gen7_mfd_context->jpeg_wa_surface_id,
2218 status = i965_CreateSurfaces(ctx,
2219 gen7_jpeg_wa_clip.width,
2220 gen7_jpeg_wa_clip.height,
2221 VA_RT_FORMAT_YUV420,
2223 &gen7_mfd_context->jpeg_wa_surface_id);
2224 assert(status == VA_STATUS_SUCCESS);
2226 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2227 assert(obj_surface);
2228 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2229 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* Allocate the slice-data BO once and upload the canned clip bytes. */
2231 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2232 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2236 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2238 gen7_jpeg_wa_clip.data_size,
2239 gen7_jpeg_wa_clip.data);
/*
 * Emit MFX_PIPE_MODE_SELECT for the workaround decode: AVC VLD long-format
 * decoding with stream-out disabled and pre-deblocking output enabled.
 * NOTE(review): at least one DW line appears elided in this chunk.
 */
2244 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2245 struct gen7_mfd_context *gen7_mfd_context)
2247 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2249 BEGIN_BCS_BATCH(batch, 5);
2250 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2251 OUT_BCS_BATCH(batch,
2252 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2253 (MFD_MODE_VLD << 15) | /* VLD mode */
2254 (0 << 10) | /* disable Stream-Out */
2255 (0 << 9) | /* Post Deblocking Output */
2256 (1 << 8) | /* Pre Deblocking Output */
2257 (0 << 5) | /* not in stitch mode */
2258 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2259 (MFX_FORMAT_AVC << 0));
2260 OUT_BCS_BATCH(batch,
2261 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2262 (0 << 3) | /* terminate if AVC mbdata error occurs */
2263 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2266 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2267 OUT_BCS_BATCH(batch, 0); /* reserved */
2268 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE describing the NV12 workaround surface:
 * dimensions, planar 4:2:0 format, pitch, tiling, and the Cb plane offset.
 */
2272 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2273 struct gen7_mfd_context *gen7_mfd_context)
2275 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2276 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2278 BEGIN_BCS_BATCH(batch, 6);
2279 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2280 OUT_BCS_BATCH(batch, 0);
2281 OUT_BCS_BATCH(batch,
2282 ((obj_surface->orig_width - 1) << 18) |
2283 ((obj_surface->orig_height - 1) << 4));
2284 OUT_BCS_BATCH(batch,
2285 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2286 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2287 (0 << 22) | /* surface object control state, ignored */
2288 ((obj_surface->width - 1) << 3) | /* pitch */
2289 (0 << 2) | /* must be 0 */
2290 (1 << 1) | /* must be tiled */
2291 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2292 OUT_BCS_BATCH(batch,
2293 (0 << 16) | /* X offset for U(Cb), must be 0 */
2294 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2295 OUT_BCS_BATCH(batch,
2296 (0 << 16) | /* X offset for V(Cr), must be 0 */
2297 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2298 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 DWs) for the workaround decode.  Only
 * the pre-deblocking destination surface and a freshly allocated intra
 * row-store scratch BO are real; all other addresses are zeroed.  The
 * temporary intra BO is unreferenced once it has been emitted.
 * NOTE(review): the dri_bo_alloc arguments and a few relocation/brace
 * lines are elided in this chunk.
 */
2302 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2303 struct gen7_mfd_context *gen7_mfd_context)
2305 struct i965_driver_data *i965 = i965_driver_data(ctx);
2306 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2307 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2311 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2316 BEGIN_BCS_BATCH(batch, 61);
2317 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2318 OUT_BCS_RELOC(batch,
2320 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2322 OUT_BCS_BATCH(batch, 0);
2323 OUT_BCS_BATCH(batch, 0);
2326 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2327 OUT_BCS_BATCH(batch, 0);
2328 OUT_BCS_BATCH(batch, 0);
2330 /* uncompressed-video & stream out 7-12 */
2331 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2332 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2333 OUT_BCS_BATCH(batch, 0);
2334 OUT_BCS_BATCH(batch, 0);
2335 OUT_BCS_BATCH(batch, 0);
2336 OUT_BCS_BATCH(batch, 0);
2338 /* the DW 13-15 is for intra row store scratch */
2339 OUT_BCS_RELOC(batch,
2341 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2343 OUT_BCS_BATCH(batch, 0);
2344 OUT_BCS_BATCH(batch, 0);
2346 /* the DW 16-18 is for deblocking filter */
2347 OUT_BCS_BATCH(batch, 0);
2348 OUT_BCS_BATCH(batch, 0);
2349 OUT_BCS_BATCH(batch, 0);
/* Reference picture addresses: none used by the workaround. */
2352 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2353 OUT_BCS_BATCH(batch, 0);
2354 OUT_BCS_BATCH(batch, 0);
2356 OUT_BCS_BATCH(batch, 0);
2358 /* the DW52-54 is for mb status address */
2359 OUT_BCS_BATCH(batch, 0);
2360 OUT_BCS_BATCH(batch, 0);
2361 OUT_BCS_BATCH(batch, 0);
2362 /* the DW56-60 is for ILDB & second ILDB address */
2363 OUT_BCS_BATCH(batch, 0);
2364 OUT_BCS_BATCH(batch, 0);
2365 OUT_BCS_BATCH(batch, 0);
2366 OUT_BCS_BATCH(batch, 0);
2367 OUT_BCS_BATCH(batch, 0);
2368 OUT_BCS_BATCH(batch, 0);
2370 ADVANCE_BCS_BATCH(batch);
/* Batch holds its own reference via the relocation; drop ours. */
2372 dri_bo_unreference(intra_bo);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE for the workaround decode, with freshly
 * allocated BSD/MPC and MPR row-store buffers that are released again once
 * the relocations are in the batch.
 * NOTE(review): some dri_bo_alloc argument lines are elided in this chunk.
 */
2376 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2377 struct gen7_mfd_context *gen7_mfd_context)
2379 struct i965_driver_data *i965 = i965_driver_data(ctx);
2380 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2381 dri_bo *bsd_mpc_bo, *mpr_bo;
2383 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2384 "bsd mpc row store",
2385 11520, /* 1.5 * 120 * 64 */
2388 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2390 7680, /* 1. 0 * 120 * 64 */
2393 BEGIN_BCS_BATCH(batch, 10);
2394 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2396 OUT_BCS_RELOC(batch,
2398 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2401 OUT_BCS_BATCH(batch, 0);
2402 OUT_BCS_BATCH(batch, 0);
2404 OUT_BCS_RELOC(batch,
2406 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2408 OUT_BCS_BATCH(batch, 0);
2409 OUT_BCS_BATCH(batch, 0);
2411 OUT_BCS_BATCH(batch, 0);
2412 OUT_BCS_BATCH(batch, 0);
2413 OUT_BCS_BATCH(batch, 0);
2415 ADVANCE_BCS_BATCH(batch);
/* Batch holds its own references via the relocations; drop ours. */
2417 dri_bo_unreference(bsd_mpc_bo);
2418 dri_bo_unreference(mpr_bo);
/* AVC QM state step of the JPEG workaround sequence (called from
 * gen8_mfd_jpeg_wa); the function body is not visible in this chunk. */
2422 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2423 struct gen7_mfd_context *gen7_mfd_context)
/*
 * Emit MFX_AVC_IMG_STATE for the workaround clip: a 1x1-macroblock frame,
 * 4:2:0, CABAC, no MBAFF.
 * NOTE(review): several bit-field lines of DW4/DW5 are elided in this chunk.
 */
2429 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2430 struct gen7_mfd_context *gen7_mfd_context)
2432 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2434 int mbaff_frame_flag = 0;
2435 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2437 BEGIN_BCS_BATCH(batch, 16);
2438 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2439 OUT_BCS_BATCH(batch,
2440 width_in_mbs * height_in_mbs);
2441 OUT_BCS_BATCH(batch,
2442 ((height_in_mbs - 1) << 16) |
2443 ((width_in_mbs - 1) << 0));
2444 OUT_BCS_BATCH(batch,
2449 (0 << 12) | /* differ from GEN6 */
2452 OUT_BCS_BATCH(batch,
2453 (1 << 10) | /* 4:2:0 */
2454 (1 << 7) | /* CABAC */
2460 (mbaff_frame_flag << 1) |
2462 OUT_BCS_BATCH(batch, 0);
2463 OUT_BCS_BATCH(batch, 0);
2464 OUT_BCS_BATCH(batch, 0);
2465 OUT_BCS_BATCH(batch, 0);
2466 OUT_BCS_BATCH(batch, 0);
2467 OUT_BCS_BATCH(batch, 0);
2468 OUT_BCS_BATCH(batch, 0);
2469 OUT_BCS_BATCH(batch, 0);
2470 OUT_BCS_BATCH(batch, 0);
2471 OUT_BCS_BATCH(batch, 0);
2472 OUT_BCS_BATCH(batch, 0);
2473 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWs) for the workaround decode with
 * every reference surface and POC entry zeroed.
 */
2477 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2478 struct gen7_mfd_context *gen7_mfd_context)
2480 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2483 BEGIN_BCS_BATCH(batch, 71);
2484 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2486 /* reference surfaces 0..15 */
2487 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2488 OUT_BCS_BATCH(batch, 0); /* top */
2489 OUT_BCS_BATCH(batch, 0); /* bottom */
2492 OUT_BCS_BATCH(batch, 0);
2494 /* the current decoding frame/field */
2495 OUT_BCS_BATCH(batch, 0); /* top */
2496 OUT_BCS_BATCH(batch, 0);
2497 OUT_BCS_BATCH(batch, 0);
/* POC list: all zero for the workaround frame. */
2500 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2501 OUT_BCS_BATCH(batch, 0);
2502 OUT_BCS_BATCH(batch, 0);
2505 OUT_BCS_BATCH(batch, 0);
2506 OUT_BCS_BATCH(batch, 0);
2508 ADVANCE_BCS_BATCH(batch);
/*
 * Point MFX_IND_OBJ_BASE_ADDR_STATE at the canned workaround slice data BO;
 * all non-VLD address fields are zeroed.
 */
2512 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2513 struct gen7_mfd_context *gen7_mfd_context)
2515 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2517 BEGIN_BCS_BATCH(batch, 11);
2518 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2519 OUT_BCS_RELOC(batch,
2520 gen7_mfd_context->jpeg_wa_slice_data_bo,
2521 I915_GEM_DOMAIN_INSTRUCTION, 0,
2523 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2524 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2525 OUT_BCS_BATCH(batch, 0);
2526 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2527 OUT_BCS_BATCH(batch, 0);
2528 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2529 OUT_BCS_BATCH(batch, 0);
2530 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2531 OUT_BCS_BATCH(batch, 0);
2532 ADVANCE_BCS_BATCH(batch);
/*
 * Kick the workaround decode: MFD_AVC_BSD_OBJECT over the canned clip,
 * supplying its byte size and first-macroblock bit offset.
 * NOTE(review): the bit-field lines of DW3 and parts of DW4 are elided
 * in this chunk.
 */
2536 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2537 struct gen7_mfd_context *gen7_mfd_context)
2539 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2541 /* the input bitsteam format on GEN7 differs from GEN6 */
2542 BEGIN_BCS_BATCH(batch, 6);
2543 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2544 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2545 OUT_BCS_BATCH(batch, 0);
2546 OUT_BCS_BATCH(batch,
2552 OUT_BCS_BATCH(batch,
2553 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2556 (1 << 3) | /* LastSlice Flag */
2557 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2558 OUT_BCS_BATCH(batch, 0);
2559 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_SLICE_STATE for the single I-slice of the workaround clip:
 * deblocking disabled, flagged as the last slice of the picture.
 * NOTE(review): some bit-field lines of DW2/DW3 are elided in this chunk.
 */
2563 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2564 struct gen7_mfd_context *gen7_mfd_context)
2566 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2567 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2568 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2569 int first_mb_in_slice = 0;
2570 int slice_type = SLICE_TYPE_I;
2572 BEGIN_BCS_BATCH(batch, 11);
2573 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2574 OUT_BCS_BATCH(batch, slice_type);
2575 OUT_BCS_BATCH(batch,
2576 (num_ref_idx_l1 << 24) |
2577 (num_ref_idx_l0 << 16) |
2580 OUT_BCS_BATCH(batch,
2582 (1 << 27) | /* disable Deblocking */
2584 (gen7_jpeg_wa_clip.qp << 16) |
2587 OUT_BCS_BATCH(batch,
2588 (slice_ver_pos << 24) |
2589 (slice_hor_pos << 16) |
2590 (first_mb_in_slice << 0));
2591 OUT_BCS_BATCH(batch,
2592 (next_slice_ver_pos << 16) |
2593 (next_slice_hor_pos << 0));
2594 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2595 OUT_BCS_BATCH(batch, 0);
2596 OUT_BCS_BATCH(batch, 0);
2597 OUT_BCS_BATCH(batch, 0);
2598 OUT_BCS_BATCH(batch, 0);
2599 ADVANCE_BCS_BATCH(batch);
/*
 * Run the full JPEG hardware workaround sequence (see the "Workaround for
 * JPEG decoding on Ivybridge" comment above): set up the scratch surface
 * and canned clip, then emit the complete AVC decode state/object chain
 * to put the MFX engine into a known state before the real JPEG decode.
 */
2603 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2604 struct gen7_mfd_context *gen7_mfd_context)
2606 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2607 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2608 intel_batchbuffer_emit_mi_flush(batch);
2609 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2610 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2611 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2612 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2613 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2614 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2615 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2617 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2618 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2619 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/*
 * Top-level JPEG decode: run the HW workaround, program pipe/surface/buffer
 * state, make a first pass over all slices to find the highest Huffman table
 * selector in use, load that many tables, then emit one BSD object per scan.
 * NOTE(review): several lines are elided in this chunk (e.g. the `component`
 * declaration, `else` keywords, and closing braces).
 */
2625 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2626 struct decode_state *decode_state,
2627 struct gen7_mfd_context *gen7_mfd_context)
2629 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2630 VAPictureParameterBufferJPEGBaseline *pic_param;
2631 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2632 dri_bo *slice_data_bo;
2633 int i, j, max_selector = 0;
2635 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2636 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2638 /* Currently only support Baseline DCT */
2639 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2640 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2642 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2644 intel_batchbuffer_emit_mi_flush(batch);
2645 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2646 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2647 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2648 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2649 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* First pass: find the largest DC/AC Huffman table selector referenced. */
2651 for (j = 0; j < decode_state->num_slice_params; j++) {
2652 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2653 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2654 slice_data_bo = decode_state->slice_datas[j]->bo;
2655 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2657 if (j == decode_state->num_slice_params - 1)
2658 next_slice_group_param = NULL;
2660 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2662 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2665 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2667 if (i < decode_state->slice_params[j]->num_elements - 1)
2668 next_slice_param = slice_param + 1;
2670 next_slice_param = next_slice_group_param;
2672 for (component = 0; component < slice_param->num_components; component++) {
2673 if (max_selector < slice_param->components[component].dc_table_selector)
2674 max_selector = slice_param->components[component].dc_table_selector;
2676 if (max_selector < slice_param->components[component].ac_table_selector)
2677 max_selector = slice_param->components[component].ac_table_selector;
2684 assert(max_selector < 2);
2685 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Second pass: emit one MFD_JPEG_BSD_OBJECT per scan. */
2687 for (j = 0; j < decode_state->num_slice_params; j++) {
2688 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2689 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2690 slice_data_bo = decode_state->slice_datas[j]->bo;
2691 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2693 if (j == decode_state->num_slice_params - 1)
2694 next_slice_group_param = NULL;
2696 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2698 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2699 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2701 if (i < decode_state->slice_params[j]->num_elements - 1)
2702 next_slice_param = slice_param + 1;
2704 next_slice_param = next_slice_group_param;
2706 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2711 intel_batchbuffer_end_atomic(batch);
2712 intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup, indexed by the (clipped) quantization
 * index 0..127; used by gen8_mfd_vp8_pic_state via vp8_clip_quantization_index. */
2715 static const int vp8_dc_qlookup[128] =
2717 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2718 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2719 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2720 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2721 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2722 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2723 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2724 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup, indexed by the (clipped) quantization
 * index 0..127; used by gen8_mfd_vp8_pic_state via vp8_clip_quantization_index. */
2727 static const int vp8_ac_qlookup[128] =
2729 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2730 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2731 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2732 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2733 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2734 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2735 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2736 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
/* Presumably clips a VP8 quantization index into the 0..127 range of the
 * qlookup tables above — the function body is not visible in this chunk. */
2739 static inline unsigned int vp8_clip_quantization_index(unsigned int index)
/*
 * Prepare decoder state for a VP8 frame: allocate the NV12 render surface,
 * route output through the loop filter (post-deblocking) or around it
 * (pre-deblocking) depending on loop_filter_disable, and (re)allocate the
 * row-store scratch buffers, sized by frame width in macroblocks.
 * NOTE(review): some dri_bo_alloc argument lines are elided in this chunk.
 */
2748 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2749 struct decode_state *decode_state,
2750 struct gen7_mfd_context *gen7_mfd_context)
2752 struct object_surface *obj_surface;
2753 struct i965_driver_data *i965 = i965_driver_data(ctx);
2755 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2756 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2757 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2759 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2760 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2762 /* Current decoded picture */
2763 obj_surface = decode_state->render_object;
2764 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
/* Exactly one of post-/pre-deblocking output is valid, chosen by
 * loop_filter_disable. */
2766 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2767 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2768 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2769 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2771 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2772 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2773 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2774 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2776 /* The same as AVC */
2777 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2778 bo = dri_bo_alloc(i965->intel.bufmgr,
2783 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2784 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2786 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2787 bo = dri_bo_alloc(i965->intel.bufmgr,
2788 "deblocking filter row store",
2789 width_in_mbs * 64 * 4,
2792 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2793 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2795 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2796 bo = dri_bo_alloc(i965->intel.bufmgr,
2797 "bsd mpc row store",
2798 width_in_mbs * 64 * 2,
2801 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2802 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2804 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2805 bo = dri_bo_alloc(i965->intel.bufmgr,
2807 width_in_mbs * 64 * 2,
2810 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2811 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* VP8 has no bitplane buffer (VC-1 only). */
2813 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit MFX_VP8_PIC_STATE (38 DWs): frame geometry, filter/segmentation
 * flags, per-segment quantizer values (via the DC/AC lookup tables above),
 * the coefficient probability buffer, mode/MV probabilities and loop-filter
 * deltas.
 * NOTE(review): several lines are elided in this chunk (e.g. declarations
 * of `i`, `j`, `log2num`, part of DW1, and the conditional emission around
 * the probability-buffer relocation).
 */
2817 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2818 struct decode_state *decode_state,
2819 struct gen7_mfd_context *gen7_mfd_context)
2821 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2822 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2823 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2824 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2825 dri_bo *probs_bo = decode_state->probability_data->bo;
2827 unsigned int quantization_value[4][6];
/* log2 of the DCT partition count; usage of log2num is not visible here. */
2829 log2num = (int)log2(slice_param->num_of_partitions - 1);
2831 BEGIN_BCS_BATCH(batch, 38);
2832 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2833 OUT_BCS_BATCH(batch,
2834 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2835 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2836 OUT_BCS_BATCH(batch,
2838 pic_param->pic_fields.bits.sharpness_level << 16 |
2839 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2840 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2841 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2842 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2843 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2844 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2845 0 << 7 | /* segmentation id streamin disabled */
2846 0 << 6 | /* segmentation id streamout disabled */
2847 pic_param->pic_fields.bits.key_frame << 5 |
2848 pic_param->pic_fields.bits.filter_type << 4 |
2849 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2850 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2852 OUT_BCS_BATCH(batch,
2853 pic_param->loop_filter_level[3] << 24 |
2854 pic_param->loop_filter_level[2] << 16 |
2855 pic_param->loop_filter_level[1] << 8 |
2856 pic_param->loop_filter_level[0] << 0);
2858 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2859 for (i = 0; i < 4; i++) {
2860 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2861 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2862 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2863 /* 101581>>16 is equivalent to 155/100 */
2864 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2865 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2866 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
/* Clamp y2ac to >= 8 and uvdc to <= 132 per the lookup post-conditions. */
2868 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2869 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2871 OUT_BCS_BATCH(batch,
2872 quantization_value[i][0] << 16 | /* Y1AC */
2873 quantization_value[i][1] << 0); /* Y1DC */
2874 OUT_BCS_BATCH(batch,
2875 quantization_value[i][5] << 16 | /* UVAC */
2876 quantization_value[i][4] << 0); /* UVDC */
2877 OUT_BCS_BATCH(batch,
2878 quantization_value[i][3] << 16 | /* Y2AC */
2879 quantization_value[i][2] << 0); /* Y2DC */
2882 /* CoeffProbability table for non-key frame, DW16-DW18 */
2884 OUT_BCS_RELOC(batch, probs_bo,
2885 0, I915_GEM_DOMAIN_INSTRUCTION,
2887 OUT_BCS_BATCH(batch, 0);
2888 OUT_BCS_BATCH(batch, 0);
2890 OUT_BCS_BATCH(batch, 0);
2891 OUT_BCS_BATCH(batch, 0);
2892 OUT_BCS_BATCH(batch, 0);
2895 OUT_BCS_BATCH(batch,
2896 pic_param->mb_segment_tree_probs[2] << 16 |
2897 pic_param->mb_segment_tree_probs[1] << 8 |
2898 pic_param->mb_segment_tree_probs[0] << 0);
2900 OUT_BCS_BATCH(batch,
2901 pic_param->prob_skip_false << 24 |
2902 pic_param->prob_intra << 16 |
2903 pic_param->prob_last << 8 |
2904 pic_param->prob_gf << 0);
2906 OUT_BCS_BATCH(batch,
2907 pic_param->y_mode_probs[3] << 24 |
2908 pic_param->y_mode_probs[2] << 16 |
2909 pic_param->y_mode_probs[1] << 8 |
2910 pic_param->y_mode_probs[0] << 0);
2912 OUT_BCS_BATCH(batch,
2913 pic_param->uv_mode_probs[2] << 16 |
2914 pic_param->uv_mode_probs[1] << 8 |
2915 pic_param->uv_mode_probs[0] << 0);
2917 /* MV update value, DW23-DW32 */
2918 for (i = 0; i < 2; i++) {
2919 for (j = 0; j < 20; j += 4) {
/* mv_probs has 19 entries per list; the 20th slot is padded with 0. */
2920 OUT_BCS_BATCH(batch,
2921 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2922 pic_param->mv_probs[i][j + 2] << 16 |
2923 pic_param->mv_probs[i][j + 1] << 8 |
2924 pic_param->mv_probs[i][j + 0] << 0);
2928 OUT_BCS_BATCH(batch,
2929 pic_param->loop_filter_deltas_ref_frame[3] << 24 |
2930 pic_param->loop_filter_deltas_ref_frame[2] << 16 |
2931 pic_param->loop_filter_deltas_ref_frame[1] << 8 |
2932 pic_param->loop_filter_deltas_ref_frame[0] << 0);
2934 OUT_BCS_BATCH(batch,
2935 pic_param->loop_filter_deltas_mode[3] << 24 |
2936 pic_param->loop_filter_deltas_mode[2] << 16 |
2937 pic_param->loop_filter_deltas_mode[1] << 8 |
2938 pic_param->loop_filter_deltas_mode[0] << 0);
2940 /* segmentation id stream base address, DW35-DW37 */
2941 OUT_BCS_BATCH(batch, 0);
2942 OUT_BCS_BATCH(batch, 0);
2943 OUT_BCS_BATCH(batch, 0);
2944 ADVANCE_BCS_BATCH(batch);
2948 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2949 VAPictureParameterBufferVP8 *pic_param,
2950 VASliceParameterBufferVP8 *slice_param,
2951 dri_bo *slice_data_bo,
2952 struct gen7_mfd_context *gen7_mfd_context)
2954 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2956 unsigned int offset = slice_param->slice_data_offset;
2958 assert(slice_param->num_of_partitions >= 2);
2959 assert(slice_param->num_of_partitions <= 9);
2961 log2num = (int)log2(slice_param->num_of_partitions - 1);
2963 BEGIN_BCS_BATCH(batch, 22);
2964 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2965 OUT_BCS_BATCH(batch,
2966 pic_param->bool_coder_ctx.count << 16 | /* Partition 0 CPBAC Entropy Count */
2967 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2969 (slice_param->macroblock_offset & 0x7));
2970 OUT_BCS_BATCH(batch,
2971 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2974 for (i = 0; i < 9; i++) {
2975 if (i < slice_param->num_of_partitions) {
2976 OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2977 OUT_BCS_BATCH(batch, offset);
2979 OUT_BCS_BATCH(batch, 0);
2980 OUT_BCS_BATCH(batch, 0);
2983 offset += slice_param->partition_size[i];
2986 OUT_BCS_BATCH(batch,
2987 1 << 31 | /* concealment method */
2990 ADVANCE_BCS_BATCH(batch);
2994 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
2995 struct decode_state *decode_state,
2996 struct gen7_mfd_context *gen7_mfd_context)
2998 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2999 VAPictureParameterBufferVP8 *pic_param;
3000 VASliceParameterBufferVP8 *slice_param;
3001 dri_bo *slice_data_bo;
3003 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3004 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3006 /* one slice per frame */
3007 assert(decode_state->num_slice_params == 1);
3008 assert(decode_state->slice_params[0]->num_elements == 1);
3009 assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
3010 assert(decode_state->slice_datas[0]->bo);
3012 assert(decode_state->probability_data);
3014 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3015 slice_data_bo = decode_state->slice_datas[0]->bo;
3017 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3018 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3019 intel_batchbuffer_emit_mi_flush(batch);
3020 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3021 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3022 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3023 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3024 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3025 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3026 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3027 intel_batchbuffer_end_atomic(batch);
3028 intel_batchbuffer_flush(batch);
3032 gen8_mfd_decode_picture(VADriverContextP ctx,
3034 union codec_state *codec_state,
3035 struct hw_context *hw_context)
3038 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3039 struct decode_state *decode_state = &codec_state->decode;
3042 assert(gen7_mfd_context);
3044 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3046 if (vaStatus != VA_STATUS_SUCCESS)
3049 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3052 case VAProfileMPEG2Simple:
3053 case VAProfileMPEG2Main:
3054 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3057 case VAProfileH264ConstrainedBaseline:
3058 case VAProfileH264Main:
3059 case VAProfileH264High:
3060 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3063 case VAProfileVC1Simple:
3064 case VAProfileVC1Main:
3065 case VAProfileVC1Advanced:
3066 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3069 case VAProfileJPEGBaseline:
3070 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3073 case VAProfileVP8Version0_3:
3074 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3082 vaStatus = VA_STATUS_SUCCESS;
3089 gen8_mfd_context_destroy(void *hw_context)
3091 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3093 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3094 gen7_mfd_context->post_deblocking_output.bo = NULL;
3096 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3097 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3099 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3100 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3102 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3103 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3105 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3106 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3108 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3109 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3111 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3112 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3114 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3116 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3117 free(gen7_mfd_context);
3120 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3121 struct gen7_mfd_context *gen7_mfd_context)
3123 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3124 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3125 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3126 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3130 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3132 struct intel_driver_data *intel = intel_driver_data(ctx);
3133 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3136 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3137 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3138 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3140 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3141 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3142 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3145 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3147 switch (obj_config->profile) {
3148 case VAProfileMPEG2Simple:
3149 case VAProfileMPEG2Main:
3150 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3153 case VAProfileH264ConstrainedBaseline:
3154 case VAProfileH264Main:
3155 case VAProfileH264High:
3156 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3161 return (struct hw_context *)gen7_mfd_context;