2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Zig-zag scan order table for 8x8 coefficient blocks: maps scan position
 * to raster-order index. Used when loading quantization matrices into the
 * MFX unit (see the qm_state helpers below). */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/* Lazily attach per-surface AVC decode state (GenAvcSurface) to
 * obj_surface->private_data and make sure its direct-MV (DMV) scratch
 * buffer exists. Sized from the picture parameters:
 * width_in_mbs * height_in_mbs * 128 bytes for the whole frame. */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
/* Destructor for the private data; set unconditionally so re-used
 * surfaces always have a matching free routine. */
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface for AVC: allocate the private struct. */
75 if (!gen7_avc_surface) {
/* NOTE(review): calloc result is used unchecked — OOM would fault here;
 * confirm against the driver's allocation-failure policy. */
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 gen7_avc_surface->frame_store_id = -1;
78 assert((obj_surface->size & 0x3f) == 0);
79 obj_surface->private_data = gen7_avc_surface;
82 /* DMV buffers now relate to the whole frame, irrespective of
84 if (gen7_avc_surface->dmv_top == NULL) {
85 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86 "direct mv w/r buffer",
87 width_in_mbs * height_in_mbs * 128,
89 assert(gen7_avc_surface->dmv_top);
/* Emit MFX_PIPE_MODE_SELECT (5 dwords) on the BCS ring: puts the MFX
 * engine into long-format VLD decode mode for the given codec standard
 * (MPEG-2 / AVC / VC-1 / JPEG / VP8), with stream-out disabled and the
 * pre-/post-deblocking output enables taken from the context. */
94 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
95 struct decode_state *decode_state,
97 struct gen7_mfd_context *gen7_mfd_context)
99 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these standards are supported by this decoder path. */
101 assert(standard_select == MFX_FORMAT_MPEG2 ||
102 standard_select == MFX_FORMAT_AVC ||
103 standard_select == MFX_FORMAT_VC1 ||
104 standard_select == MFX_FORMAT_JPEG ||
105 standard_select == MFX_FORMAT_VP8);
107 BEGIN_BCS_BATCH(batch, 5);
108 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
110 (MFX_LONG_MODE << 17) | /* Currently only support long format */
111 (MFD_MODE_VLD << 15) | /* VLD mode */
112 (0 << 10) | /* disable Stream-Out */
113 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
114 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
115 (0 << 5) | /* not in stitch mode */
116 (MFX_CODEC_DECODE << 4) | /* decoding mode */
117 (standard_select << 0));
/* Error-handling dword: all "terminate on error" bits left at 0, i.e.
 * decoding continues past bitstream errors. */
119 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
120 (0 << 3) | /* terminate if AVC mbdata error occurs */
121 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
124 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
125 OUT_BCS_BATCH(batch, 0); /* reserved */
126 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE (6 dwords) describing the render target of the
 * current decode: dimensions, pitch, tiling (Y-major), chroma offsets.
 * Y800 surfaces are programmed as monochrome, everything else as planar
 * 4:2:0 (NV12-style interleaved chroma except for JPEG). */
130 gen8_mfd_surface_state(VADriverContextP ctx,
131 struct decode_state *decode_state,
133 struct gen7_mfd_context *gen7_mfd_context)
135 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136 struct object_surface *obj_surface = decode_state->render_object;
137 unsigned int y_cb_offset;
138 unsigned int y_cr_offset;
139 unsigned int surface_format;
143 y_cb_offset = obj_surface->y_cb_offset;
144 y_cr_offset = obj_surface->y_cr_offset;
146 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
149 BEGIN_BCS_BATCH(batch, 6);
150 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151 OUT_BCS_BATCH(batch, 0);
/* Height/width are programmed as (size - 1), per hardware convention. */
153 ((obj_surface->orig_height - 1) << 18) |
154 ((obj_surface->orig_width - 1) << 4));
156 (surface_format << 28) | /* 420 planar YUV surface */
157 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158 (0 << 22) | /* surface object control state, ignored */
159 ((obj_surface->width - 1) << 3) | /* pitch */
160 (0 << 2) | /* must be 0 */
161 (1 << 1) | /* must be tiled */
162 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
164 (0 << 16) | /* X offset for U(Cb), must be 0 */
165 (y_cb_offset << 0)); /* Y offset for U(Cb) */
167 (0 << 16) | /* X offset for V(Cr), must be 0 */
168 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
169 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): programs every buffer address
 * the MFX decode pipe needs — pre/post-deblocking outputs, the intra and
 * deblocking-filter row-store scratch buffers, and the 16 reference
 * picture entries. Invalid/absent buffers are programmed as 0. */
173 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174 struct decode_state *decode_state,
176 struct gen7_mfd_context *gen7_mfd_context)
178 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
181 BEGIN_BCS_BATCH(batch, 61);
182 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
183 /* Pre-deblock 1-3 */
184 if (gen7_mfd_context->pre_deblocking_output.valid)
185 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
189 OUT_BCS_BATCH(batch, 0);
191 OUT_BCS_BATCH(batch, 0);
192 OUT_BCS_BATCH(batch, 0);
193 /* Post-deblocking 4-6 */
194 if (gen7_mfd_context->post_deblocking_output.valid)
195 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
196 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
199 OUT_BCS_BATCH(batch, 0);
201 OUT_BCS_BATCH(batch, 0);
202 OUT_BCS_BATCH(batch, 0);
204 /* uncompressed-video & stream out 7-12 */
205 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
207 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
209 OUT_BCS_BATCH(batch, 0);
210 OUT_BCS_BATCH(batch, 0);
212 /* intra row-store scratch 13-15 */
213 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
214 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
215 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
218 OUT_BCS_BATCH(batch, 0);
220 OUT_BCS_BATCH(batch, 0);
221 OUT_BCS_BATCH(batch, 0);
222 /* deblocking-filter-row-store 16-18 */
223 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
224 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
225 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
230 OUT_BCS_BATCH(batch, 0);
/* Reference picture addresses: one entry per frame store slot; slots
 * without a backing bo are programmed as 0. References are read-only
 * (write domain 0 in the relocation). */
233 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
234 struct object_surface *obj_surface;
236 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
237 gen7_mfd_context->reference_surface[i].obj_surface &&
238 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
239 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
241 OUT_BCS_RELOC(batch, obj_surface->bo,
242 I915_GEM_DOMAIN_INSTRUCTION, 0,
245 OUT_BCS_BATCH(batch, 0);
248 OUT_BCS_BATCH(batch, 0);
251 /* reference property 51 */
252 OUT_BCS_BATCH(batch, 0);
254 /* Macroblock status & ILDB 52-57 */
255 OUT_BCS_BATCH(batch, 0);
256 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
259 OUT_BCS_BATCH(batch, 0);
260 OUT_BCS_BATCH(batch, 0);
262 /* the second Macroblock status 58-60 */
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
265 OUT_BCS_BATCH(batch, 0);
267 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): points the hardware at
 * the indirect bitstream object (the slice data bo). The MV, IT-coeff,
 * IT-deblock and PAK-BSE sections are zeroed — they are encoder/IT-mode
 * fields and unused for VLD decode. */
271 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
272 dri_bo *slice_data_bo,
274 struct gen7_mfd_context *gen7_mfd_context)
276 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
278 BEGIN_BCS_BATCH(batch, 26);
279 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
281 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
282 OUT_BCS_BATCH(batch, 0);
283 OUT_BCS_BATCH(batch, 0);
284 /* Upper bound 4-5 */
285 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
286 OUT_BCS_BATCH(batch, 0);
288 /* MFX indirect MV 6-10 */
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
291 OUT_BCS_BATCH(batch, 0);
292 OUT_BCS_BATCH(batch, 0);
293 OUT_BCS_BATCH(batch, 0);
295 /* MFX IT_COFF 11-15 */
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
299 OUT_BCS_BATCH(batch, 0);
300 OUT_BCS_BATCH(batch, 0);
302 /* MFX IT_DBLK 16-20 */
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
306 OUT_BCS_BATCH(batch, 0);
307 OUT_BCS_BATCH(batch, 0);
309 /* MFX PAK_BSE object for encoder 21-25 */
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
313 OUT_BCS_BATCH(batch, 0);
314 OUT_BCS_BATCH(batch, 0);
316 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row-store and
 * MPR row-store scratch buffer addresses, plus the VC-1 bitplane read
 * buffer when one is present. Invalid entries are programmed as 0. */
320 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
321 struct decode_state *decode_state,
323 struct gen7_mfd_context *gen7_mfd_context)
325 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
327 BEGIN_BCS_BATCH(batch, 10);
328 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
330 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
331 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
332 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
335 OUT_BCS_BATCH(batch, 0);
337 OUT_BCS_BATCH(batch, 0);
338 OUT_BCS_BATCH(batch, 0);
339 /* MPR Row Store Scratch buffer 4-6 */
340 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
341 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
342 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
345 OUT_BCS_BATCH(batch, 0);
347 OUT_BCS_BATCH(batch, 0);
348 OUT_BCS_BATCH(batch, 0);
/* Bitplane read buffer (read-only relocation: write domain is 0). */
351 if (gen7_mfd_context->bitplane_read_buffer.valid)
352 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
353 I915_GEM_DOMAIN_INSTRUCTION, 0,
356 OUT_BCS_BATCH(batch, 0);
357 OUT_BCS_BATCH(batch, 0);
358 OUT_BCS_BATCH(batch, 0);
359 ADVANCE_BCS_BATCH(batch);
/* Emit one MFX_QM_STATE command (18 dwords): uploads a quantization
 * matrix of up to 64 bytes for the given qm_type. The payload is always
 * 16 dwords; shorter matrices occupy the leading bytes. */
363 gen8_mfd_qm_state(VADriverContextP ctx,
367 struct gen7_mfd_context *gen7_mfd_context)
369 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
370 unsigned int qm_buffer[16];
372 assert(qm_length <= 16 * 4);
/* NOTE(review): when qm_length < 64 the tail of qm_buffer is left
 * uninitialized yet all 64 bytes are emitted below — confirm the
 * hardware ignores the padding for short matrix types. */
373 memcpy(qm_buffer, qm, qm_length);
375 BEGIN_BCS_BATCH(batch, 18);
376 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377 OUT_BCS_BATCH(batch, qm_type << 0);
378 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_IMG_STATE (17 dwords): per-picture AVC decode parameters
 * — frame size in MBs, QP offsets, prediction/entropy flags, MBAFF —
 * derived from the VAPictureParameterBufferH264 of the current picture.
 * Only monochrome and 4:2:0 chroma formats are supported by the MFX unit. */
383 gen8_mfd_avc_img_state(VADriverContextP ctx,
384 struct decode_state *decode_state,
385 struct gen7_mfd_context *gen7_mfd_context)
387 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
389 int mbaff_frame_flag;
390 unsigned int width_in_mbs, height_in_mbs;
391 VAPictureParameterBufferH264 *pic_param;
393 assert(decode_state->pic_param && decode_state->pic_param->buffer);
394 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
395 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the image structure (frame / top field / bottom field) from
 * the CurrPic flags. */
397 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
399 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
/* Field pictures must have field_pic_flag set; frames must not. */
404 if ((img_struct & 0x1) == 0x1) {
405 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
407 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
410 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
411 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
412 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
414 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
/* MBAFF applies only to frame pictures of an MBAFF-capable sequence. */
417 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
418 !pic_param->pic_fields.bits.field_pic_flag);
420 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
421 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
423 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
424 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
425 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
426 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
428 BEGIN_BCS_BATCH(batch, 17);
429 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
430 (width_in_mbs * height_in_mbs - 1));
433 ((height_in_mbs - 1) << 16) |
434 ((width_in_mbs - 1) << 0));
436 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
437 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
438 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
439 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
440 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
441 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
444 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
445 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
446 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
447 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
448 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
449 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
450 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
451 (mbaff_frame_flag << 1) |
452 (pic_param->pic_fields.bits.field_pic_flag << 0));
/* Remaining dwords are reserved / unused for decode. */
453 OUT_BCS_BATCH(batch, 0);
454 OUT_BCS_BATCH(batch, 0);
455 OUT_BCS_BATCH(batch, 0);
456 OUT_BCS_BATCH(batch, 0);
457 OUT_BCS_BATCH(batch, 0);
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 OUT_BCS_BATCH(batch, 0);
465 ADVANCE_BCS_BATCH(batch);
/* Upload the AVC scaling lists: the 4x4 intra and inter matrices always,
 * and the 8x8 intra/inter matrices only when 8x8 transforms are enabled.
 * Falls back to the context's default (flat) IQ matrix when the app did
 * not supply one. */
469 gen8_mfd_avc_qm_state(VADriverContextP ctx,
470 struct decode_state *decode_state,
471 struct gen7_mfd_context *gen7_mfd_context)
473 VAIQMatrixBufferH264 *iq_matrix;
474 VAPictureParameterBufferH264 *pic_param;
476 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
477 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
479 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
481 assert(decode_state->pic_param && decode_state->pic_param->buffer);
482 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
/* ScalingList4x4 rows 0-2 are intra (Y/Cb/Cr), rows 3-5 inter. */
484 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
485 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
487 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
488 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
489 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/* Thin wrapper: emit the AVC PICID state for the current reference
 * surfaces via the shared GEN7.5 helper. */
494 gen8_mfd_avc_picid_state(VADriverContextP ctx,
495 struct decode_state *decode_state,
496 struct gen7_mfd_context *gen7_mfd_context)
498 gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
499 gen7_mfd_context->reference_surface);
/* Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV (DMV) buffer
 * addresses for each reference surface and for the current picture,
 * followed by the top/bottom POC list used for B-slice direct-mode
 * prediction. */
503 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
504 struct decode_state *decode_state,
505 VAPictureParameterBufferH264 *pic_param,
506 VASliceParameterBufferH264 *slice_param,
507 struct gen7_mfd_context *gen7_mfd_context)
509 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
510 struct object_surface *obj_surface;
511 GenAvcSurface *gen7_avc_surface;
512 VAPictureH264 *va_pic;
515 BEGIN_BCS_BATCH(batch, 71);
516 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
518 /* reference surfaces 0..15 */
519 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
520 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
521 gen7_mfd_context->reference_surface[i].obj_surface &&
522 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
524 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
525 gen7_avc_surface = obj_surface->private_data;
/* Reference DMV buffers are read-only (write domain 0). */
527 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
528 I915_GEM_DOMAIN_INSTRUCTION, 0,
530 OUT_BCS_BATCH(batch, 0);
532 OUT_BCS_BATCH(batch, 0);
533 OUT_BCS_BATCH(batch, 0);
537 OUT_BCS_BATCH(batch, 0);
539 /* the current decoding frame/field */
540 va_pic = &pic_param->CurrPic;
541 obj_surface = decode_state->render_object;
/* The current surface's DMV buffer must already exist — see
 * gen8_mfd_init_avc_surface(), called from decode init. */
542 assert(obj_surface->bo && obj_surface->private_data);
543 gen7_avc_surface = obj_surface->private_data;
/* Current picture's DMV buffer is written by the hardware. */
545 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
546 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
549 OUT_BCS_BATCH(batch, 0);
550 OUT_BCS_BATCH(batch, 0);
/* POC list: top/bottom field order counts per reference slot. */
553 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
554 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
557 const VAPictureH264 * const va_pic = avc_find_picture(
558 obj_surface->base.id, pic_param->ReferenceFrames,
559 ARRAY_ELEMS(pic_param->ReferenceFrames));
561 assert(va_pic != NULL);
562 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
563 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
565 OUT_BCS_BATCH(batch, 0);
566 OUT_BCS_BATCH(batch, 0);
/* Finally, the POCs of the current picture. */
570 va_pic = &pic_param->CurrPic;
571 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
572 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
574 ADVANCE_BCS_BATCH(batch);
/* Emit a phantom slice covering the MBs before the first real slice
 * (used when the first slice does not start at MB 0); delegates to the
 * shared GEN6 helper. */
578 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
579 VAPictureParameterBufferH264 *pic_param,
580 VASliceParameterBufferH264 *next_slice_param,
581 struct gen7_mfd_context *gen7_mfd_context)
583 gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
/* Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type, active reference counts, QP/deblocking parameters, and the
 * start/end MB positions (the end position comes from the next slice,
 * or the picture boundary for the last slice). */
587 gen8_mfd_avc_slice_state(VADriverContextP ctx,
588 VAPictureParameterBufferH264 *pic_param,
589 VASliceParameterBufferH264 *slice_param,
590 VASliceParameterBufferH264 *next_slice_param,
591 struct gen7_mfd_context *gen7_mfd_context)
593 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
594 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
595 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
596 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
597 int num_ref_idx_l0, num_ref_idx_l1;
598 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
599 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
600 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Fold SI into I and SP into P; the hardware only knows I/P/B. */
603 if (slice_param->slice_type == SLICE_TYPE_I ||
604 slice_param->slice_type == SLICE_TYPE_SI) {
605 slice_type = SLICE_TYPE_I;
606 } else if (slice_param->slice_type == SLICE_TYPE_P ||
607 slice_param->slice_type == SLICE_TYPE_SP) {
608 slice_type = SLICE_TYPE_P;
610 assert(slice_param->slice_type == SLICE_TYPE_B);
611 slice_type = SLICE_TYPE_B;
/* Reference counts: I uses none, P uses list 0 only, B uses both. */
614 if (slice_type == SLICE_TYPE_I) {
615 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
616 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
619 } else if (slice_type == SLICE_TYPE_P) {
620 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
621 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
624 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
625 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* In MBAFF pictures MB addresses count MB pairs, hence the shift. */
628 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
629 slice_hor_pos = first_mb_in_slice % width_in_mbs;
630 slice_ver_pos = first_mb_in_slice / width_in_mbs;
632 if (next_slice_param) {
633 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
634 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
635 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
/* Last slice: end position is the bottom of the picture (halved
 * for field pictures). */
637 next_slice_hor_pos = 0;
638 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
641 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
642 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
643 OUT_BCS_BATCH(batch, slice_type);
645 (num_ref_idx_l1 << 24) |
646 (num_ref_idx_l0 << 16) |
647 (slice_param->chroma_log2_weight_denom << 8) |
648 (slice_param->luma_log2_weight_denom << 0));
650 (slice_param->direct_spatial_mv_pred_flag << 29) |
651 (slice_param->disable_deblocking_filter_idc << 27) |
652 (slice_param->cabac_init_idc << 24) |
653 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
654 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
655 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
657 (slice_ver_pos << 24) |
658 (slice_hor_pos << 16) |
659 (first_mb_in_slice << 0));
661 (next_slice_ver_pos << 16) |
662 (next_slice_hor_pos << 0));
664 (next_slice_param == NULL) << 19); /* last slice flag */
665 OUT_BCS_BATCH(batch, 0);
666 OUT_BCS_BATCH(batch, 0);
667 OUT_BCS_BATCH(batch, 0);
668 OUT_BCS_BATCH(batch, 0);
669 ADVANCE_BCS_BATCH(batch);
/* Thin wrapper: emit the AVC reference index state for this slice via
 * the shared GEN6 helper, using the context's reference surface list. */
673 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
674 VAPictureParameterBufferH264 *pic_param,
675 VASliceParameterBufferH264 *slice_param,
676 struct gen7_mfd_context *gen7_mfd_context)
678 gen6_send_avc_ref_idx_state(
679 gen7_mfd_context->base.batch,
681 gen7_mfd_context->reference_surface
/* Emit MFX_AVC_WEIGHTOFFSET_STATE (98 dwords each) when explicit
 * weighted prediction is in use: one table (list 0) for weighted P
 * slices, two tables (lists 0 and 1) for B slices with
 * weighted_bipred_idc == 1. Each table packs, per reference, the luma
 * weight/offset and both chroma weight/offset pairs. */
686 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
687 VAPictureParameterBufferH264 *pic_param,
688 VASliceParameterBufferH264 *slice_param,
689 struct gen7_mfd_context *gen7_mfd_context)
691 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
692 int i, j, num_weight_offset_table = 0;
693 short weightoffsets[32 * 6];
695 if ((slice_param->slice_type == SLICE_TYPE_P ||
696 slice_param->slice_type == SLICE_TYPE_SP) &&
697 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
698 num_weight_offset_table = 1;
701 if ((slice_param->slice_type == SLICE_TYPE_B) &&
702 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
703 num_weight_offset_table = 2;
/* i selects the list: 0 = L0 table, 1 = L1 table. */
706 for (i = 0; i < num_weight_offset_table; i++) {
707 BEGIN_BCS_BATCH(batch, 98);
708 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
709 OUT_BCS_BATCH(batch, i);
/* Pack list-0 entries: 6 shorts per reference index. */
712 for (j = 0; j < 32; j++) {
713 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
714 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
715 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
716 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
717 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
718 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
/* Pack list-1 entries (second iteration only). */
721 for (j = 0; j < 32; j++) {
722 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
723 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
724 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
725 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
726 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
727 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
731 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
732 ADVANCE_BCS_BATCH(batch);
/* Emit MFD_AVC_BSD_OBJECT (6 dwords) for one slice: slice data size and
 * offset within the indirect bitstream bo, plus the bit offset of the
 * first macroblock (computed by avc_get_first_mb_bit_offset, which
 * accounts for CABAC vs CAVLC entropy coding). */
737 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
738 VAPictureParameterBufferH264 *pic_param,
739 VASliceParameterBufferH264 *slice_param,
740 dri_bo *slice_data_bo,
741 VASliceParameterBufferH264 *next_slice_param,
742 struct gen7_mfd_context *gen7_mfd_context)
744 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
745 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
747 pic_param->pic_fields.bits.entropy_coding_mode_flag);
749 /* the input bitstream format on GEN7 differs from GEN6 */
750 BEGIN_BCS_BATCH(batch, 6);
751 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
753 (slice_param->slice_data_size));
754 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
/* Split the first-MB bit offset into a byte part and a residual
 * 0-7 bit part, as the command expects. */
762 ((slice_data_bit_offset >> 3) << 16) |
766 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
767 (slice_data_bit_offset & 0x7));
768 OUT_BCS_BATCH(batch, 0);
769 ADVANCE_BCS_BATCH(batch);
/* One-time AVC context setup: seed the context's fallback IQ matrix with
 * flat (default) scaling lists, used when the app supplies none. */
773 gen8_mfd_avc_context_init(
774 VADriverContextP ctx,
775 struct gen7_mfd_context *gen7_mfd_context
778 /* Initialize flat scaling lists */
779 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/* Per-picture AVC decode setup: scan all slices to learn whether in-loop
 * deblocking (ILDB) is active, refresh the frame-store index, prepare the
 * render surface (bo + per-surface DMV state), route the decode output to
 * the post- or pre-deblocking path accordingly, and (re)allocate the
 * row-store scratch buffers sized from the picture width. */
783 gen8_mfd_avc_decode_init(VADriverContextP ctx,
784 struct decode_state *decode_state,
785 struct gen7_mfd_context *gen7_mfd_context)
787 VAPictureParameterBufferH264 *pic_param;
788 VASliceParameterBufferH264 *slice_param;
789 struct i965_driver_data *i965 = i965_driver_data(ctx);
790 struct object_surface *obj_surface;
792 int i, j, enable_avc_ildb = 0;
793 unsigned int width_in_mbs, height_in_mbs;
/* Stop scanning as soon as any slice enables deblocking. */
795 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
796 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
797 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
799 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
800 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
801 assert((slice_param->slice_type == SLICE_TYPE_I) ||
802 (slice_param->slice_type == SLICE_TYPE_SI) ||
803 (slice_param->slice_type == SLICE_TYPE_P) ||
804 (slice_param->slice_type == SLICE_TYPE_SP) ||
805 (slice_param->slice_type == SLICE_TYPE_B));
/* disable_deblocking_filter_idc == 1 means "deblocking off". */
807 if (slice_param->disable_deblocking_filter_idc != 1) {
816 assert(decode_state->pic_param && decode_state->pic_param->buffer);
817 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
818 gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
819 gen7_mfd_context->reference_surface);
820 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
821 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
822 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
823 assert(height_in_mbs > 0 && height_in_mbs <= 256);
825 /* Current decoded picture */
826 obj_surface = decode_state->render_object;
827 if (pic_param->pic_fields.bits.reference_pic_flag)
828 obj_surface->flags |= SURFACE_REFERENCED;
830 obj_surface->flags &= ~SURFACE_REFERENCED;
832 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
833 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Route output: post-deblocking path when ILDB is on, pre-deblocking
 * otherwise; exactly one of the two is marked valid. */
835 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
836 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
837 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
838 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
840 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
841 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
842 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
843 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
/* Scratch buffers below are re-allocated every picture; sizes scale
 * with the picture width in macroblocks. */
845 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
846 bo = dri_bo_alloc(i965->intel.bufmgr,
851 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
852 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
854 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
855 bo = dri_bo_alloc(i965->intel.bufmgr,
856 "deblocking filter row store",
857 width_in_mbs * 64 * 4,
860 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
861 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
863 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
864 bo = dri_bo_alloc(i965->intel.bufmgr,
866 width_in_mbs * 64 * 2,
869 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
870 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
872 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
873 bo = dri_bo_alloc(i965->intel.bufmgr,
875 width_in_mbs * 64 * 2,
878 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
879 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* AVC has no bitplane buffer (that is a VC-1 feature). */
881 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Top-level AVC picture decode: run per-picture init, then build and
 * flush one atomic BCS batch containing the common MFX state followed
 * by, for every slice, the per-slice direct-mode / ref-idx / weight /
 * slice-state / BSD-object commands. */
885 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
886 struct decode_state *decode_state,
887 struct gen7_mfd_context *gen7_mfd_context)
889 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
890 VAPictureParameterBufferH264 *pic_param;
891 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
892 dri_bo *slice_data_bo;
895 assert(decode_state->pic_param && decode_state->pic_param->buffer);
896 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
897 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
/* Picture-level state, emitted once per picture. */
899 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
900 intel_batchbuffer_emit_mi_flush(batch);
901 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
902 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
903 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
904 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
905 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
906 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
907 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
/* Outer loop: slice parameter buffers (slice groups). */
909 for (j = 0; j < decode_state->num_slice_params; j++) {
910 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
911 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
912 slice_data_bo = decode_state->slice_datas[j]->bo;
913 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
/* Peek at the next group's first slice so the last slice of this
 * group knows where it ends. */
915 if (j == decode_state->num_slice_params - 1)
916 next_slice_group_param = NULL;
918 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
/* Picture does not start at MB 0: emit a phantom leading slice. */
920 if (j == 0 && slice_param->first_mb_in_slice)
921 gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
/* Inner loop: individual slices within this parameter buffer. */
923 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
924 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
925 assert((slice_param->slice_type == SLICE_TYPE_I) ||
926 (slice_param->slice_type == SLICE_TYPE_SI) ||
927 (slice_param->slice_type == SLICE_TYPE_P) ||
928 (slice_param->slice_type == SLICE_TYPE_SP) ||
929 (slice_param->slice_type == SLICE_TYPE_B));
931 if (i < decode_state->slice_params[j]->num_elements - 1)
932 next_slice_param = slice_param + 1;
934 next_slice_param = next_slice_group_param;
936 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
937 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
938 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
939 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
940 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
945 intel_batchbuffer_end_atomic(batch);
946 intel_batchbuffer_flush(batch);
/* Per-picture MPEG-2 decode setup: bind the reference surfaces, ensure
 * the render target has an NV12 bo, route output to the pre-deblocking
 * path (MPEG-2 has no in-loop deblocking), allocate the BSD/MPC
 * row-store scratch buffer, and mark all AVC-only buffers invalid. */
950 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
951 struct decode_state *decode_state,
952 struct gen7_mfd_context *gen7_mfd_context)
954 VAPictureParameterBufferMPEG2 *pic_param;
955 struct i965_driver_data *i965 = i965_driver_data(ctx);
956 struct object_surface *obj_surface;
958 unsigned int width_in_mbs;
960 assert(decode_state->pic_param && decode_state->pic_param->buffer);
961 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
962 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
964 mpeg2_set_reference_surfaces(
966 gen7_mfd_context->reference_surface,
971 /* Current decoded picture */
972 obj_surface = decode_state->render_object;
973 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* MPEG-2 always uses the pre-deblocking output path. */
975 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
976 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
977 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
978 gen7_mfd_context->pre_deblocking_output.valid = 1;
980 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
981 bo = dri_bo_alloc(i965->intel.bufmgr,
986 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
987 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
/* Buffers not used by the MPEG-2 path. */
989 gen7_mfd_context->post_deblocking_output.valid = 0;
990 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
991 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
992 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
993 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit the MFX_MPEG2_PIC_STATE command (13 DWORDs): packs the four f_code
 * nibbles and the picture_coding_extension flags, the coding type, and the
 * frame size in macroblocks.  Trailing DWORDs are zeroed.
 * NOTE(review): the condition guarding slice_concealment_disable_bit = 1
 * is not visible in this excerpt — confirm against the full file.
 */
997 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
998 struct decode_state *decode_state,
999 struct gen7_mfd_context *gen7_mfd_context)
1001 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1002 VAPictureParameterBufferMPEG2 *pic_param;
1003 unsigned int slice_concealment_disable_bit = 0;
1005 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1006 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1008 slice_concealment_disable_bit = 1;
1010 BEGIN_BCS_BATCH(batch, 13);
1011 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* DW1: f_code nibbles (VA packs them [0][0]..[1][1] from MSB) + extension bits */
1012 OUT_BCS_BATCH(batch,
1013 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1014 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1015 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1016 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1017 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1018 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1019 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1020 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1021 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1022 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1023 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1024 pic_param->picture_coding_extension.bits.alternate_scan << 6);
/* DW2: picture coding type (I/P/B) */
1025 OUT_BCS_BATCH(batch,
1026 pic_param->picture_coding_type << 9);
/* DW3: concealment control + frame size in macroblocks, minus one */
1027 OUT_BCS_BATCH(batch,
1028 (slice_concealment_disable_bit << 31) |
1029 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1030 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* DW4..DW12: reserved / unused, emitted as zero */
1031 OUT_BCS_BATCH(batch, 0);
1032 OUT_BCS_BATCH(batch, 0);
1033 OUT_BCS_BATCH(batch, 0);
1034 OUT_BCS_BATCH(batch, 0);
1035 OUT_BCS_BATCH(batch, 0);
1036 OUT_BCS_BATCH(batch, 0);
1037 OUT_BCS_BATCH(batch, 0);
1038 OUT_BCS_BATCH(batch, 0);
1039 OUT_BCS_BATCH(batch, 0);
1040 ADVANCE_BCS_BATCH(batch);
/*
 * Maintain and upload the MPEG-2 inverse-quantization matrices.
 * The driver caches the matrices in gen7_mfd_context (de-zigzagged into
 * raster order via zigzag_direct[]) so that a picture which does not carry
 * an IQ matrix buffer reuses the previously loaded one, then commits the
 * intra and non-intra matrices to hardware via gen8_mfd_qm_state().
 * A cached load flag of -1 means "never loaded" and forces adoption of the
 * incoming buffer's flags.
 */
1044 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1045 struct decode_state *decode_state,
1046 struct gen7_mfd_context *gen7_mfd_context)
1048 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1051 /* Update internal QM state */
1052 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1053 VAIQMatrixBufferMPEG2 * const iq_matrix =
1054 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1056 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1057 iq_matrix->load_intra_quantiser_matrix) {
1058 gen_iq_matrix->load_intra_quantiser_matrix =
1059 iq_matrix->load_intra_quantiser_matrix;
1060 if (iq_matrix->load_intra_quantiser_matrix) {
/* VA supplies the matrix in zigzag order; store raster order */
1061 for (j = 0; j < 64; j++)
1062 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1063 iq_matrix->intra_quantiser_matrix[j];
1067 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1068 iq_matrix->load_non_intra_quantiser_matrix) {
1069 gen_iq_matrix->load_non_intra_quantiser_matrix =
1070 iq_matrix->load_non_intra_quantiser_matrix;
1071 if (iq_matrix->load_non_intra_quantiser_matrix) {
1072 for (j = 0; j < 64; j++)
1073 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1074 iq_matrix->non_intra_quantiser_matrix[j];
1079 /* Commit QM state to HW */
1080 for (i = 0; i < 2; i++) {
1081 unsigned char *qm = NULL;
/* i == 0 selects the intra matrix, i == 1 the non-intra matrix
 * (selection branches partly elided in this excerpt) */
1085 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1086 qm = gen_iq_matrix->intra_quantiser_matrix;
1087 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1090 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1091 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1092 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1099 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_MPEG2_BSD_OBJECT (5 DWORDs) for a slice.  The macroblock
 * count is derived from the distance between this slice's start position
 * and the next slice's start (or the bottom of the picture for the last
 * slice).  For field pictures affected by the slice-vertical-position
 * workaround, VA positions are halved before use.
 */
1104 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1105 VAPictureParameterBufferMPEG2 *pic_param,
1106 VASliceParameterBufferMPEG2 *slice_param,
1107 VASliceParameterBufferMPEG2 *next_slice_param,
1108 struct gen7_mfd_context *gen7_mfd_context)
1110 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1111 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1112 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1114 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1115 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
/* Apply the WA only when field coded AND the app reports doubled positions */
1117 is_field_pic_wa = is_field_pic &&
1118 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1120 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1121 hpos0 = slice_param->slice_horizontal_position;
1123 if (next_slice_param == NULL) {
/* Last slice: extends to the bottom of the (field) picture */
1124 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1127 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1128 hpos1 = next_slice_param->slice_horizontal_position;
/* Macroblocks covered: linear distance between the two start positions */
1131 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1133 BEGIN_BCS_BATCH(batch, 5);
1134 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* DW1/DW2: byte length and offset, skipping whole bytes of the MB offset */
1135 OUT_BCS_BATCH(batch,
1136 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1137 OUT_BCS_BATCH(batch,
1138 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1139 OUT_BCS_BATCH(batch,
/* last-slice flags plus the residual bit offset within the first byte */
1143 (next_slice_param == NULL) << 5 |
1144 (next_slice_param == NULL) << 3 |
1145 (slice_param->macroblock_offset & 0x7));
1146 OUT_BCS_BATCH(batch,
1147 (slice_param->quantiser_scale_code << 24) |
1148 (vpos1 << 8 | hpos1));
1149 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 picture decode: emits the common MFX pipeline state,
 * picture and QM state, then one BSD object per slice.  Slice parameter
 * buffers may each hold several elements; next_slice_param always points
 * at the following slice (or NULL for the very last one) so the BSD
 * object can compute its macroblock span.
 */
1153 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1154 struct decode_state *decode_state,
1155 struct gen7_mfd_context *gen7_mfd_context)
1157 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1158 VAPictureParameterBufferMPEG2 *pic_param;
1159 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1160 dri_bo *slice_data_bo;
1163 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1164 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1166 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1167 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1168 intel_batchbuffer_emit_mi_flush(batch);
1169 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1170 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1171 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1172 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1173 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1174 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Lazily probe (once) whether the app doubles slice_vertical_position
 * for field pictures; < 0 means "not yet determined" */
1176 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1177 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1178 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1180 for (j = 0; j < decode_state->num_slice_params; j++) {
1181 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1182 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1183 slice_data_bo = decode_state->slice_datas[j]->bo;
1184 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* First slice of the NEXT parameter buffer, used as lookahead for the
 * last element of this buffer */
1186 if (j == decode_state->num_slice_params - 1)
1187 next_slice_group_param = NULL;
1189 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1191 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1192 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1194 if (i < decode_state->slice_params[j]->num_elements - 1)
1195 next_slice_param = slice_param + 1;
1197 next_slice_param = next_slice_group_param;
1199 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1204 intel_batchbuffer_end_atomic(batch);
1205 intel_batchbuffer_flush(batch);
/* Translation tables from VA-API VC-1 enums to the GEN7+ hardware encodings.
 * NOTE(review): several initializer lines are elided in this excerpt. */

/* VA picture_type (I/P/B/BI/skipped) -> GEN7_VC1_*_PICTURE */
1208 static const int va_to_gen7_vc1_pic_type[5] = {
1212 GEN7_VC1_BI_PICTURE,

/* VA mv_mode -> hardware unified MV mode */
1216 static const int va_to_gen7_vc1_mv[4] = {
1218 2, /* 1-MV half-pel */
1219 3, /* 1-MV half-pel bilinear */

/* B-fraction -> fixed-point (x/256) scale factor for direct-mode MVs,
 * indexed by VA b_picture_fraction */
1223 static const int b_picture_scale_factor[21] = {
1224 128, 85, 170, 64, 192,
1225 51, 102, 153, 204, 43,
1226 215, 37, 74, 111, 148,
1227 185, 222, 32, 96, 160,

/* VA conditional_overlap_flag -> hardware condover encoding */
1231 static const int va_to_gen7_vc1_condover[3] = {

/* VA sequence profile -> GEN7 VC-1 profile encoding */
1237 static const int va_to_gen7_vc1_profile[4] = {
1238 GEN7_VC1_SIMPLE_PROFILE,
1239 GEN7_VC1_MAIN_PROFILE,
1240 GEN7_VC1_RESERVED_PROFILE,
1241 GEN7_VC1_ADVANCED_PROFILE
/*
 * Destructor for the per-surface VC-1 private data: releases the direct-MV
 * buffer and frees the struct.  Installed as obj_surface->free_private_data.
 */
1245 gen8_mfd_free_vc1_surface(void **data)
1247 struct gen7_vc1_surface *gen7_vc1_surface = *data;
/* Nothing attached to this surface yet */
1249 if (!gen7_vc1_surface)
1252 dri_bo_unreference(gen7_vc1_surface->dmv);
1253 free(gen7_vc1_surface);
/*
 * Attach (or refresh) the VC-1 private data on the decoded surface: records
 * the picture type and lazily allocates the direct-MV read/write buffer
 * (64 bytes per macroblock) used by MFX_VC1_DIRECTMODE_STATE.
 */
1258 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1259 VAPictureParameterBufferVC1 *pic_param,
1260 struct object_surface *obj_surface)
1262 struct i965_driver_data *i965 = i965_driver_data(ctx);
1263 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1264 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1265 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1267 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
/* First use of this surface for VC-1: allocate the private struct */
1269 if (!gen7_vc1_surface) {
1270 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1271 assert((obj_surface->size & 0x3f) == 0);
1272 obj_surface->private_data = gen7_vc1_surface;
1275 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
/* Direct-MV buffer: 64 bytes per MB, allocated once per surface */
1277 if (gen7_vc1_surface->dmv == NULL) {
1278 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1279 "direct mv w/r buffer",
1280 width_in_mbs * height_in_mbs * 64,
/*
 * Per-picture setup for VC-1 decode: binds the render target as either the
 * post- or pre-deblocking output (depending on whether the in-loop filter
 * is enabled), allocates the row-store scratch buffers, and repacks the VA
 * bitplane buffer into the hardware layout (two macroblocks per byte,
 * row-padded to bitplane_width).
 * NOTE(review): interleaved lines (bo declaration, alloc-call tails, loop
 * variable declarations, else branches) are elided in this excerpt;
 * comments describe only the visible logic.
 */
1286 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1287 struct decode_state *decode_state,
1288 struct gen7_mfd_context *gen7_mfd_context)
1290 VAPictureParameterBufferVC1 *pic_param;
1291 struct i965_driver_data *i965 = i965_driver_data(ctx);
1292 struct object_surface *obj_surface;
1297 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1298 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1299 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1300 picture_type = pic_param->picture_fields.bits.picture_type;
1302 intel_update_vc1_frame_store_index(ctx,
1305 gen7_mfd_context->reference_surface);
1307 /* Current decoded picture */
1308 obj_surface = decode_state->render_object;
1309 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1310 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* Exactly one of post-/pre-deblocking output is valid, keyed off the
 * sequence loopfilter flag */
1312 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1313 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1314 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1315 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1317 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1318 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1319 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1320 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
/* Row-store scratch buffers, sized from the picture width in MBs */
1322 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1323 bo = dri_bo_alloc(i965->intel.bufmgr,
1328 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1329 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1331 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1332 bo = dri_bo_alloc(i965->intel.bufmgr,
1333 "deblocking filter row store",
1334 width_in_mbs * 7 * 64,
1337 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1338 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1340 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1341 bo = dri_bo_alloc(i965->intel.bufmgr,
1342 "bsd mpc row store",
1346 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1347 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1349 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
/* Bitplane buffer is only needed when the app passed raw bitplane data */
1351 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1352 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1354 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1355 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1356 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Hardware layout packs two MBs per byte, so half the width, rounded up */
1357 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1359 uint8_t *src = NULL, *dst = NULL;
1361 assert(decode_state->bit_plane->buffer);
1362 src = decode_state->bit_plane->buffer;
1364 bo = dri_bo_alloc(i965->intel.bufmgr,
1366 bitplane_width * height_in_mbs,
1369 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1371 dri_bo_map(bo, True);
1372 assert(bo->virtual);
/* Repack the VA bitplane (one nibble per MB, MSB-first within a byte)
 * into the hardware layout, row by row */
1375 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1376 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1377 int src_index, dst_index;
1381 src_index = (src_h * width_in_mbs + src_w) / 2;
/* Even MB index -> high nibble of the source byte */
1382 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1383 src_value = ((src[src_index] >> src_shift) & 0xf);
/* Skipped pictures force the bitplane value (branch body elided) */
1385 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1389 dst_index = src_w / 2;
1390 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd trailing MB: shift the lone nibble into place */
1394 dst[src_w / 2] >>= 4;
1396 dst += bitplane_width;
1401 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Emit MFD_VC1_LONG_PIC_STATE (6 DWORDs).  Derives from the VA picture
 * parameters: the ALTPQUANT configuration (per VC-1 VOPDQUANT semantics),
 * the unified MV mode, the B-picture scale factor / reference distance,
 * transform AC coding sets, overlap smoothing (condover), FCM and the
 * interpolation mode — then packs them into the fixed hardware layout.
 * NOTE(review): many interleaved lines (switch cases, else branches,
 * variable declarations) are elided in this excerpt; the sequencing below
 * follows the visible lines only.
 */
1405 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1406 struct decode_state *decode_state,
1407 struct gen7_mfd_context *gen7_mfd_context)
1409 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1410 VAPictureParameterBufferVC1 *pic_param;
1411 struct object_surface *obj_surface;
1412 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1413 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1414 int unified_mv_mode;
1415 int ref_field_pic_polarity = 0;
1416 int scale_factor = 0;
1418 int dmv_surface_valid = 0;
1424 int interpolation_mode = 0;
1426 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1427 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1429 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1430 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1431 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1432 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1433 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1434 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1435 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1436 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Map VC-1 VOPDQUANT (dquant/dquantfrm/dqprofile/...) to the hardware
 * ALTPQUANT config + edge mask */
1439 alt_pquant_config = 0;
1440 alt_pquant_edge_mask = 0;
1441 } else if (dquant == 2) {
1442 alt_pquant_config = 1;
1443 alt_pquant_edge_mask = 0xf;
1445 assert(dquant == 1);
1446 if (dquantfrm == 0) {
1447 alt_pquant_config = 0;
1448 alt_pquant_edge_mask = 0;
1451 assert(dquantfrm == 1);
1452 alt_pquant_config = 1;
1454 switch (dqprofile) {
1456 if (dqbilevel == 0) {
1457 alt_pquant_config = 2;
1458 alt_pquant_edge_mask = 0;
1460 assert(dqbilevel == 1);
1461 alt_pquant_config = 3;
1462 alt_pquant_edge_mask = 0;
1467 alt_pquant_edge_mask = 0xf;
1472 alt_pquant_edge_mask = 0x9;
1474 alt_pquant_edge_mask = (0x3 << dqdbedge);
1479 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* Intensity compensation carries the real MV mode in mv_mode2 */
1488 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1489 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1490 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1492 assert(pic_param->mv_fields.bits.mv_mode < 4);
1493 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1496 if (pic_param->sequence_fields.bits.interlace == 1 &&
1497 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1498 /* FIXME: calculate reference field picture polarity */
1500 ref_field_pic_polarity = 0;
1503 if (pic_param->b_picture_fraction < 21)
1504 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1506 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* Advanced-profile I pictures are coded to the hardware as BI */
1508 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1509 picture_type == GEN7_VC1_I_PICTURE)
1510 picture_type = GEN7_VC1_BI_PICTURE;
/* AC coding set index 2 applies to intra pictures, index 1 otherwise */
1512 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1513 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1515 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1518 * 8.3.6.2.1 Transform Type Selection
1519 * If variable-sized transform coding is not enabled,
1520 * then the 8x8 transform shall be used for all blocks.
1521 * it is also MFX_VC1_PIC_STATE requirement.
1523 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1524 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1525 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* B pictures read direct MVs from the backward reference's DMV buffer;
 * only valid when that reference is a P picture */
1529 if (picture_type == GEN7_VC1_B_PICTURE) {
1530 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1532 obj_surface = decode_state->reference_objects[1];
1535 gen7_vc1_surface = obj_surface->private_data;
1537 if (!gen7_vc1_surface ||
1538 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1539 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1540 dmv_surface_valid = 0;
1542 dmv_surface_valid = 1;
1545 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1547 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1548 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1550 if (pic_param->picture_fields.bits.top_field_first)
/* B-picture backward reference distance, scaled by the B fraction */
1556 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1557 brfd = pic_param->reference_fields.bits.reference_distance;
1558 brfd = (scale_factor * brfd) >> 8;
1559 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
/* Overlap smoothing decision (profile- and PQUANT-dependent) */
1566 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1567 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1568 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1572 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1573 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1576 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1577 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1578 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1580 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1581 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1587 assert(pic_param->conditional_overlap_flag < 3);
1588 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
/* Luma interpolation filter, from the (possibly indirect) MV mode */
1590 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1591 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1592 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1593 interpolation_mode = 9; /* Half-pel bilinear */
1594 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1595 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1596 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1597 interpolation_mode = 1; /* Half-pel bicubic */
1599 interpolation_mode = 0; /* Quarter-pel bicubic */
1601 BEGIN_BCS_BATCH(batch, 6);
1602 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
/* DW1: frame size in macroblocks, minus one */
1603 OUT_BCS_BATCH(batch,
1604 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1605 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
/* DW2: misc sequence/picture controls */
1606 OUT_BCS_BATCH(batch,
1607 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1608 dmv_surface_valid << 15 |
1609 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1610 pic_param->rounding_control << 13 |
1611 pic_param->sequence_fields.bits.syncmarker << 12 |
1612 interpolation_mode << 8 |
1613 0 << 7 | /* FIXME: scale up or down ??? */
1614 pic_param->range_reduction_frame << 6 |
1615 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1617 !pic_param->picture_fields.bits.is_first_field << 3 |
1618 (pic_param->sequence_fields.bits.profile == 3) << 0);
/* DW3: condover, picture type, quantizer scale */
1619 OUT_BCS_BATCH(batch,
1620 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1621 picture_type << 26 |
1624 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
/* DW4: MV mode, reference controls, ALTPQUANT, quantizer flags */
1626 OUT_BCS_BATCH(batch,
1627 unified_mv_mode << 28 |
1628 pic_param->mv_fields.bits.four_mv_switch << 27 |
1629 pic_param->fast_uvmc_flag << 26 |
1630 ref_field_pic_polarity << 25 |
1631 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1632 pic_param->reference_fields.bits.reference_distance << 20 |
1633 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1634 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1635 pic_param->mv_fields.bits.extended_mv_range << 8 |
1636 alt_pquant_edge_mask << 4 |
1637 alt_pquant_config << 2 |
1638 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1639 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: bitplane presence (inverted = "raw" mode per plane) + VLC tables */
1640 OUT_BCS_BATCH(batch,
1641 !!pic_param->bitplane_present.value << 31 |
1642 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1643 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1644 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1645 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1646 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1647 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1648 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1649 pic_param->mv_fields.bits.mv_table << 20 |
1650 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1651 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1652 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1653 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1654 pic_param->mb_mode_table << 8 |
1656 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1657 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1658 pic_param->cbp_table << 0);
1659 ADVANCE_BCS_BATCH(batch);
1663 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1664 struct decode_state *decode_state,
1665 struct gen7_mfd_context *gen7_mfd_context)
1667 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1668 VAPictureParameterBufferVC1 *pic_param;
1669 int intensitycomp_single;
1671 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1672 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1674 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1675 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1676 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1678 BEGIN_BCS_BATCH(batch, 6);
1679 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1680 OUT_BCS_BATCH(batch,
1681 0 << 14 | /* FIXME: double ??? */
1683 intensitycomp_single << 10 |
1684 intensitycomp_single << 8 |
1685 0 << 4 | /* FIXME: interlace mode */
1687 OUT_BCS_BATCH(batch,
1688 pic_param->luma_shift << 16 |
1689 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1690 OUT_BCS_BATCH(batch, 0);
1691 OUT_BCS_BATCH(batch, 0);
1692 OUT_BCS_BATCH(batch, 0);
1693 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_VC1_DIRECTMODE_STATE (7 DWORDs): the direct-MV write buffer is
 * the current render target's DMV bo, the read buffer is the backward
 * reference's (reference_objects[1]) DMV bo.  Either relocation is replaced
 * by zero padding when the corresponding surface has no DMV buffer.
 */
1697 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1698 struct decode_state *decode_state,
1699 struct gen7_mfd_context *gen7_mfd_context)
1701 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1702 struct object_surface *obj_surface;
1703 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
/* DMV write target: the picture currently being decoded */
1705 obj_surface = decode_state->render_object;
1707 if (obj_surface && obj_surface->private_data) {
1708 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
/* DMV read source: the backward reference picture */
1711 obj_surface = decode_state->reference_objects[1];
1713 if (obj_surface && obj_surface->private_data) {
1714 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1717 BEGIN_BCS_BATCH(batch, 7);
1718 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1720 if (dmv_write_buffer)
1721 OUT_BCS_RELOC(batch, dmv_write_buffer,
1722 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1725 OUT_BCS_BATCH(batch, 0);
1727 OUT_BCS_BATCH(batch, 0);
1728 OUT_BCS_BATCH(batch, 0);
1730 if (dmv_read_buffer)
1731 OUT_BCS_RELOC(batch, dmv_read_buffer,
1732 I915_GEM_DOMAIN_INSTRUCTION, 0,
1735 OUT_BCS_BATCH(batch, 0);
1737 OUT_BCS_BATCH(batch, 0);
1738 OUT_BCS_BATCH(batch, 0);
1740 ADVANCE_BCS_BATCH(batch);
/*
 * Convert the VA-supplied slice macroblock bit offset into the offset the
 * BSD engine expects.  For the profile that carries start-code emulation
 * prevention (00 00 03 xx with xx < 4), the slice header bytes are scanned
 * and the offset adjusted past the emulation-prevention bytes.
 * NOTE(review): the branch selecting between the identity path and the
 * scanning path, and the loop-body adjustments, are elided in this
 * excerpt — presumably the scan applies only to the advanced profile;
 * confirm against the full file.
 */
1744 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1746 int out_slice_data_bit_offset;
1747 int slice_header_size = in_slice_data_bit_offset / 8;
/* Default: pass the offset through unchanged */
1751 out_slice_data_bit_offset = in_slice_data_bit_offset;
1753 for (i = 0, j = 0; i < slice_header_size; i++, j++) {
/* 00 00 03 xx (xx < 4) is the VC-1 emulation-prevention pattern */
1754 if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
/* j now indexes past any emulation bytes; keep the sub-byte bits */
1759 out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1762 return out_slice_data_bit_offset;
/*
 * Emit one MFD_VC1_BSD_OBJECT (5 DWORDs) for a slice.  The slice data bo is
 * mapped so the real macroblock bit offset can be computed (skipping any
 * emulation-prevention bytes in the slice header); the slice's vertical
 * extent runs to the next slice's start row, or to the bottom of the
 * picture for the last slice.
 */
1766 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1767 VAPictureParameterBufferVC1 *pic_param,
1768 VASliceParameterBufferVC1 *slice_param,
1769 VASliceParameterBufferVC1 *next_slice_param,
1770 dri_bo *slice_data_bo,
1771 struct gen7_mfd_context *gen7_mfd_context)
1773 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1774 int next_slice_start_vert_pos;
1775 int macroblock_offset;
1776 uint8_t *slice_data = NULL;
/* CPU-map the slice data to scan its header for emulation bytes */
1778 dri_bo_map(slice_data_bo, 0);
1779 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1780 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1781 slice_param->macroblock_offset,
1782 pic_param->sequence_fields.bits.profile);
1783 dri_bo_unmap(slice_data_bo);
1785 if (next_slice_param)
1786 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
/* Last slice: extend to the bottom row of the picture */
1788 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1790 BEGIN_BCS_BATCH(batch, 5);
1791 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
/* DW1/DW2: byte length and offset, skipping whole bytes of the MB offset */
1792 OUT_BCS_BATCH(batch,
1793 slice_param->slice_data_size - (macroblock_offset >> 3));
1794 OUT_BCS_BATCH(batch,
1795 slice_param->slice_data_offset + (macroblock_offset >> 3));
/* DW3: vertical span of this slice in macroblock rows */
1796 OUT_BCS_BATCH(batch,
1797 slice_param->slice_vertical_position << 16 |
1798 next_slice_start_vert_pos << 0);
/* DW4: residual bit offset within the first byte */
1799 OUT_BCS_BATCH(batch,
1800 (macroblock_offset & 0x7));
1801 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 picture decode: emits the common MFX pipeline state plus
 * the VC-1 picture, prediction-pipe and direct-mode state, then one BSD
 * object per slice.  Mirrors the MPEG-2 path's slice-group lookahead so
 * each BSD object knows where the next slice begins.
 */
1805 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1806 struct decode_state *decode_state,
1807 struct gen7_mfd_context *gen7_mfd_context)
1809 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1810 VAPictureParameterBufferVC1 *pic_param;
1811 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1812 dri_bo *slice_data_bo;
1815 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1816 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1818 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1819 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1820 intel_batchbuffer_emit_mi_flush(batch);
1821 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1822 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1823 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1824 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1825 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1826 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1827 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1829 for (j = 0; j < decode_state->num_slice_params; j++) {
1830 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1831 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1832 slice_data_bo = decode_state->slice_datas[j]->bo;
1833 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* First slice of the NEXT parameter buffer (lookahead for last element) */
1835 if (j == decode_state->num_slice_params - 1)
1836 next_slice_group_param = NULL;
1838 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1840 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1841 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1843 if (i < decode_state->slice_params[j]->num_elements - 1)
1844 next_slice_param = slice_param + 1;
1846 next_slice_param = next_slice_group_param;
1848 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1853 intel_batchbuffer_end_atomic(batch);
1854 intel_batchbuffer_flush(batch);
/* Prepare the decode context for a JPEG baseline picture: derive the
 * chroma subsampling / fourcc from the component sampling factors in the
 * picture parameter, allocate the render surface accordingly, and reset
 * the scratch/row-store buffer slots (JPEG uses pre-deblocking output
 * only; all other scratch buffers are unused).
 */
1858 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1859 struct decode_state *decode_state,
1860 struct gen7_mfd_context *gen7_mfd_context)
1862 struct object_surface *obj_surface;
1863 VAPictureParameterBufferJPEGBaseline *pic_param;
1864 int subsampling = SUBSAMPLE_YUV420;
1865 int fourcc = VA_FOURCC_IMC3;
1867 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
/* Single component => grayscale (4:0:0); three components => inspect the
 * per-component horizontal/vertical sampling factors to classify the
 * subsampling pattern.  Other component counts keep the 4:2:0 default. */
1869 if (pic_param->num_components == 1)
1870 subsampling = SUBSAMPLE_YUV400;
1871 else if (pic_param->num_components == 3) {
1872 int h1 = pic_param->components[0].h_sampling_factor;
1873 int h2 = pic_param->components[1].h_sampling_factor;
1874 int h3 = pic_param->components[2].h_sampling_factor;
1875 int v1 = pic_param->components[0].v_sampling_factor;
1876 int v2 = pic_param->components[1].v_sampling_factor;
1877 int v3 = pic_param->components[2].v_sampling_factor;
1879 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1880 v1 == 2 && v2 == 1 && v3 == 1) {
1881 subsampling = SUBSAMPLE_YUV420;
1882 fourcc = VA_FOURCC_IMC3;
1883 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1884 v1 == 1 && v2 == 1 && v3 == 1) {
1885 subsampling = SUBSAMPLE_YUV422H;
1886 fourcc = VA_FOURCC_422H;
1887 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1888 v1 == 1 && v2 == 1 && v3 == 1) {
1889 subsampling = SUBSAMPLE_YUV444;
1890 fourcc = VA_FOURCC_444P;
1891 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1892 v1 == 1 && v2 == 1 && v3 == 1) {
1893 subsampling = SUBSAMPLE_YUV411;
1894 fourcc = VA_FOURCC_411P;
1895 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1896 v1 == 2 && v2 == 1 && v3 == 1) {
1897 subsampling = SUBSAMPLE_YUV422V;
1898 fourcc = VA_FOURCC_422V;
1899 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1900 v1 == 2 && v2 == 2 && v3 == 2) {
1901 subsampling = SUBSAMPLE_YUV422H;
1902 fourcc = VA_FOURCC_422H;
/* FIX: was "h2 == 2 && h2 == 2" — h2 was tested twice and h1 never
 * tested, so this 4:2:2V variant branch could never match correctly.
 * By symmetry with all other branches the first test must be on h1. */
1903 } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1904 v1 == 2 && v2 == 1 && v3 == 1) {
1905 subsampling = SUBSAMPLE_YUV422V;
1906 fourcc = VA_FOURCC_422V;
1914 /* Current decoded picture */
1915 obj_surface = decode_state->render_object;
1916 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
/* JPEG output goes through the pre-deblocking path only. */
1918 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1919 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1920 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1921 gen7_mfd_context->pre_deblocking_output.valid = 1;
1923 gen7_mfd_context->post_deblocking_output.bo = NULL;
1924 gen7_mfd_context->post_deblocking_output.valid = 0;
1926 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1927 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1929 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1930 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1932 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1933 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1935 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1936 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1938 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1939 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Maps a VA rotation index (0/90/180/270) to the GEN7 JPEG hardware
 * rotation encoding used in MFX_JPEG_PIC_STATE. */
1942 static const int va_to_gen7_jpeg_rotation[4] = {
1943 GEN7_JPEG_ROTATION_0,
1944 GEN7_JPEG_ROTATION_90,
1945 GEN7_JPEG_ROTATION_180,
1946 GEN7_JPEG_ROTATION_270
/* Emit MFX_JPEG_PIC_STATE: classify the chroma type from the component
 * sampling factors (same decision tree as gen8_mfd_jpeg_decode_init),
 * compute the frame dimensions in 8x8 blocks, and program rotation,
 * chroma type and block counts into the command.
 */
1950 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1951 struct decode_state *decode_state,
1952 struct gen7_mfd_context *gen7_mfd_context)
1954 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1955 VAPictureParameterBufferJPEGBaseline *pic_param;
1956 int chroma_type = GEN7_YUV420;
1957 int frame_width_in_blks;
1958 int frame_height_in_blks;
1960 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1961 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1963 if (pic_param->num_components == 1)
1964 chroma_type = GEN7_YUV400;
1965 else if (pic_param->num_components == 3) {
1966 int h1 = pic_param->components[0].h_sampling_factor;
1967 int h2 = pic_param->components[1].h_sampling_factor;
1968 int h3 = pic_param->components[2].h_sampling_factor;
1969 int v1 = pic_param->components[0].v_sampling_factor;
1970 int v2 = pic_param->components[1].v_sampling_factor;
1971 int v3 = pic_param->components[2].v_sampling_factor;
1973 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1974 v1 == 2 && v2 == 1 && v3 == 1)
1975 chroma_type = GEN7_YUV420;
1976 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1977 v1 == 1 && v2 == 1 && v3 == 1)
1978 chroma_type = GEN7_YUV422H_2Y;
1979 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1980 v1 == 1 && v2 == 1 && v3 == 1)
1981 chroma_type = GEN7_YUV444;
1982 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1983 v1 == 1 && v2 == 1 && v3 == 1)
1984 chroma_type = GEN7_YUV411;
1985 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1986 v1 == 2 && v2 == 1 && v3 == 1)
1987 chroma_type = GEN7_YUV422V_2Y;
1988 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1989 v1 == 2 && v2 == 2 && v3 == 2)
1990 chroma_type = GEN7_YUV422H_4Y;
/* FIX: was "h2 == 2 && h2 == 2" — h2 tested twice, h1 never tested.
 * By symmetry with all other branches (and with the equivalent check
 * in gen8_mfd_jpeg_decode_init) the first test must be on h1. */
1991 else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1992 v1 == 2 && v2 == 1 && v3 == 1)
1993 chroma_type = GEN7_YUV422V_4Y;
/* Block counts: width/height rounded up to the MCU size for the chroma
 * type, expressed in 8x8 blocks. */
1998 if (chroma_type == GEN7_YUV400 ||
1999 chroma_type == GEN7_YUV444 ||
2000 chroma_type == GEN7_YUV422V_2Y) {
2001 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2002 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2003 } else if (chroma_type == GEN7_YUV411) {
2004 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2005 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2007 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2008 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2011 BEGIN_BCS_BATCH(batch, 3);
2012 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2013 OUT_BCS_BATCH(batch,
2014 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2015 (chroma_type << 0));
2016 OUT_BCS_BATCH(batch,
2017 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2018 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2019 ADVANCE_BCS_BATCH(batch);
/* Maps a VA Huffman table index (0/1) to the hardware table id used by
 * MFX_JPEG_HUFF_TABLE_STATE.  NOTE(review): initializer entries are not
 * visible in this listing — confirm against the full source. */
2022 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Upload the loaded JPEG Huffman tables to the hardware via
 * MFX_JPEG_HUFF_TABLE_STATE.  Bails out early when no Huffman table
 * buffer was supplied. */
2028 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2029 struct decode_state *decode_state,
2030 struct gen7_mfd_context *gen7_mfd_context,
2033 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2034 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2037 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2040 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2042 for (index = 0; index < num_tables; index++) {
2043 int id = va_to_gen7_jpeg_hufftable[index];
/* Skip tables the app did not mark for loading. */
2044 if (!huffman_table->load_huffman_table[index])
2046 BEGIN_BCS_BATCH(batch, 53);
2047 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2048 OUT_BCS_BATCH(batch, id);
/* Payload: 12 + 12 + 16 + 164 bytes = 204 bytes = 51 dwords, which
 * together with the 2 header dwords matches the batch length of 53.
 * The 164-byte AC-values write pads the 162-byte table to a dword
 * multiple. */
2049 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2050 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2051 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2052 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2053 ADVANCE_BCS_BATCH(batch);
/* Maps a 1-based component id (1=Y, 2=Cb, 3=Cr, 4=alpha) to the MFX
 * quantizer-matrix type.  NOTE(review): index-0 entry is not visible in
 * this listing — confirm against the full source. */
2057 static const int va_to_gen7_jpeg_qm[5] = {
2059 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2060 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2061 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2062 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/* Program the per-component JPEG quantization matrices.  The VA IQ
 * matrix is in zig-zag order; it is converted to raster order before
 * being handed to gen8_mfd_qm_state. */
2066 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2067 struct decode_state *decode_state,
2068 struct gen7_mfd_context *gen7_mfd_context)
2070 VAPictureParameterBufferJPEGBaseline *pic_param;
2071 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2074 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2077 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2078 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2080 assert(pic_param->num_components <= 3);
2082 for (index = 0; index < pic_param->num_components; index++) {
/* Normalize component ids so the first component maps to 1 (Y). */
2083 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2085 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2086 unsigned char raster_qm[64];
/* Reject ids outside the va_to_gen7_jpeg_qm range [1, 4]. */
2089 if (id > 4 || id < 1)
2092 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2095 qm_type = va_to_gen7_jpeg_qm[id];
/* Zig-zag -> raster reorder. */
2097 for (j = 0; j < 64; j++)
2098 raster_qm[zigzag_direct[j]] = qm[j];
2100 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/* Emit MFD_JPEG_BSD_OBJECT for one scan: builds the scan-component mask
 * from the slice's component selectors and programs the slice data
 * size/offset, MCU position, MCU count and restart interval. */
2105 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2106 VAPictureParameterBufferJPEGBaseline *pic_param,
2107 VASliceParameterBufferJPEGBaseline *slice_param,
2108 VASliceParameterBufferJPEGBaseline *next_slice_param,
2109 dri_bo *slice_data_bo,
2110 struct gen7_mfd_context *gen7_mfd_context)
2112 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2113 int scan_component_mask = 0;
2116 assert(slice_param->num_components > 0);
2117 assert(slice_param->num_components < 4);
2118 assert(slice_param->num_components <= pic_param->num_components);
/* Map each scanned component (normalized to 1..3 relative to the first
 * picture component) to a bit in the scan-component mask. */
2120 for (i = 0; i < slice_param->num_components; i++) {
2121 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2123 scan_component_mask |= (1 << 0);
2126 scan_component_mask |= (1 << 1);
2129 scan_component_mask |= (1 << 2);
2137 BEGIN_BCS_BATCH(batch, 6);
2138 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2139 OUT_BCS_BATCH(batch,
2140 slice_param->slice_data_size);
2141 OUT_BCS_BATCH(batch,
2142 slice_param->slice_data_offset);
2143 OUT_BCS_BATCH(batch,
2144 slice_param->slice_horizontal_position << 16 |
2145 slice_param->slice_vertical_position << 0);
2146 OUT_BCS_BATCH(batch,
2147 ((slice_param->num_components != 1) << 30) | /* interleaved */
2148 (scan_component_mask << 27) | /* scan components */
2149 (0 << 26) | /* disable interrupt allowed */
2150 (slice_param->num_mcus << 0)); /* MCU count */
2151 OUT_BCS_BATCH(batch,
2152 (slice_param->restart_interval << 0)); /* RestartInterval */
2153 ADVANCE_BCS_BATCH(batch);
2156 /* Workaround for JPEG decoding on Ivybridge: a tiny canned AVC clip
 * that is decoded before the real JPEG workload to put the MFX engine
 * into a known-good state.  The struct holds the clip's dimensions,
 * bitstream bytes and bit offset (several fields/initializers are not
 * visible in this listing). */
2162 unsigned char data[32];
2164 int data_bit_offset;
2166 } gen7_jpeg_wa_clip = {
2170 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2171 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/* Allocate (or re-allocate) the scratch surface and slice-data BO used
 * by the JPEG workaround clip.  Any previous workaround surface is
 * destroyed first; the canned clip bytes are uploaded into the BO. */
2179 gen8_jpeg_wa_init(VADriverContextP ctx,
2180 struct gen7_mfd_context *gen7_mfd_context)
2182 struct i965_driver_data *i965 = i965_driver_data(ctx);
2184 struct object_surface *obj_surface;
2186 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2187 i965_DestroySurfaces(ctx,
2188 &gen7_mfd_context->jpeg_wa_surface_id,
2191 status = i965_CreateSurfaces(ctx,
2192 gen7_jpeg_wa_clip.width,
2193 gen7_jpeg_wa_clip.height,
2194 VA_RT_FORMAT_YUV420,
2196 &gen7_mfd_context->jpeg_wa_surface_id);
2197 assert(status == VA_STATUS_SUCCESS);
2199 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2200 assert(obj_surface);
2201 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2202 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* Lazily allocate the slice-data BO and copy the canned clip into it. */
2204 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2205 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2209 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2211 gen7_jpeg_wa_clip.data_size,
2212 gen7_jpeg_wa_clip.data);
/* Emit MFX_PIPE_MODE_SELECT for the workaround clip: AVC VLD decode,
 * long format, pre-deblocking output enabled. */
2217 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2218 struct gen7_mfd_context *gen7_mfd_context)
2220 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2222 BEGIN_BCS_BATCH(batch, 5);
2223 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2224 OUT_BCS_BATCH(batch,
2225 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2226 (MFD_MODE_VLD << 15) | /* VLD mode */
2227 (0 << 10) | /* disable Stream-Out */
2228 (0 << 9) | /* Post Deblocking Output */
2229 (1 << 8) | /* Pre Deblocking Output */
2230 (0 << 5) | /* not in stitch mode */
2231 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2232 (MFX_FORMAT_AVC << 0));
2233 OUT_BCS_BATCH(batch,
2234 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2235 (0 << 3) | /* terminate if AVC mbdata error occurs */
2236 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2239 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2240 OUT_BCS_BATCH(batch, 0); /* reserved */
2241 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_SURFACE_STATE for the workaround surface: NV12 (planar 4:2:0,
 * interleaved chroma), Y-major tiled, using the surface's real
 * width/height/pitch and Cb offset. */
2245 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2246 struct gen7_mfd_context *gen7_mfd_context)
2248 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2249 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2251 BEGIN_BCS_BATCH(batch, 6);
2252 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2253 OUT_BCS_BATCH(batch, 0);
2254 OUT_BCS_BATCH(batch,
2255 ((obj_surface->orig_width - 1) << 18) |
2256 ((obj_surface->orig_height - 1) << 4));
2257 OUT_BCS_BATCH(batch,
2258 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2259 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2260 (0 << 22) | /* surface object control state, ignored */
2261 ((obj_surface->width - 1) << 3) | /* pitch */
2262 (0 << 2) | /* must be 0 */
2263 (1 << 1) | /* must be tiled */
2264 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2265 OUT_BCS_BATCH(batch,
2266 (0 << 16) | /* X offset for U(Cb), must be 0 */
2267 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2268 OUT_BCS_BATCH(batch,
2269 (0 << 16) | /* X offset for V(Cr), must be 0 */
2270 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2271 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_PIPE_BUF_ADDR_STATE for the workaround clip.  A temporary
 * intra-row-store BO is allocated just for this command and released at
 * the end (the relocation keeps it alive until the batch executes);
 * all other buffer slots are zeroed. */
2275 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2276 struct gen7_mfd_context *gen7_mfd_context)
2278 struct i965_driver_data *i965 = i965_driver_data(ctx);
2279 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2280 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2284 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2289 BEGIN_BCS_BATCH(batch, 61);
2290 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2291 OUT_BCS_RELOC(batch,
2293 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2295 OUT_BCS_BATCH(batch, 0);
2296 OUT_BCS_BATCH(batch, 0);
2299 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2300 OUT_BCS_BATCH(batch, 0);
2301 OUT_BCS_BATCH(batch, 0);
2303 /* uncompressed-video & stream out 7-12 */
2304 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2305 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2306 OUT_BCS_BATCH(batch, 0);
2307 OUT_BCS_BATCH(batch, 0);
2308 OUT_BCS_BATCH(batch, 0);
2309 OUT_BCS_BATCH(batch, 0);
2311 /* the DW 13-15 is for intra row store scratch */
2312 OUT_BCS_RELOC(batch,
2314 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2316 OUT_BCS_BATCH(batch, 0);
2317 OUT_BCS_BATCH(batch, 0);
2319 /* the DW 16-18 is for deblocking filter */
2320 OUT_BCS_BATCH(batch, 0);
2321 OUT_BCS_BATCH(batch, 0);
2322 OUT_BCS_BATCH(batch, 0);
/* Reference picture slots — none are used by the workaround clip. */
2325 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2326 OUT_BCS_BATCH(batch, 0);
2327 OUT_BCS_BATCH(batch, 0);
2329 OUT_BCS_BATCH(batch, 0);
2331 /* the DW52-54 is for mb status address */
2332 OUT_BCS_BATCH(batch, 0);
2333 OUT_BCS_BATCH(batch, 0);
2334 OUT_BCS_BATCH(batch, 0);
2335 /* the DW56-60 is for ILDB & second ILDB address */
2336 OUT_BCS_BATCH(batch, 0);
2337 OUT_BCS_BATCH(batch, 0);
2338 OUT_BCS_BATCH(batch, 0);
2339 OUT_BCS_BATCH(batch, 0);
2340 OUT_BCS_BATCH(batch, 0);
2341 OUT_BCS_BATCH(batch, 0);
2343 ADVANCE_BCS_BATCH(batch);
/* Safe to drop our reference: the batch relocation holds the BO. */
2345 dri_bo_unreference(intra_bo);
/* Emit MFX_BSP_BUF_BASE_ADDR_STATE for the workaround clip using
 * freshly allocated BSD/MPC and MPR row-store BOs; both are released
 * after the command (relocations keep them alive for execution). */
2349 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2350 struct gen7_mfd_context *gen7_mfd_context)
2352 struct i965_driver_data *i965 = i965_driver_data(ctx);
2353 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2354 dri_bo *bsd_mpc_bo, *mpr_bo;
2356 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2357 "bsd mpc row store",
2358 11520, /* 1.5 * 120 * 64 */
2361 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2363 7680, /* 1.0 * 120 * 64 */
2366 BEGIN_BCS_BATCH(batch, 10);
2367 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2369 OUT_BCS_RELOC(batch,
2371 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2374 OUT_BCS_BATCH(batch, 0);
2375 OUT_BCS_BATCH(batch, 0);
2377 OUT_BCS_RELOC(batch,
2379 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2381 OUT_BCS_BATCH(batch, 0);
2382 OUT_BCS_BATCH(batch, 0);
2384 OUT_BCS_BATCH(batch, 0);
2385 OUT_BCS_BATCH(batch, 0);
2386 OUT_BCS_BATCH(batch, 0);
2388 ADVANCE_BCS_BATCH(batch);
/* Relocations keep the BOs alive until the batch completes. */
2390 dri_bo_unreference(bsd_mpc_bo);
2391 dri_bo_unreference(mpr_bo);
/* AVC QM state for the workaround clip.  NOTE(review): the body is not
 * visible in this listing — presumably empty or trivial; confirm
 * against the full source. */
2395 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2396 struct gen7_mfd_context *gen7_mfd_context)
/* Emit MFX_AVC_IMG_STATE for the workaround clip: a minimal 1x1-MB,
 * 4:2:0, CABAC, non-MBAFF image. */
2402 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2403 struct gen7_mfd_context *gen7_mfd_context)
2405 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2407 int mbaff_frame_flag = 0;
2408 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2410 BEGIN_BCS_BATCH(batch, 16);
2411 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2412 OUT_BCS_BATCH(batch,
2413 width_in_mbs * height_in_mbs);
2414 OUT_BCS_BATCH(batch,
2415 ((height_in_mbs - 1) << 16) |
2416 ((width_in_mbs - 1) << 0));
2417 OUT_BCS_BATCH(batch,
2422 (0 << 12) | /* differ from GEN6 */
2425 OUT_BCS_BATCH(batch,
2426 (1 << 10) | /* 4:2:0 */
2427 (1 << 7) | /* CABAC */
2433 (mbaff_frame_flag << 1) |
2435 OUT_BCS_BATCH(batch, 0);
2436 OUT_BCS_BATCH(batch, 0);
2437 OUT_BCS_BATCH(batch, 0);
2438 OUT_BCS_BATCH(batch, 0);
2439 OUT_BCS_BATCH(batch, 0);
2440 OUT_BCS_BATCH(batch, 0);
2441 OUT_BCS_BATCH(batch, 0);
2442 OUT_BCS_BATCH(batch, 0);
2443 OUT_BCS_BATCH(batch, 0);
2444 OUT_BCS_BATCH(batch, 0);
2445 OUT_BCS_BATCH(batch, 0);
2446 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_DIRECTMODE_STATE for the workaround clip with all
 * reference/POC entries zeroed (the clip has no references). */
2450 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2451 struct gen7_mfd_context *gen7_mfd_context)
2453 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2456 BEGIN_BCS_BATCH(batch, 71);
2457 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2459 /* reference surfaces 0..15 */
2460 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2461 OUT_BCS_BATCH(batch, 0); /* top */
2462 OUT_BCS_BATCH(batch, 0); /* bottom */
2465 OUT_BCS_BATCH(batch, 0);
2467 /* the current decoding frame/field */
2468 OUT_BCS_BATCH(batch, 0); /* top */
2469 OUT_BCS_BATCH(batch, 0);
2470 OUT_BCS_BATCH(batch, 0);
/* POC list entries for the (unused) reference frames. */
2473 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2474 OUT_BCS_BATCH(batch, 0);
2475 OUT_BCS_BATCH(batch, 0);
2478 OUT_BCS_BATCH(batch, 0);
2479 OUT_BCS_BATCH(batch, 0);
2481 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream base at the
 * workaround clip's slice-data BO. */
2485 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2486 struct gen7_mfd_context *gen7_mfd_context)
2488 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2490 BEGIN_BCS_BATCH(batch, 11);
2491 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2492 OUT_BCS_RELOC(batch,
2493 gen7_mfd_context->jpeg_wa_slice_data_bo,
2494 I915_GEM_DOMAIN_INSTRUCTION, 0,
2496 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2497 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2498 OUT_BCS_BATCH(batch, 0);
2499 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2500 OUT_BCS_BATCH(batch, 0);
2501 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2502 OUT_BCS_BATCH(batch, 0);
2503 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2504 OUT_BCS_BATCH(batch, 0);
2505 ADVANCE_BCS_BATCH(batch);
/* Emit MFD_AVC_BSD_OBJECT to decode the workaround clip's single slice,
 * programming the byte/bit offset of the first macroblock and marking
 * it the last slice. */
2509 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2510 struct gen7_mfd_context *gen7_mfd_context)
2512 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2514 /* the input bitstream format on GEN7 differs from GEN6 */
2515 BEGIN_BCS_BATCH(batch, 6);
2516 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2517 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2518 OUT_BCS_BATCH(batch, 0);
2519 OUT_BCS_BATCH(batch,
2525 OUT_BCS_BATCH(batch,
2526 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2529 (1 << 3) | /* LastSlice Flag */
2530 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2531 OUT_BCS_BATCH(batch, 0);
2532 ADVANCE_BCS_BATCH(batch);
/* Emit MFX_AVC_SLICE_STATE for the workaround clip: a single intra
 * slice covering one MB row, with deblocking disabled and no reference
 * lists. */
2536 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2537 struct gen7_mfd_context *gen7_mfd_context)
2539 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2540 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2541 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2542 int first_mb_in_slice = 0;
2543 int slice_type = SLICE_TYPE_I;
2545 BEGIN_BCS_BATCH(batch, 11);
2546 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2547 OUT_BCS_BATCH(batch, slice_type);
2548 OUT_BCS_BATCH(batch,
2549 (num_ref_idx_l1 << 24) |
2550 (num_ref_idx_l0 << 16) |
2553 OUT_BCS_BATCH(batch,
2555 (1 << 27) | /* disable Deblocking */
2557 (gen7_jpeg_wa_clip.qp << 16) |
2560 OUT_BCS_BATCH(batch,
2561 (slice_ver_pos << 24) |
2562 (slice_hor_pos << 16) |
2563 (first_mb_in_slice << 0));
2564 OUT_BCS_BATCH(batch,
2565 (next_slice_ver_pos << 16) |
2566 (next_slice_hor_pos << 0));
2567 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2568 OUT_BCS_BATCH(batch, 0);
2569 OUT_BCS_BATCH(batch, 0);
2570 OUT_BCS_BATCH(batch, 0);
2571 OUT_BCS_BATCH(batch, 0);
2572 ADVANCE_BCS_BATCH(batch);
/* Run the full JPEG workaround sequence: decode the canned AVC clip by
 * emitting the complete MFX state + BSD object chain before the real
 * JPEG decode is programmed. */
2576 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2577 struct gen7_mfd_context *gen7_mfd_context)
2579 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2580 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2581 intel_batchbuffer_emit_mi_flush(batch);
2582 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2583 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2584 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2585 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2586 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2587 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2588 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2590 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2591 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2592 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/* Top-level JPEG baseline decode: init the context, run the Ivybridge
 * workaround, emit common MFX state, then make two passes over the
 * slices — the first to find the highest Huffman table selector (so the
 * right number of tables is uploaded), the second to emit the BSD
 * objects that actually decode each scan. */
2598 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2599 struct decode_state *decode_state,
2600 struct gen7_mfd_context *gen7_mfd_context)
2602 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2603 VAPictureParameterBufferJPEGBaseline *pic_param;
2604 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2605 dri_bo *slice_data_bo;
2606 int i, j, max_selector = 0;
2608 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2609 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2611 /* Currently only support Baseline DCT */
2612 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2613 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2615 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2617 intel_batchbuffer_emit_mi_flush(batch);
2618 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2619 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2620 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2621 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2622 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* Pass 1: scan every slice component to find the largest DC/AC table
 * selector in use. */
2624 for (j = 0; j < decode_state->num_slice_params; j++) {
2625 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2626 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2627 slice_data_bo = decode_state->slice_datas[j]->bo;
2628 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2630 if (j == decode_state->num_slice_params - 1)
2631 next_slice_group_param = NULL;
2633 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2635 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2638 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2640 if (i < decode_state->slice_params[j]->num_elements - 1)
2641 next_slice_param = slice_param + 1;
2643 next_slice_param = next_slice_group_param;
2645 for (component = 0; component < slice_param->num_components; component++) {
2646 if (max_selector < slice_param->components[component].dc_table_selector)
2647 max_selector = slice_param->components[component].dc_table_selector;
2649 if (max_selector < slice_param->components[component].ac_table_selector)
2650 max_selector = slice_param->components[component].ac_table_selector;
/* Baseline JPEG allows at most two Huffman tables per class. */
2657 assert(max_selector < 2);
2658 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Pass 2: emit the BSD objects that decode each scan. */
2660 for (j = 0; j < decode_state->num_slice_params; j++) {
2661 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2662 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2663 slice_data_bo = decode_state->slice_datas[j]->bo;
2664 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2666 if (j == decode_state->num_slice_params - 1)
2667 next_slice_group_param = NULL;
2669 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2671 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2672 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2674 if (i < decode_state->slice_params[j]->num_elements - 1)
2675 next_slice_param = slice_param + 1;
2677 next_slice_param = next_slice_group_param;
2679 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2684 intel_batchbuffer_end_atomic(batch);
2685 intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup table, indexed by the clipped
 * quantization index (0..127). */
2688 static const int vp8_dc_qlookup[128] =
2690 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2691 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2692 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2693 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2694 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2695 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2696 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2697 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup table, indexed by the clipped
 * quantization index (0..127). */
2700 static const int vp8_ac_qlookup[128] =
2702 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2703 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2704 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2705 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2706 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2707 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2708 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2709 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
/* Clamp a VP8 quantization index into the valid qlookup range.
 * NOTE(review): body not visible in this listing — presumably clamps
 * to [0, 127]; confirm against the full source. */
2712 static inline unsigned int vp8_clip_quantization_index(int index)
/* Prepare the decode context for a VP8 frame: update reference frame
 * store, allocate the render surface, select pre- vs post-deblocking
 * output from the loop-filter flag, ensure the segmentation buffer, and
 * (re)allocate the row-store scratch buffers sized by frame width. */
2723 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2724 struct decode_state *decode_state,
2725 struct gen7_mfd_context *gen7_mfd_context)
2727 struct object_surface *obj_surface;
2728 struct i965_driver_data *i965 = i965_driver_data(ctx);
2730 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2731 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2732 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2734 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2735 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2737 intel_update_vp8_frame_store_index(ctx,
2740 gen7_mfd_context->reference_surface);
2742 /* Current decoded picture */
2743 obj_surface = decode_state->render_object;
2744 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Exactly one of pre-/post-deblocking output is valid, chosen by the
 * loop_filter_disable flag. */
2746 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2747 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2748 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2749 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2751 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2752 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2753 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2754 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2756 intel_ensure_vp8_segmentation_buffer(ctx,
2757 &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2759 /* The same as AVC */
2760 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2761 bo = dri_bo_alloc(i965->intel.bufmgr,
2766 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2767 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2769 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2770 bo = dri_bo_alloc(i965->intel.bufmgr,
2771 "deblocking filter row store",
2772 width_in_mbs * 64 * 4,
2775 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2776 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2778 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2779 bo = dri_bo_alloc(i965->intel.bufmgr,
2780 "bsd mpc row store",
2781 width_in_mbs * 64 * 2,
2784 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2785 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2787 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2788 bo = dri_bo_alloc(i965->intel.bufmgr,
2790 width_in_mbs * 64 * 2,
2793 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2794 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* VP8 has no bitplane buffer. */
2796 gen7_mfd_context->bitplane_read_buffer.valid = 0;
2800 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2801 struct decode_state *decode_state,
2802 struct gen7_mfd_context *gen7_mfd_context)
2804 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2805 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2806 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2807 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2808 dri_bo *probs_bo = decode_state->probability_data->bo;
2810 unsigned int quantization_value[4][6];
2812 /* There is no safe way to error out if the segmentation buffer
2813 could not be allocated. So, instead of aborting, simply decode
2814 something even if the result may look totally inacurate */
2815 const unsigned int enable_segmentation =
2816 pic_param->pic_fields.bits.segmentation_enabled &&
2817 gen7_mfd_context->segmentation_buffer.valid;
2819 log2num = (int)log2(slice_param->num_of_partitions - 1);
2821 BEGIN_BCS_BATCH(batch, 38);
2822 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2823 OUT_BCS_BATCH(batch,
2824 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2825 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2826 OUT_BCS_BATCH(batch,
2828 pic_param->pic_fields.bits.sharpness_level << 16 |
2829 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2830 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2831 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2832 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2833 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2834 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2835 (enable_segmentation &&
2836 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2837 (enable_segmentation &&
2838 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2839 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2840 pic_param->pic_fields.bits.filter_type << 4 |
2841 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2842 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2844 OUT_BCS_BATCH(batch,
2845 pic_param->loop_filter_level[3] << 24 |
2846 pic_param->loop_filter_level[2] << 16 |
2847 pic_param->loop_filter_level[1] << 8 |
2848 pic_param->loop_filter_level[0] << 0);
2850 /* Quantizer Value for 4 segmetns, DW4-DW15 */
2851 for (i = 0; i < 4; i++) {
2852 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2853 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2854 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2855 /* 101581>>16 is equivalent to 155/100 */
2856 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2857 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2858 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
2860 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2861 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2863 OUT_BCS_BATCH(batch,
2864 quantization_value[i][0] << 16 | /* Y1AC */
2865 quantization_value[i][1] << 0); /* Y1DC */
2866 OUT_BCS_BATCH(batch,
2867 quantization_value[i][5] << 16 | /* UVAC */
2868 quantization_value[i][4] << 0); /* UVDC */
2869 OUT_BCS_BATCH(batch,
2870 quantization_value[i][3] << 16 | /* Y2AC */
2871 quantization_value[i][2] << 0); /* Y2DC */
2874 /* CoeffProbability table for non-key frame, DW16-DW18 */
2876 OUT_BCS_RELOC(batch, probs_bo,
2877 0, I915_GEM_DOMAIN_INSTRUCTION,
2879 OUT_BCS_BATCH(batch, 0);
2880 OUT_BCS_BATCH(batch, 0);
2882 OUT_BCS_BATCH(batch, 0);
2883 OUT_BCS_BATCH(batch, 0);
2884 OUT_BCS_BATCH(batch, 0);
2887 OUT_BCS_BATCH(batch,
2888 pic_param->mb_segment_tree_probs[2] << 16 |
2889 pic_param->mb_segment_tree_probs[1] << 8 |
2890 pic_param->mb_segment_tree_probs[0] << 0);
2892 OUT_BCS_BATCH(batch,
2893 pic_param->prob_skip_false << 24 |
2894 pic_param->prob_intra << 16 |
2895 pic_param->prob_last << 8 |
2896 pic_param->prob_gf << 0);
2898 OUT_BCS_BATCH(batch,
2899 pic_param->y_mode_probs[3] << 24 |
2900 pic_param->y_mode_probs[2] << 16 |
2901 pic_param->y_mode_probs[1] << 8 |
2902 pic_param->y_mode_probs[0] << 0);
2904 OUT_BCS_BATCH(batch,
2905 pic_param->uv_mode_probs[2] << 16 |
2906 pic_param->uv_mode_probs[1] << 8 |
2907 pic_param->uv_mode_probs[0] << 0);
2909 /* MV update value, DW23-DW32 */
2910 for (i = 0; i < 2; i++) {
2911 for (j = 0; j < 20; j += 4) {
2912 OUT_BCS_BATCH(batch,
2913 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2914 pic_param->mv_probs[i][j + 2] << 16 |
2915 pic_param->mv_probs[i][j + 1] << 8 |
2916 pic_param->mv_probs[i][j + 0] << 0);
2920 OUT_BCS_BATCH(batch,
2921 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2922 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2923 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2924 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2926 OUT_BCS_BATCH(batch,
2927 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2928 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2929 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2930 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2932 /* segmentation id stream base address, DW35-DW37 */
2933 if (enable_segmentation) {
2934 OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2935 0, I915_GEM_DOMAIN_INSTRUCTION,
2937 OUT_BCS_BATCH(batch, 0);
2938 OUT_BCS_BATCH(batch, 0);
2941 OUT_BCS_BATCH(batch, 0);
2942 OUT_BCS_BATCH(batch, 0);
2943 OUT_BCS_BATCH(batch, 0);
2945 ADVANCE_BCS_BATCH(batch);
/*
 * Emit the MFD_VP8_BSD_OBJECT command (22 DWords) that describes the VP8
 * frame's coded data partitions to the MFX bitstream-decoder unit: the
 * boolean-coder resume state for partition 0, then size/offset pairs for
 * up to eight token partitions inside slice_data_bo.
 *
 * NOTE(review): this extract is missing several structural lines (braces,
 * an else branch, and a few command DWords such as the log2num field and
 * the trailing zero DWords); comments below cover only the visible code.
 */
2949 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2950 VAPictureParameterBufferVP8 *pic_param,
2951 VASliceParameterBufferVP8 *slice_param,
2952 dri_bo *slice_data_bo,
2953 struct gen7_mfd_context *gen7_mfd_context)
2955 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Partition 0 starts right after the uncompressed frame-header bits:
 * macroblock_offset is in bits, so round up to the next whole byte. */
2957 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
/* Bits of the current boolean-decoder byte that are already consumed. */
2958 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2959 unsigned int partition_size_0 = slice_param->partition_size[0];
/* NOTE(review): if bool_coder_ctx.count is an unsigned type, the ">= 0"
 * half of this assertion is vacuous — confirm the field's type. */
2961 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
/* A fully consumed byte means decoding resumes on the following byte of
 * partition 0; the visible branch shrinks the partition size by one. */
2962 if (used_bits == 8) {
2965 partition_size_0 -= 1;
/* VP8 allows 1, 2, 4 or 8 token partitions in addition to partition 0,
 * so num_of_partitions (which counts partition 0) is in [2, 9]. */
2968 assert(slice_param->num_of_partitions >= 2);
2969 assert(slice_param->num_of_partitions <= 9);
/* log2 of the token-partition count ("- 1" excludes partition 0). */
2971 log2num = (int)log2(slice_param->num_of_partitions - 1);
2973 BEGIN_BCS_BATCH(batch, 22);
2974 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
/* Partition-0 boolean-coder resume state. */
2975 OUT_BCS_BATCH(batch,
2976 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2977 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2979 (slice_param->macroblock_offset & 0x7));
2980 OUT_BCS_BATCH(batch,
2981 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2984 OUT_BCS_BATCH(batch, partition_size_0);
2985 OUT_BCS_BATCH(batch, offset);
/* When there is more than one token partition, a table of 3-byte
 * partition sizes sits right after partition 0; skip both partition 0
 * and that table to reach the first token partition's payload. */
2987 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
/* Eight {size, offset} DWord pairs follow; unused slots are zeroed. */
2988 for (i = 1; i < 9; i++) {
2989 if (i < slice_param->num_of_partitions) {
2990 OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2991 OUT_BCS_BATCH(batch, offset);
2993 OUT_BCS_BATCH(batch, 0);
2994 OUT_BCS_BATCH(batch, 0);
2997 offset += slice_param->partition_size[i];
/* NOTE(review): "1 << 31" left-shifts into the sign bit of a signed int,
 * which is undefined behavior in C — 1U << 31 would be well-defined. */
3000 OUT_BCS_BATCH(batch,
3001 1 << 31 | /* concealment method */
3004 ADVANCE_BCS_BATCH(batch);
/*
 * Decode one complete VP8 frame: validate the decode_state inputs
 * (exactly one slice parameter buffer with one element, one slice data
 * bo, plus the coefficient-probability buffer), then emit the full MFX
 * command sequence for the frame on the BCS ring.
 *
 * NOTE(review): the early-return after WARN_ONCE and the function's
 * braces are not visible in this extract.
 */
3008 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3009 struct decode_state *decode_state,
3010 struct gen7_mfd_context *gen7_mfd_context)
3012 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3013 VAPictureParameterBufferVP8 *pic_param;
3014 VASliceParameterBufferVP8 *slice_param;
3015 dri_bo *slice_data_bo;
3017 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3018 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
/* VP8 carries the whole frame in a single slice, so anything other than
 * exactly one slice param / one slice data buffer is a caller error. */
3020 /* one slice per frame */
3021 if (decode_state->num_slice_params != 1 ||
3022 (!decode_state->slice_params ||
3023 !decode_state->slice_params[0] ||
3024 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3025 (!decode_state->slice_datas ||
3026 !decode_state->slice_datas[0] ||
3027 !decode_state->slice_datas[0]->bo) ||
3028 !decode_state->probability_data) {
3029 WARN_ONCE("Wrong parameters for VP8 decoding\n")
3034 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3035 slice_data_bo = decode_state->slice_datas[0]->bo;
3037 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
/* Emit the per-frame MFX pipeline atomically: flush, pipe/surface/buffer
 * state, picture state, then the single VP8 BSD object. */
3038 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3039 intel_batchbuffer_emit_mi_flush(batch);
3040 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3041 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3042 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3043 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3044 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3045 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3046 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3047 intel_batchbuffer_end_atomic(batch);
3048 intel_batchbuffer_flush(batch);
/*
 * Top-level decode entry point (installed as hw_context->run): sanity
 * check the decode state for the requested profile, then dispatch to the
 * per-codec decode routine.
 *
 * NOTE(review): the profile parameter line, the vaStatus declaration, the
 * switch header, the break statements and the final return are not
 * visible in this extract.
 */
3052 gen8_mfd_decode_picture(VADriverContextP ctx,
3054 union codec_state *codec_state,
3055 struct hw_context *hw_context)
3058 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3059 struct decode_state *decode_state = &codec_state->decode;
3062 assert(gen7_mfd_context);
/* Reject malformed input before touching the hardware. */
3064 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3066 if (vaStatus != VA_STATUS_SUCCESS)
/* Reset the MPEG-2 slice-vertical-position workaround for each frame. */
3069 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
/* Dispatch by profile to the codec-specific frame decoder. */
3072 case VAProfileMPEG2Simple:
3073 case VAProfileMPEG2Main:
3074 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3077 case VAProfileH264ConstrainedBaseline:
3078 case VAProfileH264Main:
3079 case VAProfileH264High:
3080 case VAProfileH264StereoHigh:
3081 case VAProfileH264MultiviewHigh:
3082 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3085 case VAProfileVC1Simple:
3086 case VAProfileVC1Main:
3087 case VAProfileVC1Advanced:
3088 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3091 case VAProfileJPEGBaseline:
3092 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3095 case VAProfileVP8Version0_3:
3096 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
/* All per-codec decoders are void; reaching here means success. */
3104 vaStatus = VA_STATUS_SUCCESS;
3111 gen8_mfd_context_destroy(void *hw_context)
3113 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3115 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3116 gen7_mfd_context->post_deblocking_output.bo = NULL;
3118 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3119 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3121 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3122 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3124 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3125 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3127 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3128 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3130 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3131 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3133 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3134 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3136 dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3137 gen7_mfd_context->segmentation_buffer.bo = NULL;
3139 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3141 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3142 free(gen7_mfd_context);
3145 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3146 struct gen7_mfd_context *gen7_mfd_context)
3148 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3149 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3150 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3151 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/*
 * Allocate and initialise a Gen8 MFD (decode) hardware context for the
 * given config: install the run/destroy hooks, create the batchbuffer,
 * invalidate the reference-surface table, and run per-codec init.
 *
 * NOTE(review): the closing brace and the switch's trailing default case
 * are past the end of this extract; comments cover only visible code.
 */
3155 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3157 struct intel_driver_data *intel = intel_driver_data(ctx);
3158 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
/* NOTE(review): the calloc result is used without a visible NULL check
 * in this extract — confirm allocation-failure handling. */
3161 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3162 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3163 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
/* No reference frames yet: mark every frame-store slot as unused. */
3165 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3166 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3167 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3170 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3171 gen7_mfd_context->segmentation_buffer.valid = 0;
/* Per-codec one-time initialisation, selected by the config's profile. */
3173 switch (obj_config->profile) {
3174 case VAProfileMPEG2Simple:
3175 case VAProfileMPEG2Main:
3176 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3179 case VAProfileH264ConstrainedBaseline:
3180 case VAProfileH264Main:
3181 case VAProfileH264High:
3182 case VAProfileH264StereoHigh:
3183 case VAProfileH264MultiviewHigh:
3184 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3189 return (struct hw_context *)gen7_mfd_context;