2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial portions
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 * Xiang Haihao <haihao.xiang@intel.com>
26 * Zhao Yakui <yakui.zhao@intel.com>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
46 #include "intel_media.h"
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/*
 * 8x8 zig-zag scan order: maps scan index -> raster (row-major) position
 * inside a 64-entry coefficient block.
 * NOTE(review): this paste appears truncated — the closing "};" of the
 * initializer is missing; code left byte-identical.
 */
51 static const uint32_t zigzag_direct[64] = {
52 0, 1, 8, 16, 9, 2, 3, 10,
53 17, 24, 32, 25, 18, 11, 4, 5,
54 12, 19, 26, 33, 40, 48, 41, 34,
55 27, 20, 13, 6, 7, 14, 21, 28,
56 35, 42, 49, 56, 57, 50, 43, 36,
57 29, 22, 15, 23, 30, 37, 44, 51,
58 58, 59, 52, 45, 38, 31, 39, 46,
59 53, 60, 61, 54, 47, 55, 62, 63
/*
 * Lazily attach per-surface AVC decode state to obj_surface:
 * allocates a GenAvcSurface on first use and a direct-MV write/read
 * buffer ("dmv_top") sized for the whole frame (128 bytes per MB).
 * NOTE(review): several lines (storage class, braces, error handling)
 * are missing from this paste; code left byte-identical.
 */
63 gen8_mfd_init_avc_surface(VADriverContextP ctx,
64 VAPictureParameterBufferH264 *pic_param,
65 struct object_surface *obj_surface)
67 struct i965_driver_data *i965 = i965_driver_data(ctx);
68 GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69 int width_in_mbs, height_in_mbs;
/* Register destructor so the surface can free this private data later. */
71 obj_surface->free_private_data = gen_free_avc_surface;
72 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
/* First use of this surface for AVC: allocate the private struct.
 * NOTE(review): calloc result is not visibly NULL-checked here — the
 * check may be on a line missing from this paste; confirm upstream. */
75 if (!gen7_avc_surface) {
76 gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77 assert((obj_surface->size & 0x3f) == 0);
78 obj_surface->private_data = gen7_avc_surface;
81 /* DMV buffers now relate to the whole frame, irrespective of
83 if (gen7_avc_surface->dmv_top == NULL) {
84 gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
85 "direct mv w/r buffer",
86 width_in_mbs * height_in_mbs * 128,
88 assert(gen7_avc_surface->dmv_top);
/*
 * Emit MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decode
 * of the selected codec (standard_select), routing output to the
 * pre-/post-deblocking surface depending on context flags.
 * NOTE(review): the standard_select parameter and some OUT_BCS_BATCH
 * lines are missing from this paste; code left byte-identical.
 */
93 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
94 struct decode_state *decode_state,
96 struct gen7_mfd_context *gen7_mfd_context)
98 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Only these five codecs are supported by this decode path. */
100 assert(standard_select == MFX_FORMAT_MPEG2 ||
101 standard_select == MFX_FORMAT_AVC ||
102 standard_select == MFX_FORMAT_VC1 ||
103 standard_select == MFX_FORMAT_JPEG ||
104 standard_select == MFX_FORMAT_VP8);
106 BEGIN_BCS_BATCH(batch, 5);
107 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
109 (MFX_LONG_MODE << 17) | /* Currently only support long format */
110 (MFD_MODE_VLD << 15) | /* VLD mode */
111 (0 << 10) | /* disable Stream-Out */
112 (gen7_mfd_context->post_deblocking_output.valid << 9) | /* Post Deblocking Output */
113 (gen7_mfd_context->pre_deblocking_output.valid << 8) | /* Pre Deblocking Output */
114 (0 << 5) | /* not in stitch mode */
115 (MFX_CODEC_DECODE << 4) | /* decoding mode */
116 (standard_select << 0));
118 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
119 (0 << 3) | /* terminate if AVC mbdata error occurs */
120 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
123 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
124 OUT_BCS_BATCH(batch, 0); /* reserved */
125 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_SURFACE_STATE for the surface being decoded into: geometry,
 * pitch, tiling, pixel format (monochrome for Y800, otherwise planar
 * 4:2:0 8-bit), and the Y offsets of the Cb/Cr planes.
 * NOTE(review): the standard_select parameter and several OUT_BCS_BATCH
 * header lines are missing from this paste; code left byte-identical.
 */
129 gen8_mfd_surface_state(VADriverContextP ctx,
130 struct decode_state *decode_state,
132 struct gen7_mfd_context *gen7_mfd_context)
134 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
135 struct object_surface *obj_surface = decode_state->render_object;
136 unsigned int y_cb_offset;
137 unsigned int y_cr_offset;
138 unsigned int surface_format;
142 y_cb_offset = obj_surface->y_cb_offset;
143 y_cr_offset = obj_surface->y_cr_offset;
/* Y800 (grey-only) surfaces are programmed as monochrome. */
145 surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
146 MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
148 BEGIN_BCS_BATCH(batch, 6);
149 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
150 OUT_BCS_BATCH(batch, 0);
152 ((obj_surface->orig_height - 1) << 18) |
153 ((obj_surface->orig_width - 1) << 4));
155 (surface_format << 28) | /* 420 planar YUV surface */
156 ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
157 (0 << 22) | /* surface object control state, ignored */
158 ((obj_surface->width - 1) << 3) | /* pitch */
159 (0 << 2) | /* must be 0 */
160 (1 << 1) | /* must be tiled */
161 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
163 (0 << 16) | /* X offset for U(Cb), must be 0 */
164 (y_cb_offset << 0)); /* Y offset for U(Cb) */
166 (0 << 16) | /* X offset for V(Cr), must be 0 */
167 (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
168 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 dwords): pre/post-deblocking output
 * buffers, row-store scratch buffers, and the reference surface table.
 * Slots whose backing buffer is not valid are written as zero.
 * NOTE(review): relocation-offset arguments, the loop variable
 * declaration, and several padding dwords are missing from this paste;
 * code left byte-identical.
 */
172 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
173 struct decode_state *decode_state,
175 struct gen7_mfd_context *gen7_mfd_context)
177 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
180 BEGIN_BCS_BATCH(batch, 61);
181 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
182 /* Pre-deblock 1-3 */
183 if (gen7_mfd_context->pre_deblocking_output.valid)
184 OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
185 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188 OUT_BCS_BATCH(batch, 0);
190 OUT_BCS_BATCH(batch, 0);
191 OUT_BCS_BATCH(batch, 0);
192 /* Post-debloing 4-6 */
193 if (gen7_mfd_context->post_deblocking_output.valid)
194 OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
195 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198 OUT_BCS_BATCH(batch, 0);
200 OUT_BCS_BATCH(batch, 0);
201 OUT_BCS_BATCH(batch, 0);
203 /* uncompressed-video & stream out 7-12 */
204 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
205 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206 OUT_BCS_BATCH(batch, 0);
207 OUT_BCS_BATCH(batch, 0);
208 OUT_BCS_BATCH(batch, 0);
209 OUT_BCS_BATCH(batch, 0);
211 /* intra row-store scratch 13-15 */
212 if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
213 OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
214 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
217 OUT_BCS_BATCH(batch, 0);
219 OUT_BCS_BATCH(batch, 0);
220 OUT_BCS_BATCH(batch, 0);
221 /* deblocking-filter-row-store 16-18 */
222 if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
223 OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
224 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
227 OUT_BCS_BATCH(batch, 0);
228 OUT_BCS_BATCH(batch, 0);
229 OUT_BCS_BATCH(batch, 0);
/* Reference surface table: one relocation per valid reference,
 * read-only from the GPU's point of view (write domain 0). */
232 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
233 struct object_surface *obj_surface;
235 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
236 gen7_mfd_context->reference_surface[i].obj_surface &&
237 gen7_mfd_context->reference_surface[i].obj_surface->bo) {
238 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
240 OUT_BCS_RELOC(batch, obj_surface->bo,
241 I915_GEM_DOMAIN_INSTRUCTION, 0,
244 OUT_BCS_BATCH(batch, 0);
247 OUT_BCS_BATCH(batch, 0);
250 /* reference property 51 */
251 OUT_BCS_BATCH(batch, 0);
253 /* Macroblock status & ILDB 52-57 */
254 OUT_BCS_BATCH(batch, 0);
255 OUT_BCS_BATCH(batch, 0);
256 OUT_BCS_BATCH(batch, 0);
257 OUT_BCS_BATCH(batch, 0);
258 OUT_BCS_BATCH(batch, 0);
259 OUT_BCS_BATCH(batch, 0);
261 /* the second Macroblock status 58-60 */
262 OUT_BCS_BATCH(batch, 0);
263 OUT_BCS_BATCH(batch, 0);
264 OUT_BCS_BATCH(batch, 0);
266 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 dwords): point the bitstream
 * fetch unit at slice_data_bo; MV/IT-COFF/IT-DBLK/PAK-BSE indirect
 * objects are unused for decode and programmed as zero.
 * NOTE(review): a parameter line and some dwords are missing from this
 * paste; code left byte-identical.
 */
270 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
271 dri_bo *slice_data_bo,
273 struct gen7_mfd_context *gen7_mfd_context)
275 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277 BEGIN_BCS_BATCH(batch, 26);
278 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
280 OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
281 OUT_BCS_BATCH(batch, 0);
282 OUT_BCS_BATCH(batch, 0);
283 /* Upper bound 4-5 */
284 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
285 OUT_BCS_BATCH(batch, 0);
287 /* MFX indirect MV 6-10 */
288 OUT_BCS_BATCH(batch, 0);
289 OUT_BCS_BATCH(batch, 0);
290 OUT_BCS_BATCH(batch, 0);
291 OUT_BCS_BATCH(batch, 0);
292 OUT_BCS_BATCH(batch, 0);
294 /* MFX IT_COFF 11-15 */
295 OUT_BCS_BATCH(batch, 0);
296 OUT_BCS_BATCH(batch, 0);
297 OUT_BCS_BATCH(batch, 0);
298 OUT_BCS_BATCH(batch, 0);
299 OUT_BCS_BATCH(batch, 0);
301 /* MFX IT_DBLK 16-20 */
302 OUT_BCS_BATCH(batch, 0);
303 OUT_BCS_BATCH(batch, 0);
304 OUT_BCS_BATCH(batch, 0);
305 OUT_BCS_BATCH(batch, 0);
306 OUT_BCS_BATCH(batch, 0);
308 /* MFX PAK_BSE object for encoder 21-25 */
309 OUT_BCS_BATCH(batch, 0);
310 OUT_BCS_BATCH(batch, 0);
311 OUT_BCS_BATCH(batch, 0);
312 OUT_BCS_BATCH(batch, 0);
313 OUT_BCS_BATCH(batch, 0);
315 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 dwords): BSD/MPC row-store,
 * MPR row-store, and (VC-1) bitplane read buffers. Each buffer is
 * emitted only when its context slot is valid; otherwise zeroes.
 * NOTE(review): relocation-offset arguments are missing from this
 * paste; code left byte-identical.
 */
319 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
320 struct decode_state *decode_state,
322 struct gen7_mfd_context *gen7_mfd_context)
324 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
326 BEGIN_BCS_BATCH(batch, 10);
327 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
329 if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
330 OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
331 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
334 OUT_BCS_BATCH(batch, 0);
336 OUT_BCS_BATCH(batch, 0);
337 OUT_BCS_BATCH(batch, 0);
338 /* MPR Row Store Scratch buffer 4-6 */
339 if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
340 OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
341 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
344 OUT_BCS_BATCH(batch, 0);
346 OUT_BCS_BATCH(batch, 0);
347 OUT_BCS_BATCH(batch, 0);
/* Bitplane buffer is read-only (write domain 0). */
350 if (gen7_mfd_context->bitplane_read_buffer.valid)
351 OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
352 I915_GEM_DOMAIN_INSTRUCTION, 0,
355 OUT_BCS_BATCH(batch, 0);
356 OUT_BCS_BATCH(batch, 0);
357 OUT_BCS_BATCH(batch, 0);
358 ADVANCE_BCS_BATCH(batch);
/*
 * Emit one MFX_QM_STATE command carrying a quantization matrix of up
 * to 64 bytes (qm_length), zero-padded to the full 16-dword payload
 * via the stack buffer.
 * NOTE(review): the qm_type/qm/qm_length parameter lines and the
 * zeroing of qm_buffer are missing from this paste; confirm upstream
 * that qm_buffer is memset before the memcpy. Code left byte-identical.
 */
362 gen8_mfd_qm_state(VADriverContextP ctx,
366 struct gen7_mfd_context *gen7_mfd_context)
368 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
369 unsigned int qm_buffer[16];
371 assert(qm_length <= 16 * 4);
372 memcpy(qm_buffer, qm, qm_length);
374 BEGIN_BCS_BATCH(batch, 18);
375 OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
376 OUT_BCS_BATCH(batch, qm_type << 0);
377 intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
378 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords) from the H.264 picture parameter
 * buffer: frame geometry in MBs, QP offsets, prediction/entropy flags,
 * and MBAFF/field configuration. Validates that the stream is 4:2:0 or
 * monochrome (MFX cannot decode 4:2:2/4:4:4).
 * NOTE(review): the img_struct computation and several bitfield lines
 * are missing from this paste; code left byte-identical.
 */
382 gen8_mfd_avc_img_state(VADriverContextP ctx,
383 struct decode_state *decode_state,
384 struct gen7_mfd_context *gen7_mfd_context)
386 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
388 int mbaff_frame_flag;
389 unsigned int width_in_mbs, height_in_mbs;
390 VAPictureParameterBufferH264 *pic_param;
392 assert(decode_state->pic_param && decode_state->pic_param->buffer);
393 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
394 assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
/* Derive the picture structure from the CurrPic field flags. */
396 if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
398 else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
/* Field pictures must agree with the pic_param field_pic_flag. */
403 if ((img_struct & 0x1) == 0x1) {
404 assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
406 assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
409 if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
410 assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
411 assert(pic_param->pic_fields.bits.field_pic_flag == 0);
413 assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
416 mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
417 !pic_param->pic_fields.bits.field_pic_flag);
419 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
420 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
422 /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
423 assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
424 pic_param->seq_fields.bits.chroma_format_idc == 1); /* 4:2:0 */
425 assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
427 BEGIN_BCS_BATCH(batch, 17);
428 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
430 (width_in_mbs * height_in_mbs - 1));
432 ((height_in_mbs - 1) << 16) |
433 ((width_in_mbs - 1) << 0));
435 ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
436 ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
437 (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
438 (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
439 (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
440 (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
443 (pic_param->seq_fields.bits.chroma_format_idc << 10) |
444 (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
445 ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
446 (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
447 (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
448 (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
449 (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
450 (mbaff_frame_flag << 1) |
451 (pic_param->pic_fields.bits.field_pic_flag << 0));
452 OUT_BCS_BATCH(batch, 0);
453 OUT_BCS_BATCH(batch, 0);
454 OUT_BCS_BATCH(batch, 0);
455 OUT_BCS_BATCH(batch, 0);
456 OUT_BCS_BATCH(batch, 0);
457 OUT_BCS_BATCH(batch, 0);
458 OUT_BCS_BATCH(batch, 0);
459 OUT_BCS_BATCH(batch, 0);
460 OUT_BCS_BATCH(batch, 0);
461 OUT_BCS_BATCH(batch, 0);
462 OUT_BCS_BATCH(batch, 0);
463 OUT_BCS_BATCH(batch, 0);
464 ADVANCE_BCS_BATCH(batch);
/*
 * Upload the H.264 scaling lists: 4x4 intra/inter matrices always,
 * 8x8 intra/inter only when transform_8x8_mode_flag is set. Falls back
 * to the context's default (flat) IQ matrix when the app supplied none.
 * NOTE(review): some lines are missing from this paste; code left
 * byte-identical.
 */
468 gen8_mfd_avc_qm_state(VADriverContextP ctx,
469 struct decode_state *decode_state,
470 struct gen7_mfd_context *gen7_mfd_context)
472 VAIQMatrixBufferH264 *iq_matrix;
473 VAPictureParameterBufferH264 *pic_param;
475 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
476 iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
478 iq_matrix = &gen7_mfd_context->iq_matrix.h264;
480 assert(decode_state->pic_param && decode_state->pic_param->buffer);
481 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
/* Lists 0-2 are intra Y/Cb/Cr, lists 3-5 are inter Y/Cb/Cr. */
483 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
484 gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
486 if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
487 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
488 gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
/*
 * Thin wrapper: emit the AVC PICID state for the current reference
 * surface table via the shared gen75 helper.
 */
493 gen8_mfd_avc_picid_state(VADriverContextP ctx,
494 struct decode_state *decode_state,
495 struct gen7_mfd_context *gen7_mfd_context)
497 gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
498 gen7_mfd_context->reference_surface);
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords): direct-MV buffers for each
 * reference surface and for the current picture, followed by the
 * top/bottom POC (field order count) pairs used for temporal direct
 * prediction.
 * NOTE(review): loop-variable declarations, relocation offsets and some
 * else-branches are missing from this paste; code left byte-identical.
 */
502 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
503 struct decode_state *decode_state,
504 VAPictureParameterBufferH264 *pic_param,
505 VASliceParameterBufferH264 *slice_param,
506 struct gen7_mfd_context *gen7_mfd_context)
508 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
509 struct object_surface *obj_surface;
510 GenAvcSurface *gen7_avc_surface;
511 VAPictureH264 *va_pic;
514 BEGIN_BCS_BATCH(batch, 71);
515 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
517 /* reference surfaces 0..15 */
518 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
519 if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
520 gen7_mfd_context->reference_surface[i].obj_surface &&
521 gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
523 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
524 gen7_avc_surface = obj_surface->private_data;
/* Reference DMV buffers are read-only (write domain 0). */
526 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
527 I915_GEM_DOMAIN_INSTRUCTION, 0,
529 OUT_BCS_BATCH(batch, 0);
531 OUT_BCS_BATCH(batch, 0);
532 OUT_BCS_BATCH(batch, 0);
536 OUT_BCS_BATCH(batch, 0);
538 /* the current decoding frame/field */
539 va_pic = &pic_param->CurrPic;
540 obj_surface = decode_state->render_object;
541 assert(obj_surface->bo && obj_surface->private_data);
542 gen7_avc_surface = obj_surface->private_data;
/* Current picture's DMV buffer is written by the GPU. */
544 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
548 OUT_BCS_BATCH(batch, 0);
549 OUT_BCS_BATCH(batch, 0);
/* POC list: look each reference up in ReferenceFrames by surface id. */
552 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
553 obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
556 const VAPictureH264 * const va_pic = avc_find_picture(
557 obj_surface->base.id, pic_param->ReferenceFrames,
558 ARRAY_ELEMS(pic_param->ReferenceFrames));
560 assert(va_pic != NULL);
561 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
562 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
564 OUT_BCS_BATCH(batch, 0);
565 OUT_BCS_BATCH(batch, 0);
/* Finally the current picture's own POC pair. */
569 va_pic = &pic_param->CurrPic;
570 OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
571 OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
573 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: normalized slice type,
 * active reference counts, weight denominators, QP / deblocking
 * parameters, and the start position of this slice and the next one
 * (or the picture end when this is the last slice).
 * NOTE(review): slice_type declaration and several else-lines are
 * missing from this paste; code left byte-identical.
 */
577 gen8_mfd_avc_slice_state(VADriverContextP ctx,
578 VAPictureParameterBufferH264 *pic_param,
579 VASliceParameterBufferH264 *slice_param,
580 VASliceParameterBufferH264 *next_slice_param,
581 struct gen7_mfd_context *gen7_mfd_context)
583 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
584 int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
585 int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
586 int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
587 int num_ref_idx_l0, num_ref_idx_l1;
588 int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
589 pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
590 int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
/* Fold SI into I and SP into P: hardware only knows I/P/B. */
593 if (slice_param->slice_type == SLICE_TYPE_I ||
594 slice_param->slice_type == SLICE_TYPE_SI) {
595 slice_type = SLICE_TYPE_I;
596 } else if (slice_param->slice_type == SLICE_TYPE_P ||
597 slice_param->slice_type == SLICE_TYPE_SP) {
598 slice_type = SLICE_TYPE_P;
600 assert(slice_param->slice_type == SLICE_TYPE_B);
601 slice_type = SLICE_TYPE_B;
604 if (slice_type == SLICE_TYPE_I) {
605 assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
606 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
609 } else if (slice_type == SLICE_TYPE_P) {
610 assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
611 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
614 num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
615 num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
/* In MBAFF pictures each address unit covers a MB pair (<< 1). */
618 first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
619 slice_hor_pos = first_mb_in_slice % width_in_mbs;
620 slice_ver_pos = first_mb_in_slice / width_in_mbs;
622 if (next_slice_param) {
623 first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
624 next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
625 next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
/* Last slice: "next" position is the bottom of the picture
 * (half height for a field picture). */
627 next_slice_hor_pos = 0;
628 next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
631 BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
632 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
633 OUT_BCS_BATCH(batch, slice_type);
635 (num_ref_idx_l1 << 24) |
636 (num_ref_idx_l0 << 16) |
637 (slice_param->chroma_log2_weight_denom << 8) |
638 (slice_param->luma_log2_weight_denom << 0));
640 (slice_param->direct_spatial_mv_pred_flag << 29) |
641 (slice_param->disable_deblocking_filter_idc << 27) |
642 (slice_param->cabac_init_idc << 24) |
643 ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
644 ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
645 ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
647 (slice_ver_pos << 24) |
648 (slice_hor_pos << 16) |
649 (first_mb_in_slice << 0));
651 (next_slice_ver_pos << 16) |
652 (next_slice_hor_pos << 0));
654 (next_slice_param == NULL) << 19); /* last slice flag */
655 OUT_BCS_BATCH(batch, 0);
656 OUT_BCS_BATCH(batch, 0);
657 OUT_BCS_BATCH(batch, 0);
658 OUT_BCS_BATCH(batch, 0);
659 ADVANCE_BCS_BATCH(batch);
/*
 * Thin wrapper: emit the AVC REF_IDX state via the shared gen6 helper
 * using the context's reference surface table.
 * NOTE(review): argument lines appear to be missing from this paste;
 * code left byte-identical.
 */
663 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
664 VAPictureParameterBufferH264 *pic_param,
665 VASliceParameterBufferH264 *slice_param,
666 struct gen7_mfd_context *gen7_mfd_context)
668 gen6_send_avc_ref_idx_state(
669 gen7_mfd_context->base.batch,
671 gen7_mfd_context->reference_surface
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE: one table (list 0) for explicitly
 * weighted P slices, two tables (lists 0 and 1) for B slices with
 * weighted_bipred_idc == 1. Each table packs 32 entries of
 * {luma weight, luma offset, Cb weight, Cb offset, Cr weight, Cr offset}.
 * NOTE(review): the list-selection branches around the two fill loops
 * are missing from this paste; code left byte-identical.
 */
676 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
677 VAPictureParameterBufferH264 *pic_param,
678 VASliceParameterBufferH264 *slice_param,
679 struct gen7_mfd_context *gen7_mfd_context)
681 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
682 int i, j, num_weight_offset_table = 0;
683 short weightoffsets[32 * 6];
685 if ((slice_param->slice_type == SLICE_TYPE_P ||
686 slice_param->slice_type == SLICE_TYPE_SP) &&
687 (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
688 num_weight_offset_table = 1;
691 if ((slice_param->slice_type == SLICE_TYPE_B) &&
692 (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
693 num_weight_offset_table = 2;
696 for (i = 0; i < num_weight_offset_table; i++) {
697 BEGIN_BCS_BATCH(batch, 98);
698 OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
699 OUT_BCS_BATCH(batch, i);
702 for (j = 0; j < 32; j++) {
703 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
704 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
705 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
706 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
707 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
708 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
711 for (j = 0; j < 32; j++) {
712 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
713 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
714 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
715 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
716 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
717 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
721 intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
722 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFD_AVC_BSD_OBJECT: kick decode of one slice's bitstream data,
 * giving the hardware the slice size/offset and the bit offset of the
 * first macroblock (byte part in bits 16.., remaining 0-7 bit part in
 * the low bits). The last-slice flag is derived from next_slice_param.
 * NOTE(review): several argument and bitfield lines are missing from
 * this paste; code left byte-identical.
 */
727 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
728 VAPictureParameterBufferH264 *pic_param,
729 VASliceParameterBufferH264 *slice_param,
730 dri_bo *slice_data_bo,
731 VASliceParameterBufferH264 *next_slice_param,
732 struct gen7_mfd_context *gen7_mfd_context)
734 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
735 int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
737 pic_param->pic_fields.bits.entropy_coding_mode_flag);
739 /* the input bitsteam format on GEN7 differs from GEN6 */
740 BEGIN_BCS_BATCH(batch, 6);
741 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
743 (slice_param->slice_data_size));
744 OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
752 ((slice_data_bit_offset >> 3) << 16) |
756 ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
757 (slice_data_bit_offset & 0x7));
758 OUT_BCS_BATCH(batch, 0);
759 ADVANCE_BCS_BATCH(batch);
/*
 * One-time AVC context setup: install the default (flat) IQ matrix
 * used when the application supplies no scaling lists.
 */
763 gen8_mfd_avc_context_init(
764 VADriverContextP ctx,
765 struct gen7_mfd_context *gen7_mfd_context
768 /* Initialize flat scaling lists */
769 avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
/*
 * Per-picture AVC decode setup: scan all slices to decide whether
 * in-loop deblocking (ILDB) is enabled, refresh the reference frame
 * store, ensure the render surface and its AVC private data exist,
 * wire the pre-/post-deblocking outputs, and (re)allocate the
 * row-store scratch buffers sized by picture width in MBs.
 * NOTE(review): the dri_bo *bo declaration, some dri_bo_alloc
 * arguments, and closing braces are missing from this paste; code left
 * byte-identical.
 */
773 gen8_mfd_avc_decode_init(VADriverContextP ctx,
774 struct decode_state *decode_state,
775 struct gen7_mfd_context *gen7_mfd_context)
777 VAPictureParameterBufferH264 *pic_param;
778 VASliceParameterBufferH264 *slice_param;
779 struct i965_driver_data *i965 = i965_driver_data(ctx);
780 struct object_surface *obj_surface;
782 int i, j, enable_avc_ildb = 0;
783 unsigned int width_in_mbs, height_in_mbs;
/* ILDB is needed if any slice does not fully disable deblocking. */
785 for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
786 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
787 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
789 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
790 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
791 assert((slice_param->slice_type == SLICE_TYPE_I) ||
792 (slice_param->slice_type == SLICE_TYPE_SI) ||
793 (slice_param->slice_type == SLICE_TYPE_P) ||
794 (slice_param->slice_type == SLICE_TYPE_SP) ||
795 (slice_param->slice_type == SLICE_TYPE_B));
797 if (slice_param->disable_deblocking_filter_idc != 1) {
806 assert(decode_state->pic_param && decode_state->pic_param->buffer);
807 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
808 gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
809 gen7_mfd_context->reference_surface);
810 width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
811 height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
812 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
813 assert(height_in_mbs > 0 && height_in_mbs <= 256);
815 /* Current decoded picture */
816 obj_surface = decode_state->render_object;
817 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
818 obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
820 avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
821 gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
/* Exactly one of post-/pre-deblocking output is active, keyed on ILDB. */
823 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
824 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
825 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
826 gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
828 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
829 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
830 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
831 gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
833 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
834 bo = dri_bo_alloc(i965->intel.bufmgr,
839 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
840 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
842 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
843 bo = dri_bo_alloc(i965->intel.bufmgr,
844 "deblocking filter row store",
845 width_in_mbs * 64 * 4,
848 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
849 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
851 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
852 bo = dri_bo_alloc(i965->intel.bufmgr,
854 width_in_mbs * 64 * 2,
857 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
858 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
860 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
861 bo = dri_bo_alloc(i965->intel.bufmgr,
863 width_in_mbs * 64 * 2,
866 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
867 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* No bitplane buffer for AVC (VC-1 only). */
869 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Top-level AVC picture decode: run per-picture init, then build one
 * atomic BCS batch with the common MFX state followed by per-slice
 * DIRECTMODE / REF_IDX / WEIGHTOFFSET / SLICE / BSD commands, and flush.
 * next_slice_param lets each slice know where the following slice
 * starts (NULL marks the last slice of the picture).
 * NOTE(review): the i/j declarations and loop-advance lines are missing
 * from this paste; code left byte-identical.
 */
873 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
874 struct decode_state *decode_state,
875 struct gen7_mfd_context *gen7_mfd_context)
877 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
878 VAPictureParameterBufferH264 *pic_param;
879 VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
880 dri_bo *slice_data_bo;
883 assert(decode_state->pic_param && decode_state->pic_param->buffer);
884 pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
885 gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
887 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
888 intel_batchbuffer_emit_mi_flush(batch);
889 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
890 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
891 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
892 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
893 gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
894 gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
895 gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
897 for (j = 0; j < decode_state->num_slice_params; j++) {
898 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
899 slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
900 slice_data_bo = decode_state->slice_datas[j]->bo;
901 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
903 if (j == decode_state->num_slice_params - 1)
904 next_slice_group_param = NULL;
906 next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
908 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
909 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
910 assert((slice_param->slice_type == SLICE_TYPE_I) ||
911 (slice_param->slice_type == SLICE_TYPE_SI) ||
912 (slice_param->slice_type == SLICE_TYPE_P) ||
913 (slice_param->slice_type == SLICE_TYPE_SP) ||
914 (slice_param->slice_type == SLICE_TYPE_B));
916 if (i < decode_state->slice_params[j]->num_elements - 1)
917 next_slice_param = slice_param + 1;
919 next_slice_param = next_slice_group_param;
921 gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
922 gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
923 gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
924 gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
925 gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
930 intel_batchbuffer_end_atomic(batch);
931 intel_batchbuffer_flush(batch);
/*
 * Per-picture MPEG-2 decode setup: refresh the reference surface list,
 * ensure the render surface has an NV12 buffer object, route output to
 * the pre-deblocking path (MPEG-2 has no in-loop deblocking), and
 * allocate the BSD/MPC row-store scratch buffer. All other scratch
 * buffers are marked invalid.
 * NOTE(review): the dri_bo *bo declaration and some dri_bo_alloc
 * arguments are missing from this paste; code left byte-identical.
 */
935 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
936 struct decode_state *decode_state,
937 struct gen7_mfd_context *gen7_mfd_context)
939 VAPictureParameterBufferMPEG2 *pic_param;
940 struct i965_driver_data *i965 = i965_driver_data(ctx);
941 struct object_surface *obj_surface;
943 unsigned int width_in_mbs;
945 assert(decode_state->pic_param && decode_state->pic_param->buffer);
946 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
947 width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
949 mpeg2_set_reference_surfaces(
951 gen7_mfd_context->reference_surface,
956 /* Current decoded picture */
957 obj_surface = decode_state->render_object;
958 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
960 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
961 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
962 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
963 gen7_mfd_context->pre_deblocking_output.valid = 1;
965 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
966 bo = dri_bo_alloc(i965->intel.bufmgr,
971 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
972 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
974 gen7_mfd_context->post_deblocking_output.valid = 0;
975 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
976 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
977 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
978 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/*
 * Emit the MFX_MPEG2_PIC_STATE command (13 dwords): packs the f_codes,
 * picture coding extension flags, picture coding type and the frame size
 * in macroblocks into the batch buffer.
 * NOTE(review): the condition guarding "slice_concealment_disable_bit = 1"
 * is missing from this listing — verify it before touching this block.
 */
982 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
983 struct decode_state *decode_state,
984 struct gen7_mfd_context *gen7_mfd_context)
986 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
987 VAPictureParameterBufferMPEG2 *pic_param;
988 unsigned int slice_concealment_disable_bit = 0;
990 assert(decode_state->pic_param && decode_state->pic_param->buffer);
991 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
993 slice_concealment_disable_bit = 1;
995 BEGIN_BCS_BATCH(batch, 13);
996 OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
/* DW1: f_codes (4 bits each, packed high-to-low) plus the picture coding
 * extension bitfields. */
998 (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
999 ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1000 ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1001 ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1002 pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1003 pic_param->picture_coding_extension.bits.picture_structure << 12 |
1004 pic_param->picture_coding_extension.bits.top_field_first << 11 |
1005 pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1006 pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1007 pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1008 pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
1009 pic_param->picture_coding_extension.bits.alternate_scan << 6);
/* DW2: picture coding type (I/P/B). */
1010 OUT_BCS_BATCH(batch,
1011 pic_param->picture_coding_type << 9);
/* DW3: slice concealment control and frame size in MBs, minus one. */
1012 OUT_BCS_BATCH(batch,
1013 (slice_concealment_disable_bit << 31) |
1014 ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1015 ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
/* DW4..DW12: reserved/unused for this configuration. */
1016 OUT_BCS_BATCH(batch, 0);
1017 OUT_BCS_BATCH(batch, 0);
1018 OUT_BCS_BATCH(batch, 0);
1019 OUT_BCS_BATCH(batch, 0);
1020 OUT_BCS_BATCH(batch, 0);
1021 OUT_BCS_BATCH(batch, 0);
1022 OUT_BCS_BATCH(batch, 0);
1023 OUT_BCS_BATCH(batch, 0);
1024 OUT_BCS_BATCH(batch, 0);
1025 ADVANCE_BCS_BATCH(batch);
/*
 * Update the driver-cached MPEG-2 quantiser matrices from the VA IQ
 * matrix buffer (de-zigzagging via zigzag_direct[]) and emit them to the
 * hardware with gen8_mfd_qm_state().
 * The load_* fields are sticky: a value of -1 means "never loaded", so
 * the first buffer always takes effect; afterwards only buffers with the
 * load flag set overwrite the cached matrix.
 */
1029 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1030 struct decode_state *decode_state,
1031 struct gen7_mfd_context *gen7_mfd_context)
1033 VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1036 /* Update internal QM state */
1037 if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1038 VAIQMatrixBufferMPEG2 * const iq_matrix =
1039 (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1041 if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1042 iq_matrix->load_intra_quantiser_matrix) {
1043 gen_iq_matrix->load_intra_quantiser_matrix =
1044 iq_matrix->load_intra_quantiser_matrix;
1045 if (iq_matrix->load_intra_quantiser_matrix) {
/* VA supplies the matrix in zigzag scan order; store it raster-order. */
1046 for (j = 0; j < 64; j++)
1047 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1048 iq_matrix->intra_quantiser_matrix[j];
1052 if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1053 iq_matrix->load_non_intra_quantiser_matrix) {
1054 gen_iq_matrix->load_non_intra_quantiser_matrix =
1055 iq_matrix->load_non_intra_quantiser_matrix;
1056 if (iq_matrix->load_non_intra_quantiser_matrix) {
1057 for (j = 0; j < 64; j++)
1058 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1059 iq_matrix->non_intra_quantiser_matrix[j];
1064 /* Commit QM state to HW */
/* i == 0 selects the intra matrix, i == 1 the non-intra matrix
 * (selection lines are partially missing from this listing). */
1065 for (i = 0; i < 2; i++) {
1066 unsigned char *qm = NULL;
1070 if (gen_iq_matrix->load_intra_quantiser_matrix) {
1071 qm = gen_iq_matrix->intra_quantiser_matrix;
1072 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1075 if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1076 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1077 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1084 gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
/*
 * Emit one MFD_MPEG2_BSD_OBJECT command for a single slice.
 * Computes the slice extent (mb_count) from the current and next slice
 * positions; the last slice of the picture extends to the bottom of the
 * frame.  is_field_pic_wa compensates for apps that report slice
 * vertical positions in frame units for field pictures (see
 * mpeg2_wa_slice_vertical_position in the decode_picture path).
 */
1089 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1090 VAPictureParameterBufferMPEG2 *pic_param,
1091 VASliceParameterBufferMPEG2 *slice_param,
1092 VASliceParameterBufferMPEG2 *next_slice_param,
1093 struct gen7_mfd_context *gen7_mfd_context)
1095 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1096 unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1097 int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1099 if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1100 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
/* NOTE(review): the "is_field_pic = 1;" body of this if is missing from
 * this listing. */
1102 is_field_pic_wa = is_field_pic &&
1103 gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
/* Start position of this slice, in macroblock units. */
1105 vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1106 hpos0 = slice_param->slice_horizontal_position;
1108 if (next_slice_param == NULL) {
/* Last slice: runs to the bottom of the (field) picture. */
1109 vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1112 vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1113 hpos1 = next_slice_param->slice_horizontal_position;
1116 mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1118 BEGIN_BCS_BATCH(batch, 5);
1119 OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
/* Slice data length/offset exclude the byte-aligned part of the
 * macroblock_offset (its bit remainder goes into DW3). */
1120 OUT_BCS_BATCH(batch,
1121 slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1122 OUT_BCS_BATCH(batch,
1123 slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1124 OUT_BCS_BATCH(batch,
1128 (next_slice_param == NULL) << 5 |
1129 (next_slice_param == NULL) << 3 |
1130 (slice_param->macroblock_offset & 0x7));
1131 OUT_BCS_BATCH(batch,
1132 (slice_param->quantiser_scale_code << 24) |
1133 (vpos1 << 8 | hpos1));
1134 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level MPEG-2 frame decode: sets up per-frame state, then walks the
 * slice parameter groups and emits one BSD object per slice.  The whole
 * command sequence is built atomically and flushed at the end.
 */
1138 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1139 struct decode_state *decode_state,
1140 struct gen7_mfd_context *gen7_mfd_context)
1142 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1143 VAPictureParameterBufferMPEG2 *pic_param;
1144 VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1145 dri_bo *slice_data_bo;
1148 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1149 pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1151 gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1152 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1153 intel_batchbuffer_emit_mi_flush(batch);
1154 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1155 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1156 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1157 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1158 gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1159 gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
/* Lazily detect (once per context) whether the app reports slice
 * vertical positions in frame units for field pictures. */
1161 if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1162 gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1163 mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1165 for (j = 0; j < decode_state->num_slice_params; j++) {
1166 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1167 slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1168 slice_data_bo = decode_state->slice_datas[j]->bo;
1169 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
/* Peek at the first slice of the next group so the last slice of this
 * group knows where the next one starts. */
1171 if (j == decode_state->num_slice_params - 1)
1172 next_slice_group_param = NULL;
1174 next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1176 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1177 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1179 if (i < decode_state->slice_params[j]->num_elements - 1)
1180 next_slice_param = slice_param + 1;
1182 next_slice_param = next_slice_group_param;
1184 gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1189 intel_batchbuffer_end_atomic(batch);
1190 intel_batchbuffer_flush(batch);
/* Lookup tables translating VA-API VC-1 enum values to the Gen7+ MFX
 * hardware encodings.  NOTE(review): several table entries are missing
 * from this numbered listing (e.g. the I/P/B entries of
 * va_to_gen7_vc1_pic_type and the condover entries). */
1193 static const int va_to_gen7_vc1_pic_type[5] = {
1197 GEN7_VC1_BI_PICTURE,
/* Unified motion-vector mode: index is the VA mv_mode value. */
1201 static const int va_to_gen7_vc1_mv[4] = {
1203 2, /* 1-MV half-pel */
1204 3, /* 1-MV half-pel bilinear */
/* Fixed-point (x/256) scale factors indexed by b_picture_fraction,
 * used for B-frame reference distance scaling. */
1208 static const int b_picture_scale_factor[21] = {
1209 128, 85, 170, 64, 192,
1210 51, 102, 153, 204, 43,
1211 215, 37, 74, 111, 148,
1212 185, 222, 32, 96, 160,
1216 static const int va_to_gen7_vc1_condover[3] = {
1222 static const int va_to_gen7_vc1_profile[4] = {
1223 GEN7_VC1_SIMPLE_PROFILE,
1224 GEN7_VC1_MAIN_PROFILE,
1225 GEN7_VC1_RESERVED_PROFILE,
1226 GEN7_VC1_ADVANCED_PROFILE
1230 gen8_mfd_free_vc1_surface(void **data)
1232 struct gen7_vc1_surface *gen7_vc1_surface = *data;
1234 if (!gen7_vc1_surface)
1237 dri_bo_unreference(gen7_vc1_surface->dmv);
1238 free(gen7_vc1_surface);
/*
 * Attach (or refresh) the VC-1 private data on the decoded surface:
 * records the picture type and lazily allocates the per-surface
 * direct-MV buffer (64 bytes per macroblock).
 * NOTE(review): the calloc() result is not NULL-checked before the
 * following dereferences — confirm whether the driver policy is
 * abort-on-OOM here.
 */
1243 gen8_mfd_init_vc1_surface(VADriverContextP ctx,
1244 VAPictureParameterBufferVC1 *pic_param,
1245 struct object_surface *obj_surface)
1247 struct i965_driver_data *i965 = i965_driver_data(ctx);
1248 struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1249 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1250 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1252 obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1254 if (!gen7_vc1_surface) {
1255 gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1256 assert((obj_surface->size & 0x3f) == 0);
1257 obj_surface->private_data = gen7_vc1_surface;
1260 gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
/* Direct-MV write/read buffer, allocated once per surface. */
1262 if (gen7_vc1_surface->dmv == NULL) {
1263 gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1264 "direct mv w/r buffer",
1265 width_in_mbs * height_in_mbs * 64,
/*
 * Per-frame init for VC-1 decode: updates the reference frame-store
 * table, binds the render target (pre- or post-deblocking output
 * depending on whether the in-loop filter is enabled), allocates the
 * row-store scratch buffers, and repacks the VA bit-plane buffer into
 * the hardware layout when bitplanes are present.
 * NOTE(review): this numbered listing is missing several original lines
 * (variable declarations, dri_bo_alloc() argument lists, loop bodies'
 * braces) — confirm against the full source before editing.
 */
1271 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1272 struct decode_state *decode_state,
1273 struct gen7_mfd_context *gen7_mfd_context)
1275 VAPictureParameterBufferVC1 *pic_param;
1276 struct i965_driver_data *i965 = i965_driver_data(ctx);
1277 struct object_surface *obj_surface;
1282 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1283 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1284 width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1285 picture_type = pic_param->picture_fields.bits.picture_type;
1287 intel_update_vc1_frame_store_index(ctx,
1290 gen7_mfd_context->reference_surface);
1292 /* Current decoded picture */
1293 obj_surface = decode_state->render_object;
1294 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1295 gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
/* Exactly one of post-/pre-deblocking output is valid, selected by the
 * loop-filter flag; both reference the same render target BO. */
1297 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1298 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1299 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1300 gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1302 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1303 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1304 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1305 gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
/* Width-dependent row-store scratch buffers. */
1307 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1308 bo = dri_bo_alloc(i965->intel.bufmgr,
1313 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1314 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1316 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1317 bo = dri_bo_alloc(i965->intel.bufmgr,
1318 "deblocking filter row store",
1319 width_in_mbs * 7 * 64,
1322 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1323 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1325 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1326 bo = dri_bo_alloc(i965->intel.bufmgr,
1327 "bsd mpc row store",
1331 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1332 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1334 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1336 gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1337 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1339 if (gen7_mfd_context->bitplane_read_buffer.valid) {
1340 int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1341 int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
/* Two 4-bit bitplane nibbles are packed per output byte. */
1342 int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1344 uint8_t *src = NULL, *dst = NULL;
1346 assert(decode_state->bit_plane->buffer);
1347 src = decode_state->bit_plane->buffer;
1349 bo = dri_bo_alloc(i965->intel.bufmgr,
1351 bitplane_width * height_in_mbs,
1354 gen7_mfd_context->bitplane_read_buffer.bo = bo;
1356 dri_bo_map(bo, True);
1357 assert(bo->virtual);
/* Repack VA bitplane nibbles into the HW layout, one MB row at a
 * time; source nibbles alternate high/low within a byte. */
1360 for (src_h = 0; src_h < height_in_mbs; src_h++) {
1361 for(src_w = 0; src_w < width_in_mbs; src_w++) {
1362 int src_index, dst_index;
1366 src_index = (src_h * width_in_mbs + src_w) / 2;
1367 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1368 src_value = ((src[src_index] >> src_shift) & 0xf);
/* NOTE(review): the skipped-picture branch body is missing lines in
 * this listing. */
1370 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1374 dst_index = src_w / 2;
1375 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
/* Odd trailing MB column: shift the final nibble into place. */
1379 dst[src_w / 2] >>= 4;
1381 dst += bitplane_width;
1386 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
/*
 * Emit the MFD_VC1_LONG_PIC_STATE command (6 dwords).  This is the most
 * intricate part of the VC-1 path: it derives the alternate-pquant
 * configuration from the DQUANT syntax, the unified MV mode, B-frame
 * scale factor, picture type remapping, transform/AC coding set
 * selection, direct-MV surface validity and interpolation mode, then
 * packs all of it into the hardware bitfields.
 * NOTE(review): many structural lines (braces, case labels, else
 * branches) are missing from this numbered listing; the code below is
 * annotated but otherwise left byte-identical.
 */
1390 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1391 struct decode_state *decode_state,
1392 struct gen7_mfd_context *gen7_mfd_context)
1394 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1395 VAPictureParameterBufferVC1 *pic_param;
1396 struct object_surface *obj_surface;
1397 int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1398 int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1399 int unified_mv_mode;
1400 int ref_field_pic_polarity = 0;
1401 int scale_factor = 0;
1403 int dmv_surface_valid = 0;
1409 int interpolation_mode = 0;
1411 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1412 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1414 profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
/* Unpack the DQUANT-related quantizer syntax elements. */
1415 dquant = pic_param->pic_quantizer_fields.bits.dquant;
1416 dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1417 dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1418 dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1419 dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1420 dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1421 alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
/* Derive alt_pquant_config / alt_pquant_edge_mask per VC-1 DQUANT
 * semantics (dquant 0 = off, 2 = all edges, 1 = per-frame selection). */
1424 alt_pquant_config = 0;
1425 alt_pquant_edge_mask = 0;
1426 } else if (dquant == 2) {
1427 alt_pquant_config = 1;
1428 alt_pquant_edge_mask = 0xf;
1430 assert(dquant == 1);
1431 if (dquantfrm == 0) {
1432 alt_pquant_config = 0;
1433 alt_pquant_edge_mask = 0;
1436 assert(dquantfrm == 1);
1437 alt_pquant_config = 1;
1439 switch (dqprofile) {
1441 if (dqbilevel == 0) {
1442 alt_pquant_config = 2;
1443 alt_pquant_edge_mask = 0;
1445 assert(dqbilevel == 1);
1446 alt_pquant_config = 3;
1447 alt_pquant_edge_mask = 0;
1452 alt_pquant_edge_mask = 0xf;
1457 alt_pquant_edge_mask = 0x9;
1459 alt_pquant_edge_mask = (0x3 << dqdbedge);
1464 alt_pquant_edge_mask = (0x1 << dqsbedge);
/* When intensity compensation is active the real MV mode is mv_mode2. */
1473 if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1474 assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1475 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1477 assert(pic_param->mv_fields.bits.mv_mode < 4);
1478 unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1481 if (pic_param->sequence_fields.bits.interlace == 1 &&
1482 pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1483 /* FIXME: calculate reference field picture polarity */
1485 ref_field_pic_polarity = 0;
1488 if (pic_param->b_picture_fraction < 21)
1489 scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1491 picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
/* Advanced-profile I pictures are handled by HW as BI pictures. */
1493 if (profile == GEN7_VC1_ADVANCED_PROFILE &&
1494 picture_type == GEN7_VC1_I_PICTURE)
1495 picture_type = GEN7_VC1_BI_PICTURE;
1497 if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1498 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1500 trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1503 * 8.3.6.2.1 Transform Type Selection
1504 * If variable-sized transform coding is not enabled,
1505 * then the 8x8 transform shall be used for all blocks.
1506 * it is also MFX_VC1_PIC_STATE requirement.
1508 if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1509 pic_param->transform_fields.bits.mb_level_transform_type_flag = 1;
1510 pic_param->transform_fields.bits.frame_level_transform_type = 0;
/* B pictures need a direct-MV read surface from the backward
 * (future) reference; it is only valid if that reference is P/B. */
1514 if (picture_type == GEN7_VC1_B_PICTURE) {
1515 struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1517 obj_surface = decode_state->reference_objects[1];
1520 gen7_vc1_surface = obj_surface->private_data;
1522 if (!gen7_vc1_surface ||
1523 (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1524 va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1525 dmv_surface_valid = 0;
1527 dmv_surface_valid = 1;
1530 assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1532 if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1533 fcm = pic_param->picture_fields.bits.frame_coding_mode;
1535 if (pic_param->picture_fields.bits.top_field_first)
/* B-frame reference distance, scaled by the fraction table. */
1541 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1542 brfd = pic_param->reference_fields.bits.reference_distance;
1543 brfd = (scale_factor * brfd) >> 8;
1544 brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
/* Overlap smoothing selection; rules differ between simple/main and
 * advanced profiles (several branch lines missing from this view). */
1551 if (profile != GEN7_VC1_ADVANCED_PROFILE){
1552 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1553 pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1557 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1558 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1561 if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1562 pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1563 if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1565 } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1566 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1572 assert(pic_param->conditional_overlap_flag < 3);
1573 assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
/* Sub-pel interpolation filter selection from the (effective) MV mode. */
1575 if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1576 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1577 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1578 interpolation_mode = 9; /* Half-pel bilinear */
1579 else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1580 (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1581 pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1582 interpolation_mode = 1; /* Half-pel bicubic */
1584 interpolation_mode = 0; /* Quarter-pel bicubic */
1586 BEGIN_BCS_BATCH(batch, 6);
1587 OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
/* DW1: frame size in MBs, minus one. */
1588 OUT_BCS_BATCH(batch,
1589 (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1590 ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
/* DW2: misc picture controls. */
1591 OUT_BCS_BATCH(batch,
1592 ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1593 dmv_surface_valid << 15 |
1594 (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1595 pic_param->rounding_control << 13 |
1596 pic_param->sequence_fields.bits.syncmarker << 12 |
1597 interpolation_mode << 8 |
1598 0 << 7 | /* FIXME: scale up or down ??? */
1599 pic_param->range_reduction_frame << 6 |
1600 pic_param->entrypoint_fields.bits.loopfilter << 5 |
1602 !pic_param->picture_fields.bits.is_first_field << 3 |
1603 (pic_param->sequence_fields.bits.profile == 3) << 0);
/* DW3: overlap smoothing, picture type, quantizer scale. */
1604 OUT_BCS_BATCH(batch,
1605 va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1606 picture_type << 26 |
1609 pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
/* DW4: MV mode, references, extended ranges and alt-pquant config. */
1611 OUT_BCS_BATCH(batch,
1612 unified_mv_mode << 28 |
1613 pic_param->mv_fields.bits.four_mv_switch << 27 |
1614 pic_param->fast_uvmc_flag << 26 |
1615 ref_field_pic_polarity << 25 |
1616 pic_param->reference_fields.bits.num_reference_pictures << 24 |
1617 pic_param->reference_fields.bits.reference_distance << 20 |
1618 pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1619 pic_param->mv_fields.bits.extended_dmv_range << 10 |
1620 pic_param->mv_fields.bits.extended_mv_range << 8 |
1621 alt_pquant_edge_mask << 4 |
1622 alt_pquant_config << 2 |
1623 pic_param->pic_quantizer_fields.bits.half_qp << 1 |
1624 pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
/* DW5: bitplane presence (inverted per-flag = "raw mode") and the
 * VLC table selectors. */
1625 OUT_BCS_BATCH(batch,
1626 !!pic_param->bitplane_present.value << 31 |
1627 !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1628 !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1629 !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1630 !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1631 !pic_param->bitplane_present.flags.bp_overflags << 26 |
1632 !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1633 !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1634 pic_param->mv_fields.bits.mv_table << 20 |
1635 pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1636 pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1637 pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1638 pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1639 pic_param->mb_mode_table << 8 |
1641 pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1642 pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1643 pic_param->cbp_table << 0);
1644 ADVANCE_BCS_BATCH(batch);
1648 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1649 struct decode_state *decode_state,
1650 struct gen7_mfd_context *gen7_mfd_context)
1652 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1653 VAPictureParameterBufferVC1 *pic_param;
1654 int intensitycomp_single;
1656 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1657 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1659 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1660 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1661 intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1663 BEGIN_BCS_BATCH(batch, 6);
1664 OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1665 OUT_BCS_BATCH(batch,
1666 0 << 14 | /* FIXME: double ??? */
1668 intensitycomp_single << 10 |
1669 intensitycomp_single << 8 |
1670 0 << 4 | /* FIXME: interlace mode */
1672 OUT_BCS_BATCH(batch,
1673 pic_param->luma_shift << 16 |
1674 pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1675 OUT_BCS_BATCH(batch, 0);
1676 OUT_BCS_BATCH(batch, 0);
1677 OUT_BCS_BATCH(batch, 0);
1678 ADVANCE_BCS_BATCH(batch);
/*
 * Emit MFX_VC1_DIRECTMODE_STATE (7 dwords): programs the direct-MV
 * write buffer (current picture) and read buffer (backward reference).
 * Either relocation is replaced by zero dwords when the corresponding
 * surface has no private DMV buffer.
 */
1682 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1683 struct decode_state *decode_state,
1684 struct gen7_mfd_context *gen7_mfd_context)
1686 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1687 struct object_surface *obj_surface;
1688 dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
/* MVs of the current picture are written to its own DMV buffer. */
1690 obj_surface = decode_state->render_object;
1692 if (obj_surface && obj_surface->private_data) {
1693 dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
/* Direct-mode MVs are read from the backward reference's buffer. */
1696 obj_surface = decode_state->reference_objects[1];
1698 if (obj_surface && obj_surface->private_data) {
1699 dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1702 BEGIN_BCS_BATCH(batch, 7);
1703 OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1705 if (dmv_write_buffer)
1706 OUT_BCS_RELOC(batch, dmv_write_buffer,
1707 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1710 OUT_BCS_BATCH(batch, 0);
1712 OUT_BCS_BATCH(batch, 0);
1713 OUT_BCS_BATCH(batch, 0);
1715 if (dmv_read_buffer)
1716 OUT_BCS_RELOC(batch, dmv_read_buffer,
1717 I915_GEM_DOMAIN_INSTRUCTION, 0,
1720 OUT_BCS_BATCH(batch, 0);
1722 OUT_BCS_BATCH(batch, 0);
1723 OUT_BCS_BATCH(batch, 0);
1725 ADVANCE_BCS_BATCH(batch);
/*
 * Translate the bit offset of the first macroblock of a VC-1 slice from
 * the unescaped-bitstream domain into the escaped (raw) bitstream
 * domain.
 *
 * For the advanced profile (profile == 3) the raw bitstream contains
 * 0x00 0x00 0x03 start-code-emulation-prevention bytes; each one found
 * inside the slice header shifts the macroblock data one byte further
 * into the buffer, so the header is re-scanned and those bytes skipped.
 * Other profiles have no escaping and the offset passes through
 * unchanged.
 *
 * buf:                      escaped slice data; must cover the header
 *                           plus a few lookahead bytes.
 * in_slice_data_bit_offset: macroblock bit offset in unescaped data.
 * profile:                  VC-1 sequence profile (3 == advanced).
 *
 * Returns the macroblock bit offset relative to the escaped data.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int out_slice_data_bit_offset;
    int slice_header_size = in_slice_data_bit_offset / 8;
    int i, j;

    if (profile != 3)
        out_slice_data_bit_offset = in_slice_data_bit_offset;
    else {
        /* i counts unescaped header bytes, j raw bytes. */
        for (i = 0, j = 0; i < slice_header_size; i++, j++) {
            /* 0x00 0x00 0x03 followed by 0x00..0x03 is an emulation
             * prevention pattern: the 0x03 byte exists only in the raw
             * stream, so j advances one extra byte relative to i. */
            if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
                i++, j += 2;
            }
        }

        out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
    }

    return out_slice_data_bit_offset;
}
/*
 * Emit one MFD_VC1_BSD_OBJECT command per slice.  The slice data is
 * mapped temporarily so the macroblock bit offset can be re-derived for
 * the escaped (raw) bitstream; the byte-aligned part of that offset is
 * folded into the data size/offset dwords and the bit remainder goes
 * into the last dword.
 */
1751 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1752 VAPictureParameterBufferVC1 *pic_param,
1753 VASliceParameterBufferVC1 *slice_param,
1754 VASliceParameterBufferVC1 *next_slice_param,
1755 dri_bo *slice_data_bo,
1756 struct gen7_mfd_context *gen7_mfd_context)
1758 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1759 int next_slice_start_vert_pos;
1760 int macroblock_offset;
1761 uint8_t *slice_data = NULL;
/* CPU-map the slice data just long enough to scan the slice header. */
1763 dri_bo_map(slice_data_bo, 0);
1764 slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1765 macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
1766 slice_param->macroblock_offset,
1767 pic_param->sequence_fields.bits.profile);
1768 dri_bo_unmap(slice_data_bo);
/* Last slice extends to the bottom MB row of the picture. */
1770 if (next_slice_param)
1771 next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1773 next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1775 BEGIN_BCS_BATCH(batch, 5);
1776 OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1777 OUT_BCS_BATCH(batch,
1778 slice_param->slice_data_size - (macroblock_offset >> 3));
1779 OUT_BCS_BATCH(batch,
1780 slice_param->slice_data_offset + (macroblock_offset >> 3));
1781 OUT_BCS_BATCH(batch,
1782 slice_param->slice_vertical_position << 16 |
1783 next_slice_start_vert_pos << 0);
1784 OUT_BCS_BATCH(batch,
1785 (macroblock_offset & 0x7));
1786 ADVANCE_BCS_BATCH(batch);
/*
 * Top-level VC-1 frame decode: per-frame init, common MFX pipe setup,
 * VC-1 picture / prediction-pipe / direct-mode state, then one BSD
 * object per slice.  Built atomically and flushed at the end, mirroring
 * the MPEG-2 and AVC decode paths.
 */
1790 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1791 struct decode_state *decode_state,
1792 struct gen7_mfd_context *gen7_mfd_context)
1794 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1795 VAPictureParameterBufferVC1 *pic_param;
1796 VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1797 dri_bo *slice_data_bo;
1800 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1801 pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1803 gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1804 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1805 intel_batchbuffer_emit_mi_flush(batch);
1806 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1807 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1808 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1809 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1810 gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1811 gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1812 gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1814 for (j = 0; j < decode_state->num_slice_params; j++) {
1815 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1816 slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1817 slice_data_bo = decode_state->slice_datas[j]->bo;
1818 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
/* Peek at the first slice of the next group so the last slice of this
 * group knows where the next one starts. */
1820 if (j == decode_state->num_slice_params - 1)
1821 next_slice_group_param = NULL;
1823 next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1825 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1826 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1828 if (i < decode_state->slice_params[j]->num_elements - 1)
1829 next_slice_param = slice_param + 1;
1831 next_slice_param = next_slice_group_param;
1833 gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1838 intel_batchbuffer_end_atomic(batch);
1839 intel_batchbuffer_flush(batch);
1843 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1844 struct decode_state *decode_state,
1845 struct gen7_mfd_context *gen7_mfd_context)
1847 struct object_surface *obj_surface;
1848 VAPictureParameterBufferJPEGBaseline *pic_param;
1849 int subsampling = SUBSAMPLE_YUV420;
1850 int fourcc = VA_FOURCC_IMC3;
1852 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
/* NOTE(review): tail of gen8_mfd_jpeg_decode_init() — the function header lies
 * before this chunk.  Maps the baseline-JPEG component sampling factors onto a
 * (subsampling, fourcc) pair, (re)allocates the render surface accordingly,
 * then points pre-deblocking output at it and clears all other per-context
 * scratch buffers (JPEG uses none of them). */
1854 if (pic_param->num_components == 1)
1855 subsampling = SUBSAMPLE_YUV400;
1856 else if (pic_param->num_components == 3) {
/* hN/vN = horizontal/vertical sampling factor of component N (Y, Cb, Cr). */
1857 int h1 = pic_param->components[0].h_sampling_factor;
1858 int h2 = pic_param->components[1].h_sampling_factor;
1859 int h3 = pic_param->components[2].h_sampling_factor;
1860 int v1 = pic_param->components[0].v_sampling_factor;
1861 int v2 = pic_param->components[1].v_sampling_factor;
1862 int v3 = pic_param->components[2].v_sampling_factor;
1864 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1865 v1 == 2 && v2 == 1 && v3 == 1) {
1866 subsampling = SUBSAMPLE_YUV420;
1867 fourcc = VA_FOURCC_IMC3;
1868 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1869 v1 == 1 && v2 == 1 && v3 == 1) {
1870 subsampling = SUBSAMPLE_YUV422H;
1871 fourcc = VA_FOURCC_422H;
1872 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1873 v1 == 1 && v2 == 1 && v3 == 1) {
1874 subsampling = SUBSAMPLE_YUV444;
1875 fourcc = VA_FOURCC_444P;
1876 } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1877 v1 == 1 && v2 == 1 && v3 == 1) {
1878 subsampling = SUBSAMPLE_YUV411;
1879 fourcc = VA_FOURCC_411P;
1880 } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1881 v1 == 2 && v2 == 1 && v3 == 1) {
1882 subsampling = SUBSAMPLE_YUV422V;
1883 fourcc = VA_FOURCC_422V;
1884 } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1885 v1 == 2 && v2 == 2 && v3 == 2) {
1886 subsampling = SUBSAMPLE_YUV422H;
1887 fourcc = VA_FOURCC_422H;
/* BUG(review): "h2 == 2 && h2 == 2" is almost certainly a copy-paste typo for
 * "h1 == 2 && h2 == 2" — h1 is never tested, so this branch can match frames
 * whose luma sampling does not fit 4:2:2V at all.  Compare the parallel
 * chroma_type ladder in gen8_mfd_jpeg_pic_state(). */
1888 } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1889 v1 == 2 && v2 == 1 && v3 == 1) {
1890 subsampling = SUBSAMPLE_YUV422V;
1891 fourcc = VA_FOURCC_422V;
1899 /* Current decoded picture */
1900 obj_surface = decode_state->render_object;
1901 i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
/* JPEG has no deblocking: decode straight into the pre-deblocking output. */
1903 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1904 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1905 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1906 gen7_mfd_context->pre_deblocking_output.valid = 1;
/* None of the row-store / bitplane scratch buffers are used for JPEG. */
1908 gen7_mfd_context->post_deblocking_output.bo = NULL;
1909 gen7_mfd_context->post_deblocking_output.valid = 0;
1911 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1912 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1914 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1915 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1917 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1918 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1920 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1921 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1923 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1924 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* VA rotation index -> GEN7 MFX_JPEG_PIC_STATE rotation field.
 * Only index 0 (no rotation) is used below. */
1927 static const int va_to_gen7_jpeg_rotation[4] = {
1928 GEN7_JPEG_ROTATION_0,
1929 GEN7_JPEG_ROTATION_90,
1930 GEN7_JPEG_ROTATION_180,
1931 GEN7_JPEG_ROTATION_270
1935 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1936 struct decode_state *decode_state,
1937 struct gen7_mfd_context *gen7_mfd_context)
1939 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1940 VAPictureParameterBufferJPEGBaseline *pic_param;
1941 int chroma_type = GEN7_YUV420;
1942 int frame_width_in_blks;
1943 int frame_height_in_blks;
1945 assert(decode_state->pic_param && decode_state->pic_param->buffer);
1946 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1948 if (pic_param->num_components == 1)
1949 chroma_type = GEN7_YUV400;
1950 else if (pic_param->num_components == 3) {
1951 int h1 = pic_param->components[0].h_sampling_factor;
1952 int h2 = pic_param->components[1].h_sampling_factor;
1953 int h3 = pic_param->components[2].h_sampling_factor;
1954 int v1 = pic_param->components[0].v_sampling_factor;
1955 int v2 = pic_param->components[1].v_sampling_factor;
1956 int v3 = pic_param->components[2].v_sampling_factor;
1958 if (h1 == 2 && h2 == 1 && h3 == 1 &&
1959 v1 == 2 && v2 == 1 && v3 == 1)
1960 chroma_type = GEN7_YUV420;
1961 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1962 v1 == 1 && v2 == 1 && v3 == 1)
1963 chroma_type = GEN7_YUV422H_2Y;
1964 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1965 v1 == 1 && v2 == 1 && v3 == 1)
1966 chroma_type = GEN7_YUV444;
1967 else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1968 v1 == 1 && v2 == 1 && v3 == 1)
1969 chroma_type = GEN7_YUV411;
1970 else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1971 v1 == 2 && v2 == 1 && v3 == 1)
1972 chroma_type = GEN7_YUV422V_2Y;
1973 else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1974 v1 == 2 && v2 == 2 && v3 == 2)
1975 chroma_type = GEN7_YUV422H_4Y;
1976 else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1977 v1 == 2 && v2 == 1 && v3 == 1)
1978 chroma_type = GEN7_YUV422V_4Y;
1983 if (chroma_type == GEN7_YUV400 ||
1984 chroma_type == GEN7_YUV444 ||
1985 chroma_type == GEN7_YUV422V_2Y) {
1986 frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
1987 frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
1988 } else if (chroma_type == GEN7_YUV411) {
1989 frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
1990 frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
1992 frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
1993 frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
1996 BEGIN_BCS_BATCH(batch, 3);
1997 OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
1998 OUT_BCS_BATCH(batch,
1999 (va_to_gen7_jpeg_rotation[0] << 4) | /* without rotation */
2000 (chroma_type << 0));
2001 OUT_BCS_BATCH(batch,
2002 ((frame_height_in_blks - 1) << 16) | /* FrameHeightInBlks */
2003 ((frame_width_in_blks - 1) << 0)); /* FrameWidthInBlks */
2004 ADVANCE_BCS_BATCH(batch);
2007 static const int va_to_gen7_jpeg_hufftable[2] = {
/* Upload up to num_tables Huffman tables via MFX_JPEG_HUFF_TABLE_STATE.
 * Tables whose load flag is not set in the VA buffer are skipped; the
 * function is a no-op when no huffman_table buffer was supplied. */
2013 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2014 struct decode_state *decode_state,
2015 struct gen7_mfd_context *gen7_mfd_context,
2018 VAHuffmanTableBufferJPEGBaseline *huffman_table;
2019 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2022 if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2025 huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2027 for (index = 0; index < num_tables; index++) {
2028 int id = va_to_gen7_jpeg_hufftable[index];
2029 if (!huffman_table->load_huffman_table[index])
/* 53 dwords = 1 header + 1 table id + 204 bytes of code-length counts
 * and symbol values (12 + 12 + 16 + 164). */
2031 BEGIN_BCS_BATCH(batch, 53);
2032 OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2033 OUT_BCS_BATCH(batch, id);
2034 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2035 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2036 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
/* NOTE(review): 164 bytes are read but VAHuffmanTableBufferJPEGBaseline's
 * ac_values holds 162 — presumably relies on the struct's trailing pad to
 * fill the command; verify against va_dec_jpeg.h. */
2037 intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2038 ADVANCE_BCS_BATCH(batch);
/* 1-based component id -> MFX quantizer-matrix type for MFX_QM_STATE.
 * (The entry for index 0 is not visible in this chunk.) */
2042 static const int va_to_gen7_jpeg_qm[5] = {
2044 MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2045 MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2046 MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2047 MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
/* Program one quantizer matrix per picture component.  VA supplies the
 * tables in zig-zag scan order; the hardware wants raster order, so each
 * selected table is de-zigzagged before being handed to gen8_mfd_qm_state(). */
2051 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2052 struct decode_state *decode_state,
2053 struct gen7_mfd_context *gen7_mfd_context)
2055 VAPictureParameterBufferJPEGBaseline *pic_param;
2056 VAIQMatrixBufferJPEGBaseline *iq_matrix;
2059 if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2062 iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2063 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2065 assert(pic_param->num_components <= 3);
2067 for (index = 0; index < pic_param->num_components; index++) {
/* id is the component id normalized so the first component maps to 1. */
2068 int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2070 unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2071 unsigned char raster_qm[64];
/* Skip components whose normalized id falls outside the 1..4 table range
 * or whose quantiser table was not loaded. */
2074 if (id > 4 || id < 1)
2077 if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2080 qm_type = va_to_gen7_jpeg_qm[id];
/* Convert zig-zag scan order to raster order. */
2082 for (j = 0; j < 64; j++)
2083 raster_qm[zigzag_direct[j]] = qm[j];
2085 gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
/* Emit one MFD_JPEG_BSD_OBJECT per scan: builds the Y/Cb/Cr scan-component
 * mask from the slice's component selectors and programs the slice data
 * location, MCU position/count and restart interval.
 * (The switch-case labels for components 1/2/3 are stripped in this view.) */
2090 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2091 VAPictureParameterBufferJPEGBaseline *pic_param,
2092 VASliceParameterBufferJPEGBaseline *slice_param,
2093 VASliceParameterBufferJPEGBaseline *next_slice_param,
2094 dri_bo *slice_data_bo,
2095 struct gen7_mfd_context *gen7_mfd_context)
2097 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2098 int scan_component_mask = 0;
2101 assert(slice_param->num_components > 0);
2102 assert(slice_param->num_components < 4);
2103 assert(slice_param->num_components <= pic_param->num_components);
2105 for (i = 0; i < slice_param->num_components; i++) {
/* Normalize the component selector so the first picture component is 1. */
2106 switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2108 scan_component_mask |= (1 << 0);
2111 scan_component_mask |= (1 << 1);
2114 scan_component_mask |= (1 << 2);
2122 BEGIN_BCS_BATCH(batch, 6);
2123 OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2124 OUT_BCS_BATCH(batch,
2125 slice_param->slice_data_size);
2126 OUT_BCS_BATCH(batch,
2127 slice_param->slice_data_offset);
2128 OUT_BCS_BATCH(batch,
2129 slice_param->slice_horizontal_position << 16 |
2130 slice_param->slice_vertical_position << 0);
2131 OUT_BCS_BATCH(batch,
2132 ((slice_param->num_components != 1) << 30) | /* interleaved */
2133 (scan_component_mask << 27) | /* scan components */
2134 (0 << 26) | /* disable interrupt allowed */
2135 (slice_param->num_mcus << 0)); /* MCU count */
2136 OUT_BCS_BATCH(batch,
2137 (slice_param->restart_interval << 0)); /* RestartInterval */
2138 ADVANCE_BCS_BATCH(batch);
2141 /* Workaround for JPEG decoding on Ivybridge */
/* Tiny canned AVC clip decoded before every real JPEG frame to put the MFX
 * engine into a known state.  Holds the bitstream bytes plus the metadata
 * (bit offset, etc.) needed to drive the AVC BSD object below.
 * (Several field declarations/initializers are stripped in this view.) */
2147 unsigned char data[32];
2149 int data_bit_offset;
2151 } gen7_jpeg_wa_clip = {
2155 0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2156 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
/* Allocate (or refresh) the scratch surface and slice-data BO that back the
 * JPEG workaround clip: destroys any previous WA surface, creates a fresh
 * YUV420/NV12 surface at the clip's dimensions, and uploads the canned AVC
 * bitstream into a lazily-allocated BO. */
2164 gen8_jpeg_wa_init(VADriverContextP ctx,
2165 struct gen7_mfd_context *gen7_mfd_context)
2167 struct i965_driver_data *i965 = i965_driver_data(ctx);
2169 struct object_surface *obj_surface;
2171 if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2172 i965_DestroySurfaces(ctx,
2173 &gen7_mfd_context->jpeg_wa_surface_id,
2176 status = i965_CreateSurfaces(ctx,
2177 gen7_jpeg_wa_clip.width,
2178 gen7_jpeg_wa_clip.height,
2179 VA_RT_FORMAT_YUV420,
2181 &gen7_mfd_context->jpeg_wa_surface_id);
2182 assert(status == VA_STATUS_SUCCESS);
2184 obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2185 assert(obj_surface);
2186 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2187 gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
/* The slice-data BO is allocated once and reused; re-upload the clip bytes
 * each time in case the BO was just created. */
2189 if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2190 gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2194 dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2196 gen7_jpeg_wa_clip.data_size,
2197 gen7_jpeg_wa_clip.data);
/* MFX_PIPE_MODE_SELECT for the workaround pass: long-format VLD decode in
 * AVC mode with pre-deblocking output only and stream-out disabled. */
2202 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2203 struct gen7_mfd_context *gen7_mfd_context)
2205 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2207 BEGIN_BCS_BATCH(batch, 5);
2208 OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2209 OUT_BCS_BATCH(batch,
2210 (MFX_LONG_MODE << 17) | /* Currently only support long format */
2211 (MFD_MODE_VLD << 15) | /* VLD mode */
2212 (0 << 10) | /* disable Stream-Out */
2213 (0 << 9) | /* Post Deblocking Output */
2214 (1 << 8) | /* Pre Deblocking Output */
2215 (0 << 5) | /* not in stitch mode */
2216 (MFX_CODEC_DECODE << 4) | /* decoding mode */
2217 (MFX_FORMAT_AVC << 0));
2218 OUT_BCS_BATCH(batch,
2219 (0 << 4) | /* terminate if AVC motion and POC table error occurs */
2220 (0 << 3) | /* terminate if AVC mbdata error occurs */
2221 (0 << 2) | /* terminate if AVC CABAC/CAVLC decode error occurs */
2224 OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
2225 OUT_BCS_BATCH(batch, 0); /* reserved */
2226 ADVANCE_BCS_BATCH(batch);
/* MFX_SURFACE_STATE describing the NV12 workaround surface allocated by
 * gen8_jpeg_wa_init(): planar 4:2:0, interleaved chroma, Y-major tiling. */
2230 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2231 struct gen7_mfd_context *gen7_mfd_context)
2233 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2234 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2236 BEGIN_BCS_BATCH(batch, 6);
2237 OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2238 OUT_BCS_BATCH(batch, 0);
2239 OUT_BCS_BATCH(batch,
2240 ((obj_surface->orig_width - 1) << 18) |
2241 ((obj_surface->orig_height - 1) << 4));
2242 OUT_BCS_BATCH(batch,
2243 (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2244 (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2245 (0 << 22) | /* surface object control state, ignored */
2246 ((obj_surface->width - 1) << 3) | /* pitch */
2247 (0 << 2) | /* must be 0 */
2248 (1 << 1) | /* must be tiled */
2249 (I965_TILEWALK_YMAJOR << 0)); /* tile walk, must be 1 */
2250 OUT_BCS_BATCH(batch,
2251 (0 << 16) | /* X offset for U(Cb), must be 0 */
2252 (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2253 OUT_BCS_BATCH(batch,
2254 (0 << 16) | /* X offset for V(Cr), must be 0 */
2255 (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2256 ADVANCE_BCS_BATCH(batch);
/* MFX_PIPE_BUF_ADDR_STATE for the workaround pass.  Only the pre-deblocking
 * destination (the WA surface BO) and a throwaway intra-row-store BO are
 * real; every other address slot is zeroed.  The intra BO is released as
 * soon as it has been relocated into the batch. */
2260 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2261 struct gen7_mfd_context *gen7_mfd_context)
2263 struct i965_driver_data *i965 = i965_driver_data(ctx);
2264 struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2265 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2269 intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2274 BEGIN_BCS_BATCH(batch, 61);
2275 OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2276 OUT_BCS_RELOC(batch,
2278 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2280 OUT_BCS_BATCH(batch, 0);
2281 OUT_BCS_BATCH(batch, 0);
2284 OUT_BCS_BATCH(batch, 0); /* post deblocking */
2285 OUT_BCS_BATCH(batch, 0);
2286 OUT_BCS_BATCH(batch, 0);
2288 /* uncompressed-video & stream out 7-12 */
2289 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2290 OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2291 OUT_BCS_BATCH(batch, 0);
2292 OUT_BCS_BATCH(batch, 0);
2293 OUT_BCS_BATCH(batch, 0);
2294 OUT_BCS_BATCH(batch, 0);
2296 /* the DW 13-15 is for intra row store scratch */
2297 OUT_BCS_RELOC(batch,
2299 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2301 OUT_BCS_BATCH(batch, 0);
2302 OUT_BCS_BATCH(batch, 0);
2304 /* the DW 16-18 is for deblocking filter */
2305 OUT_BCS_BATCH(batch, 0);
2306 OUT_BCS_BATCH(batch, 0);
2307 OUT_BCS_BATCH(batch, 0);
/* No reference frames are needed for the canned intra-only clip. */
2310 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2311 OUT_BCS_BATCH(batch, 0);
2312 OUT_BCS_BATCH(batch, 0);
2314 OUT_BCS_BATCH(batch, 0);
2316 /* the DW52-54 is for mb status address */
2317 OUT_BCS_BATCH(batch, 0);
2318 OUT_BCS_BATCH(batch, 0);
2319 OUT_BCS_BATCH(batch, 0);
2320 /* the DW56-60 is for ILDB & second ILDB address */
2321 OUT_BCS_BATCH(batch, 0);
2322 OUT_BCS_BATCH(batch, 0);
2323 OUT_BCS_BATCH(batch, 0);
2324 OUT_BCS_BATCH(batch, 0);
2325 OUT_BCS_BATCH(batch, 0);
2326 OUT_BCS_BATCH(batch, 0);
2328 ADVANCE_BCS_BATCH(batch);
/* The reloc above keeps the BO alive until the batch completes. */
2330 dri_bo_unreference(intra_bo);
/* MFX_BSP_BUF_BASE_ADDR_STATE for the workaround pass: allocates transient
 * BSD/MPC and MPR row-store BOs, relocates them into the command, and drops
 * the local references (the relocs keep them alive until execution). */
2334 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2335 struct gen7_mfd_context *gen7_mfd_context)
2337 struct i965_driver_data *i965 = i965_driver_data(ctx);
2338 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2339 dri_bo *bsd_mpc_bo, *mpr_bo;
2341 bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2342 "bsd mpc row store",
2343 11520, /* 1.5 * 120 * 64 */
2346 mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2348 7680, /* 1.0 * 120 * 64 */
2351 BEGIN_BCS_BATCH(batch, 10);
2352 OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2354 OUT_BCS_RELOC(batch,
2356 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2359 OUT_BCS_BATCH(batch, 0);
2360 OUT_BCS_BATCH(batch, 0);
2362 OUT_BCS_RELOC(batch,
2364 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2366 OUT_BCS_BATCH(batch, 0);
2367 OUT_BCS_BATCH(batch, 0);
2369 OUT_BCS_BATCH(batch, 0);
2370 OUT_BCS_BATCH(batch, 0);
2371 OUT_BCS_BATCH(batch, 0);
2373 ADVANCE_BCS_BATCH(batch);
2375 dri_bo_unreference(bsd_mpc_bo);
2376 dri_bo_unreference(mpr_bo);
/* Intentionally empty for the workaround clip — no quantizer matrices are
 * programmed (function body lines are stripped in this view). */
2380 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2381 struct gen7_mfd_context *gen7_mfd_context)
/* MFX_AVC_IMG_STATE for the workaround clip: a single 1x1-macroblock CABAC
 * frame with MBAFF disabled; most of the 16 dwords are zero. */
2387 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2388 struct gen7_mfd_context *gen7_mfd_context)
2390 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2392 int mbaff_frame_flag = 0;
2393 unsigned int width_in_mbs = 1, height_in_mbs = 1;
2395 BEGIN_BCS_BATCH(batch, 16);
2396 OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2397 OUT_BCS_BATCH(batch,
2398 width_in_mbs * height_in_mbs);
2399 OUT_BCS_BATCH(batch,
2400 ((height_in_mbs - 1) << 16) |
2401 ((width_in_mbs - 1) << 0));
2402 OUT_BCS_BATCH(batch,
2407 (0 << 12) | /* differ from GEN6 */
2410 OUT_BCS_BATCH(batch,
2411 (1 << 10) | /* 4:2:0 */
2412 (1 << 7) | /* CABAC */
2418 (mbaff_frame_flag << 1) |
2420 OUT_BCS_BATCH(batch, 0);
2421 OUT_BCS_BATCH(batch, 0);
2422 OUT_BCS_BATCH(batch, 0);
2423 OUT_BCS_BATCH(batch, 0);
2424 OUT_BCS_BATCH(batch, 0);
2425 OUT_BCS_BATCH(batch, 0);
2426 OUT_BCS_BATCH(batch, 0);
2427 OUT_BCS_BATCH(batch, 0);
2428 OUT_BCS_BATCH(batch, 0);
2429 OUT_BCS_BATCH(batch, 0);
2430 OUT_BCS_BATCH(batch, 0);
2431 ADVANCE_BCS_BATCH(batch);
/* MFX_AVC_DIRECTMODE_STATE for the workaround clip: all reference-surface
 * and POC slots are zero since the canned clip has no references. */
2435 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2436 struct gen7_mfd_context *gen7_mfd_context)
2438 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2441 BEGIN_BCS_BATCH(batch, 71);
2442 OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2444 /* reference surfaces 0..15 */
2445 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2446 OUT_BCS_BATCH(batch, 0); /* top */
2447 OUT_BCS_BATCH(batch, 0); /* bottom */
2450 OUT_BCS_BATCH(batch, 0);
2452 /* the current decoding frame/field */
2453 OUT_BCS_BATCH(batch, 0); /* top */
2454 OUT_BCS_BATCH(batch, 0);
2455 OUT_BCS_BATCH(batch, 0);
/* POC list entries, all zero for the intra-only clip. */
2458 for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2459 OUT_BCS_BATCH(batch, 0);
2460 OUT_BCS_BATCH(batch, 0);
2463 OUT_BCS_BATCH(batch, 0);
2464 OUT_BCS_BATCH(batch, 0);
2466 ADVANCE_BCS_BATCH(batch);
/* MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream fetcher at the canned
 * workaround clip's slice-data BO; the non-VLD address pairs are zeroed. */
2470 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2471 struct gen7_mfd_context *gen7_mfd_context)
2473 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2475 BEGIN_BCS_BATCH(batch, 11);
2476 OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2477 OUT_BCS_RELOC(batch,
2478 gen7_mfd_context->jpeg_wa_slice_data_bo,
2479 I915_GEM_DOMAIN_INSTRUCTION, 0,
2481 OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2482 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2483 OUT_BCS_BATCH(batch, 0);
2484 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2485 OUT_BCS_BATCH(batch, 0);
2486 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2487 OUT_BCS_BATCH(batch, 0);
2488 OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2489 OUT_BCS_BATCH(batch, 0);
2490 ADVANCE_BCS_BATCH(batch);
/* MFD_AVC_BSD_OBJECT for the workaround clip: one last slice covering the
 * whole canned bitstream, with first-macroblock byte/bit offsets taken from
 * gen7_jpeg_wa_clip. */
2494 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2495 struct gen7_mfd_context *gen7_mfd_context)
2497 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2499 /* the input bitstream format on GEN7 differs from GEN6 */
2500 BEGIN_BCS_BATCH(batch, 6);
2501 OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2502 OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2503 OUT_BCS_BATCH(batch, 0);
2504 OUT_BCS_BATCH(batch,
2510 OUT_BCS_BATCH(batch,
2511 ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2514 (1 << 3) | /* LastSlice Flag */
2515 (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2516 OUT_BCS_BATCH(batch, 0);
2517 ADVANCE_BCS_BATCH(batch);
/* MFX_AVC_SLICE_STATE for the workaround clip: a single intra (I) slice at
 * macroblock (0,0) with deblocking disabled and the last-slice flag set. */
2521 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2522 struct gen7_mfd_context *gen7_mfd_context)
2524 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2525 int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2526 int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2527 int first_mb_in_slice = 0;
2528 int slice_type = SLICE_TYPE_I;
2530 BEGIN_BCS_BATCH(batch, 11);
2531 OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2532 OUT_BCS_BATCH(batch, slice_type);
2533 OUT_BCS_BATCH(batch,
2534 (num_ref_idx_l1 << 24) |
2535 (num_ref_idx_l0 << 16) |
2538 OUT_BCS_BATCH(batch,
2540 (1 << 27) | /* disable Deblocking */
2542 (gen7_jpeg_wa_clip.qp << 16) |
2545 OUT_BCS_BATCH(batch,
2546 (slice_ver_pos << 24) |
2547 (slice_hor_pos << 16) |
2548 (first_mb_in_slice << 0));
2549 OUT_BCS_BATCH(batch,
2550 (next_slice_ver_pos << 16) |
2551 (next_slice_hor_pos << 0));
2552 OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2553 OUT_BCS_BATCH(batch, 0);
2554 OUT_BCS_BATCH(batch, 0);
2555 OUT_BCS_BATCH(batch, 0);
2556 OUT_BCS_BATCH(batch, 0);
2557 ADVANCE_BCS_BATCH(batch);
/* Run the full Ivybridge JPEG workaround: decode the tiny canned AVC clip
 * (init + MI_FLUSH + the complete MFX state/BSD sequence) so the MFX engine
 * is in a known state before the real JPEG decode begins. */
2561 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2562 struct gen7_mfd_context *gen7_mfd_context)
2564 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2565 gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2566 intel_batchbuffer_emit_mi_flush(batch);
2567 gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2568 gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2569 gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2570 gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2571 gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2572 gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2573 gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2575 gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2576 gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2577 gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
/* Top-level baseline-JPEG decode: runs the WA clip, programs the common MFX
 * state, scans all slices once to find the highest DC/AC table selector in
 * use (so only the needed Huffman tables are uploaded), then iterates the
 * slices a second time emitting one BSD object per scan, and flushes. */
2583 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2584 struct decode_state *decode_state,
2585 struct gen7_mfd_context *gen7_mfd_context)
2587 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2588 VAPictureParameterBufferJPEGBaseline *pic_param;
2589 VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2590 dri_bo *slice_data_bo;
2591 int i, j, max_selector = 0;
2593 assert(decode_state->pic_param && decode_state->pic_param->buffer);
2594 pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2596 /* Currently only support Baseline DCT */
2597 gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2598 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2600 gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2602 intel_batchbuffer_emit_mi_flush(batch);
2603 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2604 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2605 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2606 gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2607 gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
/* First pass: only computes max_selector across all scans' components. */
2609 for (j = 0; j < decode_state->num_slice_params; j++) {
2610 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2611 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2612 slice_data_bo = decode_state->slice_datas[j]->bo;
2613 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2615 if (j == decode_state->num_slice_params - 1)
2616 next_slice_group_param = NULL;
2618 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2620 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2623 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2625 if (i < decode_state->slice_params[j]->num_elements - 1)
2626 next_slice_param = slice_param + 1;
2628 next_slice_param = next_slice_group_param;
2630 for (component = 0; component < slice_param->num_components; component++) {
2631 if (max_selector < slice_param->components[component].dc_table_selector)
2632 max_selector = slice_param->components[component].dc_table_selector;
2634 if (max_selector < slice_param->components[component].ac_table_selector)
2635 max_selector = slice_param->components[component].ac_table_selector;
2642 assert(max_selector < 2);
2643 gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
/* Second pass: emit IND_OBJ base + BSD object for every scan. */
2645 for (j = 0; j < decode_state->num_slice_params; j++) {
2646 assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2647 slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2648 slice_data_bo = decode_state->slice_datas[j]->bo;
2649 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2651 if (j == decode_state->num_slice_params - 1)
2652 next_slice_group_param = NULL;
2654 next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2656 for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2657 assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2659 if (i < decode_state->slice_params[j]->num_elements - 1)
2660 next_slice_param = slice_param + 1;
2662 next_slice_param = next_slice_group_param;
2664 gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2669 intel_batchbuffer_end_atomic(batch);
2670 intel_batchbuffer_flush(batch);
/* VP8 DC dequantization lookup table, indexed by a quantization index
 * clipped to 0..127 (see VP8 spec / RFC 6386 section 14.1). */
2673 static const int vp8_dc_qlookup[128] =
2675 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17,
2676 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28,
2677 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43,
2678 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
2679 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
2680 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
2681 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2682 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
/* VP8 AC dequantization lookup table, indexed by a quantization index
 * clipped to 0..127 (see VP8 spec / RFC 6386 section 14.1). */
2685 static const int vp8_ac_qlookup[128] =
2687 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
2688 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
2689 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
2690 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76,
2691 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108,
2692 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2693 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2694 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2697 static inline unsigned int vp8_clip_quantization_index(int index)
/* Per-frame VP8 decode setup: validates frame size (up to 4K in MBs),
 * refreshes the reference frame store, binds the render surface to the
 * pre-/post-deblocking outputs depending on loop_filter_disable, ensures the
 * segmentation buffer exists, and (re)allocates the row-store scratch BOs. */
2708 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2709 struct decode_state *decode_state,
2710 struct gen7_mfd_context *gen7_mfd_context)
2712 struct object_surface *obj_surface;
2713 struct i965_driver_data *i965 = i965_driver_data(ctx);
2715 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2716 int width_in_mbs = (pic_param->frame_width + 15) / 16;
2717 int height_in_mbs = (pic_param->frame_height + 15) / 16;
2719 assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2720 assert(height_in_mbs > 0 && height_in_mbs <= 256);
2722 intel_update_vp8_frame_store_index(ctx,
2725 gen7_mfd_context->reference_surface);
2727 /* Current decoded picture */
2728 obj_surface = decode_state->render_object;
2729 i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
/* Exactly one of pre-/post-deblocking output is valid, chosen by whether
 * the in-loop filter is enabled for this frame. */
2731 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2732 gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2733 dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2734 gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2736 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2737 gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2738 dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2739 gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2741 intel_ensure_vp8_segmentation_buffer(ctx,
2742 &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2744 /* The same as AVC */
2745 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2746 bo = dri_bo_alloc(i965->intel.bufmgr,
2751 gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2752 gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2754 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2755 bo = dri_bo_alloc(i965->intel.bufmgr,
2756 "deblocking filter row store",
2757 width_in_mbs * 64 * 4,
2760 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2761 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2763 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2764 bo = dri_bo_alloc(i965->intel.bufmgr,
2765 "bsd mpc row store",
2766 width_in_mbs * 64 * 2,
2769 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2770 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2772 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2773 bo = dri_bo_alloc(i965->intel.bufmgr,
2775 width_in_mbs * 64 * 2,
2778 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2779 gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
/* VP8 has no bitplane buffer. */
2781 gen7_mfd_context->bitplane_read_buffer.valid = 0;
/* Emit the 38-dword MFX_VP8_PIC_STATE: frame geometry, loop-filter and
 * segmentation flags, dequantization values for the 4 segments, the
 * coefficient-probability BO, mode/MV probabilities, loop-filter deltas and
 * the segmentation-id stream address. */
2785 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2786 struct decode_state *decode_state,
2787 struct gen7_mfd_context *gen7_mfd_context)
2789 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2790 VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2791 VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2792 VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2793 dri_bo *probs_bo = decode_state->probability_data->bo;
2795 unsigned int quantization_value[4][6];
2797 /* There is no safe way to error out if the segmentation buffer
2798 could not be allocated. So, instead of aborting, simply decode
2799 something even if the result may look totally inaccurate */
2800 const unsigned int enable_segmentation =
2801 pic_param->pic_fields.bits.segmentation_enabled &&
2802 gen7_mfd_context->segmentation_buffer.valid;
/* NOTE(review): log2(num_of_partitions - 1) assumes at least 2 partitions
 * (1/2/4/8 token partitions per VP8) — confirm callers guarantee this. */
2804 log2num = (int)log2(slice_param->num_of_partitions - 1);
2806 BEGIN_BCS_BATCH(batch, 38);
2807 OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2808 OUT_BCS_BATCH(batch,
2809 (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2810 (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2811 OUT_BCS_BATCH(batch,
2813 pic_param->pic_fields.bits.sharpness_level << 16 |
2814 pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2815 pic_param->pic_fields.bits.sign_bias_golden << 12 |
2816 pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2817 pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2818 pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2819 pic_param->pic_fields.bits.segmentation_enabled << 8 |
2820 (enable_segmentation &&
2821 !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2822 (enable_segmentation &&
2823 pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2824 (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
2825 pic_param->pic_fields.bits.filter_type << 4 |
2826 (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2827 !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2829 OUT_BCS_BATCH(batch,
2830 pic_param->loop_filter_level[3] << 24 |
2831 pic_param->loop_filter_level[2] << 16 |
2832 pic_param->loop_filter_level[1] << 8 |
2833 pic_param->loop_filter_level[0] << 0);
2835 /* Quantizer Value for 4 segments, DW4-DW15 */
2836 for (i = 0; i < 4; i++) {
2837 quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
2838 quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
2839 quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
2840 /* 101581>>16 is equivalent to 155/100 */
2841 quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
2842 quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
2843 quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/
/* Spec-mandated clamps: y2ac floor of 8, uvdc ceiling of 132. */
2845 quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2846 quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2848 OUT_BCS_BATCH(batch,
2849 quantization_value[i][0] << 16 | /* Y1AC */
2850 quantization_value[i][1] << 0); /* Y1DC */
2851 OUT_BCS_BATCH(batch,
2852 quantization_value[i][5] << 16 | /* UVAC */
2853 quantization_value[i][4] << 0); /* UVDC */
2854 OUT_BCS_BATCH(batch,
2855 quantization_value[i][3] << 16 | /* Y2AC */
2856 quantization_value[i][2] << 0); /* Y2DC */
2859 /* CoeffProbability table for non-key frame, DW16-DW18 */
2861 OUT_BCS_RELOC(batch, probs_bo,
2862 0, I915_GEM_DOMAIN_INSTRUCTION,
2864 OUT_BCS_BATCH(batch, 0);
2865 OUT_BCS_BATCH(batch, 0);
2867 OUT_BCS_BATCH(batch, 0);
2868 OUT_BCS_BATCH(batch, 0);
2869 OUT_BCS_BATCH(batch, 0);
2872 OUT_BCS_BATCH(batch,
2873 pic_param->mb_segment_tree_probs[2] << 16 |
2874 pic_param->mb_segment_tree_probs[1] << 8 |
2875 pic_param->mb_segment_tree_probs[0] << 0);
2877 OUT_BCS_BATCH(batch,
2878 pic_param->prob_skip_false << 24 |
2879 pic_param->prob_intra << 16 |
2880 pic_param->prob_last << 8 |
2881 pic_param->prob_gf << 0);
2883 OUT_BCS_BATCH(batch,
2884 pic_param->y_mode_probs[3] << 24 |
2885 pic_param->y_mode_probs[2] << 16 |
2886 pic_param->y_mode_probs[1] << 8 |
2887 pic_param->y_mode_probs[0] << 0);
2889 OUT_BCS_BATCH(batch,
2890 pic_param->uv_mode_probs[2] << 16 |
2891 pic_param->uv_mode_probs[1] << 8 |
2892 pic_param->uv_mode_probs[0] << 0);
2894 /* MV update value, DW23-DW32 */
2895 for (i = 0; i < 2; i++) {
2896 for (j = 0; j < 20; j += 4) {
/* mv_probs has 19 entries per list; zero the nonexistent 20th. */
2897 OUT_BCS_BATCH(batch,
2898 (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2899 pic_param->mv_probs[i][j + 2] << 16 |
2900 pic_param->mv_probs[i][j + 1] << 8 |
2901 pic_param->mv_probs[i][j + 0] << 0);
2905 OUT_BCS_BATCH(batch,
2906 (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2907 (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2908 (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) << 8 |
2909 (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) << 0);
2911 OUT_BCS_BATCH(batch,
2912 (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2913 (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2914 (pic_param->loop_filter_deltas_mode[1] & 0x7f) << 8 |
2915 (pic_param->loop_filter_deltas_mode[0] & 0x7f) << 0);
2917 /* segmentation id stream base address, DW35-DW37 */
2918 if (enable_segmentation) {
2919 OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2920 0, I915_GEM_DOMAIN_INSTRUCTION,
2922 OUT_BCS_BATCH(batch, 0);
2923 OUT_BCS_BATCH(batch, 0);
2926 OUT_BCS_BATCH(batch, 0);
2927 OUT_BCS_BATCH(batch, 0);
2928 OUT_BCS_BATCH(batch, 0);
2930 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vp8_bsd_object - emit the MFD_VP8_BSD_OBJECT command (22 DWs)
 * describing where each VP8 partition lives inside slice_data_bo and how
 * the hardware should resume the partition-0 boolean decoder.
 *
 * NOTE(review): several physical lines are elided in this view (e.g. the
 * body of the used_bits == 8 branch and the declaration of log2num);
 * comments below only describe what the visible code establishes.
 */
2934 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2935 VAPictureParameterBufferVP8 *pic_param,
2936 VASliceParameterBufferVP8 *slice_param,
2937 dri_bo *slice_data_bo,
2938 struct gen7_mfd_context *gen7_mfd_context)
2940 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
/* Byte offset of partition 0 inside the slice data: macroblock_offset is
 * in bits, so round it up to a whole byte. */
2942 unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
/* Bits of the current bool-decoder byte already consumed by the parser;
 * bool_coder_ctx.count is the number of bits still unread (0..7 asserted
 * below). */
2943 unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
2944 unsigned int partition_size_0 = slice_param->partition_size[0];
2946 assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
/* count == 0 means the current byte is fully consumed; the visible line
 * shrinks partition 0 by that byte (presumably used_bits/offset are also
 * adjusted in the elided lines — TODO confirm against full source). */
2947 if (used_bits == 8) {
2950 partition_size_0 -= 1;
/* A VP8 frame has the mode/mv partition plus 1..8 token partitions,
 * hence 2..9 total. */
2953 assert(slice_param->num_of_partitions >= 2);
2954 assert(slice_param->num_of_partitions <= 9);
/* log2 of the token-partition count (num_of_partitions - 1). */
2956 log2num = (int)log2(slice_param->num_of_partitions - 1);
2958 BEGIN_BCS_BATCH(batch, 22);
2959 OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2960 OUT_BCS_BATCH(batch,
2961 used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2962 pic_param->bool_coder_ctx.range << 8 | /* Partition 0 Count Entropy Range */
2964 (slice_param->macroblock_offset & 0x7));
2965 OUT_BCS_BATCH(batch,
2966 pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2969 OUT_BCS_BATCH(batch, partition_size_0);
2970 OUT_BCS_BATCH(batch, offset);
2971 // Partition sizes (3 bytes each) are stored right after partition 0 when there is more than one token partition.
2972 offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
/* Emit size/offset pairs for all 8 token-partition slots; slots beyond
 * num_of_partitions are zeroed. */
2973 for (i = 1; i < 9; i++) {
2974 if (i < slice_param->num_of_partitions) {
2975 OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2976 OUT_BCS_BATCH(batch, offset);
2978 OUT_BCS_BATCH(batch, 0);
2979 OUT_BCS_BATCH(batch, 0);
2982 offset += slice_param->partition_size[i];
2985 OUT_BCS_BATCH(batch,
2986 1 << 31 | /* concealment method */
2989 ADVANCE_BCS_BATCH(batch);
/*
 * gen8_mfd_vp8_decode_picture - top-level VP8 frame decode.
 * Validates the VA-API buffers (VP8 is strictly one slice per frame),
 * then emits the full MFX command sequence for the frame into the BCS
 * batch buffer and flushes it to the kernel.
 */
2993 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
2994 struct decode_state *decode_state,
2995 struct gen7_mfd_context *gen7_mfd_context)
2997 struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2998 VAPictureParameterBufferVP8 *pic_param;
2999 VASliceParameterBufferVP8 *slice_param;
3000 dri_bo *slice_data_bo;
3002 assert(decode_state->pic_param && decode_state->pic_param->buffer);
3003 pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3005 /* one slice per frame */
/* Bail out unless we have exactly: one slice-param buffer holding one
 * element, slice data with a backing bo, and a probability-data buffer. */
3006 if (decode_state->num_slice_params != 1 ||
3007 (!decode_state->slice_params ||
3008 !decode_state->slice_params[0] ||
3009 (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3010 (!decode_state->slice_datas ||
3011 !decode_state->slice_datas[0] ||
3012 !decode_state->slice_datas[0]->bo) ||
3013 !decode_state->probability_data) {
3014 WARN_ONCE("Wrong parameters for VP8 decoding\n");
3019 slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3020 slice_data_bo = decode_state->slice_datas[0]->bo;
3022 gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
/* Emit the MFX pipeline in its required fixed order: flush, pipe-mode
 * select, surface state, buffer addresses, BSD/MPC buffer bases,
 * indirect-object base, picture state, then the per-frame BSD object. */
3023 intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3024 intel_batchbuffer_emit_mi_flush(batch);
3025 gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3026 gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3027 gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3028 gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3029 gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3030 gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3031 gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3032 intel_batchbuffer_end_atomic(batch);
3033 intel_batchbuffer_flush(batch);
/*
 * gen8_mfd_decode_picture - hw_context->run entry point for decoding.
 * Sanity-checks the input buffers for the requested profile, then
 * dispatches to the per-codec decode routine.
 * (The `profile` parameter and the `switch` header are on lines elided
 * from this view.)
 */
3037 gen8_mfd_decode_picture(VADriverContextP ctx,
3039 union codec_state *codec_state,
3040 struct hw_context *hw_context)
3043 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3044 struct decode_state *decode_state = &codec_state->decode;
3047 assert(gen7_mfd_context);
/* Validate incoming VA buffers before touching hardware state. */
3049 vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3051 if (vaStatus != VA_STATUS_SUCCESS)
/* Reset the MPEG-2 slice-vertical-position workaround tracker for each
 * new picture. */
3054 gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
/* Per-profile dispatch (switch statement; break lines elided in view). */
3057 case VAProfileMPEG2Simple:
3058 case VAProfileMPEG2Main:
3059 gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3062 case VAProfileH264ConstrainedBaseline:
3063 case VAProfileH264Main:
3064 case VAProfileH264High:
3065 case VAProfileH264StereoHigh:
3066 case VAProfileH264MultiviewHigh:
3067 gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3070 case VAProfileVC1Simple:
3071 case VAProfileVC1Main:
3072 case VAProfileVC1Advanced:
3073 gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3076 case VAProfileJPEGBaseline:
3077 gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3080 case VAProfileVP8Version0_3:
3081 gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3089 vaStatus = VA_STATUS_SUCCESS;
/*
 * gen8_mfd_context_destroy - hw_context->destroy callback.
 * Drops the reference on every scratch/output buffer object owned by the
 * decoder context, frees the batch buffer, and finally frees the context
 * itself. Pointers are NULLed after unreference to guard against
 * accidental reuse.
 */
3096 gen8_mfd_context_destroy(void *hw_context)
3098 struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3100 dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3101 gen7_mfd_context->post_deblocking_output.bo = NULL;
3103 dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3104 gen7_mfd_context->pre_deblocking_output.bo = NULL;
3106 dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3107 gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3109 dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3110 gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3112 dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3113 gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3115 dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3116 gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3118 dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3119 gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3121 dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3122 gen7_mfd_context->segmentation_buffer.bo = NULL;
/* JPEG workaround slice data: only unreferenced, not NULLed, since the
 * whole context is freed right below. */
3124 dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3126 intel_batchbuffer_free(gen7_mfd_context->base.batch);
3127 free(gen7_mfd_context);
/*
 * gen8_mfd_mpeg2_context_init - MPEG-2 specific context setup.
 * Sets every cached IQ-matrix "load" flag to -1, presumably marking the
 * matrices as not-yet-loaded so the first picture forces an upload —
 * TODO confirm against the MPEG-2 pic/IQ state emission code.
 */
3130 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3131 struct gen7_mfd_context *gen7_mfd_context)
3133 gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3134 gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3135 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3136 gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
/*
 * gen8_dec_hw_context_init - allocate and initialize a gen8 MFD decoder
 * context for the given config.
 * Wires up the destroy/run vtable, creates the BCS batch buffer,
 * invalidates all reference-surface slots, and performs per-codec
 * initialization before returning the context as a generic hw_context.
 * NOTE(review): the calloc result is used without a visible NULL check —
 * may be guarded on a line elided from this view; confirm upstream.
 */
3140 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3142 struct intel_driver_data *intel = intel_driver_data(ctx);
3143 struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3146 gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3147 gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3148 gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
/* Mark every reference-surface slot empty until the first frame binds
 * real surfaces. */
3150 for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3151 gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3152 gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3155 gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3156 gen7_mfd_context->segmentation_buffer.valid = 0;
/* Codec-specific one-time setup (switch; break lines elided in view). */
3158 switch (obj_config->profile) {
3159 case VAProfileMPEG2Simple:
3160 case VAProfileMPEG2Main:
3161 gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3164 case VAProfileH264ConstrainedBaseline:
3165 case VAProfileH264Main:
3166 case VAProfileH264High:
3167 case VAProfileH264StereoHigh:
3168 case VAProfileH264MultiviewHigh:
3169 gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3174 return (struct hw_context *)gen7_mfd_context;