render: don't deallocate surface storage of displayed frames.
[platform/upstream/libva-intel-driver.git] / src / gen7_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include "sysdeps.h"
30
31 #include <va/va_dec_jpeg.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_decoder_utils.h"
38
39 #include "gen7_mfd.h"
40 #include "intel_media.h"
41
/*
 * Zig-zag scan table for an 8x8 block: entry k holds the raster index
 * (row * 8 + column) of the k-th coefficient in zig-zag order.
 */
static const uint32_t zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
52
53 static void
54 gen7_mfd_init_avc_surface(VADriverContextP ctx, 
55                           VAPictureParameterBufferH264 *pic_param,
56                           struct object_surface *obj_surface)
57 {
58     struct i965_driver_data *i965 = i965_driver_data(ctx);
59     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
60     int width_in_mbs, height_in_mbs;
61
62     obj_surface->free_private_data = gen_free_avc_surface;
63     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
64     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
65
66     if (!gen7_avc_surface) {
67         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
68         assert((obj_surface->size & 0x3f) == 0);
69         obj_surface->private_data = gen7_avc_surface;
70     }
71
72     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
73                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
74
75     if (gen7_avc_surface->dmv_top == NULL) {
76         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
77                                                  "direct mv w/r buffer",
78                                                  width_in_mbs * (height_in_mbs + 1) * 64,
79                                                  0x1000);
80         assert(gen7_avc_surface->dmv_top);
81     }
82
83     if (gen7_avc_surface->dmv_bottom_flag &&
84         gen7_avc_surface->dmv_bottom == NULL) {
85         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
86                                                     "direct mv w/r buffer",
87                                                     width_in_mbs * (height_in_mbs + 1) * 64,
88                                                     0x1000);
89         assert(gen7_avc_surface->dmv_bottom);
90     }
91 }
92
93 static void
94 gen7_mfd_pipe_mode_select(VADriverContextP ctx,
95                           struct decode_state *decode_state,
96                           int standard_select,
97                           struct gen7_mfd_context *gen7_mfd_context)
98 {
99     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
100
101     assert(standard_select == MFX_FORMAT_MPEG2 ||
102            standard_select == MFX_FORMAT_AVC ||
103            standard_select == MFX_FORMAT_VC1 ||
104            standard_select == MFX_FORMAT_JPEG);
105
106     BEGIN_BCS_BATCH(batch, 5);
107     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
108     OUT_BCS_BATCH(batch,
109                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
110                   (MFD_MODE_VLD << 15) | /* VLD mode */
111                   (0 << 10) | /* disable Stream-Out */
112                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
113                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
114                   (0 << 5)  | /* not in stitch mode */
115                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
116                   (standard_select << 0));
117     OUT_BCS_BATCH(batch,
118                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
119                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
120                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
121                   (0 << 1)  |
122                   (0 << 0));
123     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
124     OUT_BCS_BATCH(batch, 0); /* reserved */
125     ADVANCE_BCS_BATCH(batch);
126 }
127
128 static void
129 gen7_mfd_surface_state(VADriverContextP ctx,
130                        struct decode_state *decode_state,
131                        int standard_select,
132                        struct gen7_mfd_context *gen7_mfd_context)
133 {
134     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
135     struct object_surface *obj_surface = decode_state->render_object;
136     unsigned int y_cb_offset;
137     unsigned int y_cr_offset;
138     unsigned int surface_format;
139
140     assert(obj_surface);
141
142     y_cb_offset = obj_surface->y_cb_offset;
143     y_cr_offset = obj_surface->y_cr_offset;
144
145     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
146         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
147
148     BEGIN_BCS_BATCH(batch, 6);
149     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
150     OUT_BCS_BATCH(batch, 0);
151     OUT_BCS_BATCH(batch,
152                   ((obj_surface->orig_height - 1) << 18) |
153                   ((obj_surface->orig_width - 1) << 4));
154     OUT_BCS_BATCH(batch,
155                   (surface_format << 28) | /* 420 planar YUV surface */
156                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
157                   (0 << 22) | /* surface object control state, ignored */
158                   ((obj_surface->width - 1) << 3) | /* pitch */
159                   (0 << 2)  | /* must be 0 */
160                   (1 << 1)  | /* must be tiled */
161                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
162     OUT_BCS_BATCH(batch,
163                   (0 << 16) | /* X offset for U(Cb), must be 0 */
164                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
165     OUT_BCS_BATCH(batch,
166                   (0 << 16) | /* X offset for V(Cr), must be 0 */
167                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
168     ADVANCE_BCS_BATCH(batch);
169 }
170
171 static void
172 gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
173                              struct decode_state *decode_state,
174                              int standard_select,
175                              struct gen7_mfd_context *gen7_mfd_context)
176 {
177     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
178     int i;
179
180     BEGIN_BCS_BATCH(batch, 24);
181     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
182     if (gen7_mfd_context->pre_deblocking_output.valid)
183         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
184                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
185                       0);
186     else
187         OUT_BCS_BATCH(batch, 0);
188
189     if (gen7_mfd_context->post_deblocking_output.valid)
190         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
191                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
192                       0);
193     else
194         OUT_BCS_BATCH(batch, 0);
195
196     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
197     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
198
199     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
200         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
201                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                       0);
203     else
204         OUT_BCS_BATCH(batch, 0);
205
206     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
207         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
208                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
209                       0);
210     else
211         OUT_BCS_BATCH(batch, 0);
212
213     /* DW 7..22 */
214     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
215         struct object_surface *obj_surface;
216
217         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
218             gen7_mfd_context->reference_surface[i].obj_surface &&
219             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
220             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
221
222             OUT_BCS_RELOC(batch, obj_surface->bo,
223                           I915_GEM_DOMAIN_INSTRUCTION, 0,
224                           0);
225         } else {
226             OUT_BCS_BATCH(batch, 0);
227         }
228     }
229
230     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
231     ADVANCE_BCS_BATCH(batch);
232 }
233
234 static void
235 gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
236                                  dri_bo *slice_data_bo,
237                                  int standard_select,
238                                  struct gen7_mfd_context *gen7_mfd_context)
239 {
240     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
241
242     BEGIN_BCS_BATCH(batch, 11);
243     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
244     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
245     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
246     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
247     OUT_BCS_BATCH(batch, 0);
248     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
249     OUT_BCS_BATCH(batch, 0);
250     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
251     OUT_BCS_BATCH(batch, 0);
252     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
253     OUT_BCS_BATCH(batch, 0);
254     ADVANCE_BCS_BATCH(batch);
255 }
256
257 static void
258 gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
259                                  struct decode_state *decode_state,
260                                  int standard_select,
261                                  struct gen7_mfd_context *gen7_mfd_context)
262 {
263     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
264
265     BEGIN_BCS_BATCH(batch, 4);
266     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
267
268     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
269         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
270                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
271                       0);
272     else
273         OUT_BCS_BATCH(batch, 0);
274
275     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
276         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
277                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
278                       0);
279     else
280         OUT_BCS_BATCH(batch, 0);
281
282     if (gen7_mfd_context->bitplane_read_buffer.valid)
283         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
284                       I915_GEM_DOMAIN_INSTRUCTION, 0,
285                       0);
286     else
287         OUT_BCS_BATCH(batch, 0);
288
289     ADVANCE_BCS_BATCH(batch);
290 }
291
292 static void
293 gen7_mfd_qm_state(VADriverContextP ctx,
294                   int qm_type,
295                   unsigned char *qm,
296                   int qm_length,
297                   struct gen7_mfd_context *gen7_mfd_context)
298 {
299     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
300     unsigned int qm_buffer[16];
301
302     assert(qm_length <= 16 * 4);
303     memcpy(qm_buffer, qm, qm_length);
304
305     BEGIN_BCS_BATCH(batch, 18);
306     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
307     OUT_BCS_BATCH(batch, qm_type << 0);
308     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
309     ADVANCE_BCS_BATCH(batch);
310 }
311
312 static void
313 gen7_mfd_avc_img_state(VADriverContextP ctx,
314                        struct decode_state *decode_state,
315                        struct gen7_mfd_context *gen7_mfd_context)
316 {
317     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
318     int img_struct;
319     int mbaff_frame_flag;
320     unsigned int width_in_mbs, height_in_mbs;
321     VAPictureParameterBufferH264 *pic_param;
322
323     assert(decode_state->pic_param && decode_state->pic_param->buffer);
324     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
325
326     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
327         img_struct = 1;
328     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
329         img_struct = 3;
330     else
331         img_struct = 0;
332
333     if ((img_struct & 0x1) == 0x1) {
334         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
335     } else {
336         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
337     }
338
339     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
340         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
341         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
342     } else {
343         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
344     }
345
346     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
347                         !pic_param->pic_fields.bits.field_pic_flag);
348
349     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
350     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
351
352     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
353     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
354            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
355     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
356
357     BEGIN_BCS_BATCH(batch, 16);
358     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
359     OUT_BCS_BATCH(batch, 
360                   (width_in_mbs * height_in_mbs - 1));
361     OUT_BCS_BATCH(batch, 
362                   ((height_in_mbs - 1) << 16) | 
363                   ((width_in_mbs - 1) << 0));
364     OUT_BCS_BATCH(batch, 
365                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
366                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
367                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
368                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
369                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
370                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
371                   (img_struct << 8));
372     OUT_BCS_BATCH(batch,
373                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
374                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
375                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
376                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
377                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
378                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
379                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
380                   (mbaff_frame_flag << 1) |
381                   (pic_param->pic_fields.bits.field_pic_flag << 0));
382     OUT_BCS_BATCH(batch, 0);
383     OUT_BCS_BATCH(batch, 0);
384     OUT_BCS_BATCH(batch, 0);
385     OUT_BCS_BATCH(batch, 0);
386     OUT_BCS_BATCH(batch, 0);
387     OUT_BCS_BATCH(batch, 0);
388     OUT_BCS_BATCH(batch, 0);
389     OUT_BCS_BATCH(batch, 0);
390     OUT_BCS_BATCH(batch, 0);
391     OUT_BCS_BATCH(batch, 0);
392     OUT_BCS_BATCH(batch, 0);
393     ADVANCE_BCS_BATCH(batch);
394 }
395
396 static void
397 gen7_mfd_avc_qm_state(VADriverContextP ctx,
398                       struct decode_state *decode_state,
399                       struct gen7_mfd_context *gen7_mfd_context)
400 {
401     VAIQMatrixBufferH264 *iq_matrix;
402     VAPictureParameterBufferH264 *pic_param;
403
404     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
405         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
406     else
407         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
408
409     assert(decode_state->pic_param && decode_state->pic_param->buffer);
410     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
411
412     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
413     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
414
415     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
416         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
417         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
418     }
419 }
420
421 static void
422 gen7_mfd_avc_directmode_state(VADriverContextP ctx,
423                               struct decode_state *decode_state,
424                               VAPictureParameterBufferH264 *pic_param,
425                               VASliceParameterBufferH264 *slice_param,
426                               struct gen7_mfd_context *gen7_mfd_context)
427 {
428     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
429     struct object_surface *obj_surface;
430     GenAvcSurface *gen7_avc_surface;
431     VAPictureH264 *va_pic;
432     int i;
433
434     BEGIN_BCS_BATCH(batch, 69);
435     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
436
437     /* reference surfaces 0..15 */
438     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
439         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
440             gen7_mfd_context->reference_surface[i].obj_surface &&
441             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
442
443             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
444             gen7_avc_surface = obj_surface->private_data;
445             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
446                           I915_GEM_DOMAIN_INSTRUCTION, 0,
447                           0);
448
449             if (gen7_avc_surface->dmv_bottom_flag == 1)
450                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
451                               I915_GEM_DOMAIN_INSTRUCTION, 0,
452                               0);
453             else
454                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
455                               I915_GEM_DOMAIN_INSTRUCTION, 0,
456                               0);
457         } else {
458             OUT_BCS_BATCH(batch, 0);
459             OUT_BCS_BATCH(batch, 0);
460         }
461     }
462
463     /* the current decoding frame/field */
464     va_pic = &pic_param->CurrPic;
465     obj_surface = decode_state->render_object;
466     assert(obj_surface->bo && obj_surface->private_data);
467     gen7_avc_surface = obj_surface->private_data;
468
469     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
470                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
471                   0);
472
473     if (gen7_avc_surface->dmv_bottom_flag == 1)
474         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
475                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
476                       0);
477     else
478         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
479                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
480                       0);
481
482     /* POC List */
483     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
484         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
485
486         if (obj_surface) {
487             const VAPictureH264 * const va_pic = avc_find_picture(
488                 obj_surface->base.id, pic_param->ReferenceFrames,
489                 ARRAY_ELEMS(pic_param->ReferenceFrames));
490
491             assert(va_pic != NULL);
492             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
493             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
494         } else {
495             OUT_BCS_BATCH(batch, 0);
496             OUT_BCS_BATCH(batch, 0);
497         }
498     }
499
500     va_pic = &pic_param->CurrPic;
501     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
502     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
503
504     ADVANCE_BCS_BATCH(batch);
505 }
506
507 static void
508 gen7_mfd_avc_slice_state(VADriverContextP ctx,
509                          VAPictureParameterBufferH264 *pic_param,
510                          VASliceParameterBufferH264 *slice_param,
511                          VASliceParameterBufferH264 *next_slice_param,
512                          struct gen7_mfd_context *gen7_mfd_context)
513 {
514     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
515     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
516     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
517     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
518     int num_ref_idx_l0, num_ref_idx_l1;
519     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
520                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
521     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
522     int slice_type;
523
524     if (slice_param->slice_type == SLICE_TYPE_I ||
525         slice_param->slice_type == SLICE_TYPE_SI) {
526         slice_type = SLICE_TYPE_I;
527     } else if (slice_param->slice_type == SLICE_TYPE_P ||
528                slice_param->slice_type == SLICE_TYPE_SP) {
529         slice_type = SLICE_TYPE_P;
530     } else { 
531         assert(slice_param->slice_type == SLICE_TYPE_B);
532         slice_type = SLICE_TYPE_B;
533     }
534
535     if (slice_type == SLICE_TYPE_I) {
536         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
537         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
538         num_ref_idx_l0 = 0;
539         num_ref_idx_l1 = 0;
540     } else if (slice_type == SLICE_TYPE_P) {
541         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
542         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
543         num_ref_idx_l1 = 0;
544     } else {
545         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
546         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
547     }
548
549     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
550     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
551     slice_ver_pos = first_mb_in_slice / width_in_mbs;
552
553     if (next_slice_param) {
554         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
555         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
556         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
557     } else {
558         next_slice_hor_pos = 0;
559         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
560     }
561
562     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
563     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
564     OUT_BCS_BATCH(batch, slice_type);
565     OUT_BCS_BATCH(batch, 
566                   (num_ref_idx_l1 << 24) |
567                   (num_ref_idx_l0 << 16) |
568                   (slice_param->chroma_log2_weight_denom << 8) |
569                   (slice_param->luma_log2_weight_denom << 0));
570     OUT_BCS_BATCH(batch, 
571                   (slice_param->direct_spatial_mv_pred_flag << 29) |
572                   (slice_param->disable_deblocking_filter_idc << 27) |
573                   (slice_param->cabac_init_idc << 24) |
574                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
575                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
576                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
577     OUT_BCS_BATCH(batch, 
578                   (slice_ver_pos << 24) |
579                   (slice_hor_pos << 16) | 
580                   (first_mb_in_slice << 0));
581     OUT_BCS_BATCH(batch,
582                   (next_slice_ver_pos << 16) |
583                   (next_slice_hor_pos << 0));
584     OUT_BCS_BATCH(batch, 
585                   (next_slice_param == NULL) << 19); /* last slice flag */
586     OUT_BCS_BATCH(batch, 0);
587     OUT_BCS_BATCH(batch, 0);
588     OUT_BCS_BATCH(batch, 0);
589     OUT_BCS_BATCH(batch, 0);
590     ADVANCE_BCS_BATCH(batch);
591 }
592
593 static inline void
594 gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
595                            VAPictureParameterBufferH264 *pic_param,
596                            VASliceParameterBufferH264 *slice_param,
597                            struct gen7_mfd_context *gen7_mfd_context)
598 {
599     gen6_send_avc_ref_idx_state(
600         gen7_mfd_context->base.batch,
601         slice_param,
602         gen7_mfd_context->reference_surface
603     );
604 }
605
606 static void
607 gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
608                                 VAPictureParameterBufferH264 *pic_param,
609                                 VASliceParameterBufferH264 *slice_param,
610                                 struct gen7_mfd_context *gen7_mfd_context)
611 {
612     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
613     int i, j, num_weight_offset_table = 0;
614     short weightoffsets[32 * 6];
615
616     if ((slice_param->slice_type == SLICE_TYPE_P ||
617          slice_param->slice_type == SLICE_TYPE_SP) &&
618         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
619         num_weight_offset_table = 1;
620     }
621     
622     if ((slice_param->slice_type == SLICE_TYPE_B) &&
623         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
624         num_weight_offset_table = 2;
625     }
626
627     for (i = 0; i < num_weight_offset_table; i++) {
628         BEGIN_BCS_BATCH(batch, 98);
629         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
630         OUT_BCS_BATCH(batch, i);
631
632         if (i == 0) {
633             for (j = 0; j < 32; j++) {
634                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
635                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
636                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
637                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
638                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
639                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
640             }
641         } else {
642             for (j = 0; j < 32; j++) {
643                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
644                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
645                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
646                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
647                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
648                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
649             }
650         }
651
652         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
653         ADVANCE_BCS_BATCH(batch);
654     }
655 }
656
657 static void
658 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
659                         VAPictureParameterBufferH264 *pic_param,
660                         VASliceParameterBufferH264 *slice_param,
661                         dri_bo *slice_data_bo,
662                         VASliceParameterBufferH264 *next_slice_param,
663                         struct gen7_mfd_context *gen7_mfd_context)
664 {
665     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
666     unsigned int slice_data_bit_offset;
667
668     slice_data_bit_offset = avc_get_first_mb_bit_offset(
669         slice_data_bo,
670         slice_param,
671         pic_param->pic_fields.bits.entropy_coding_mode_flag
672     );
673
674     /* the input bitsteam format on GEN7 differs from GEN6 */
675     BEGIN_BCS_BATCH(batch, 6);
676     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
677     OUT_BCS_BATCH(batch, 
678                   (slice_param->slice_data_size - slice_param->slice_data_offset));
679     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
680     OUT_BCS_BATCH(batch,
681                   (0 << 31) |
682                   (0 << 14) |
683                   (0 << 12) |
684                   (0 << 10) |
685                   (0 << 8));
686     OUT_BCS_BATCH(batch,
687                   ((slice_data_bit_offset >> 3) << 16) |
688                   (1 << 7)  |
689                   (0 << 5)  |
690                   (0 << 4)  |
691                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
692                   (slice_data_bit_offset & 0x7));
693     OUT_BCS_BATCH(batch, 0);
694     ADVANCE_BCS_BATCH(batch);
695 }
696
697 static inline void
698 gen7_mfd_avc_context_init(
699     VADriverContextP         ctx,
700     struct gen7_mfd_context *gen7_mfd_context
701 )
702 {
703     /* Initialize flat scaling lists */
704     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
705 }
706
707 static void
708 gen7_mfd_avc_decode_init(VADriverContextP ctx,
709                          struct decode_state *decode_state,
710                          struct gen7_mfd_context *gen7_mfd_context)
711 {
712     VAPictureParameterBufferH264 *pic_param;
713     VASliceParameterBufferH264 *slice_param;
714     struct i965_driver_data *i965 = i965_driver_data(ctx);
715     struct object_surface *obj_surface;
716     dri_bo *bo;
717     int i, j, enable_avc_ildb = 0;
718     unsigned int width_in_mbs, height_in_mbs;
719
720     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
721         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
722         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
723
724         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
725             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
726             assert((slice_param->slice_type == SLICE_TYPE_I) ||
727                    (slice_param->slice_type == SLICE_TYPE_SI) ||
728                    (slice_param->slice_type == SLICE_TYPE_P) ||
729                    (slice_param->slice_type == SLICE_TYPE_SP) ||
730                    (slice_param->slice_type == SLICE_TYPE_B));
731
732             if (slice_param->disable_deblocking_filter_idc != 1) {
733                 enable_avc_ildb = 1;
734                 break;
735             }
736
737             slice_param++;
738         }
739     }
740
741     assert(decode_state->pic_param && decode_state->pic_param->buffer);
742     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
743     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
744     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
745     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
746     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
747     assert(height_in_mbs > 0 && height_in_mbs <= 256);
748
749     /* Current decoded picture */
750     obj_surface = decode_state->render_object;
751     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
752     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
753
754     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
755     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
756
757     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
758     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
759     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
760     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
761
762     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
763     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
764     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
765     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
766
767     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
768     bo = dri_bo_alloc(i965->intel.bufmgr,
769                       "intra row store",
770                       width_in_mbs * 64,
771                       0x1000);
772     assert(bo);
773     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
774     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
775
776     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
777     bo = dri_bo_alloc(i965->intel.bufmgr,
778                       "deblocking filter row store",
779                       width_in_mbs * 64 * 4,
780                       0x1000);
781     assert(bo);
782     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
783     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
784
785     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
786     bo = dri_bo_alloc(i965->intel.bufmgr,
787                       "bsd mpc row store",
788                       width_in_mbs * 64 * 2,
789                       0x1000);
790     assert(bo);
791     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
792     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
793
794     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
795     bo = dri_bo_alloc(i965->intel.bufmgr,
796                       "mpr row store",
797                       width_in_mbs * 64 * 2,
798                       0x1000);
799     assert(bo);
800     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
801     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
802
803     gen7_mfd_context->bitplane_read_buffer.valid = 0;
804 }
805
806 static void
807 gen7_mfd_avc_decode_picture(VADriverContextP ctx,
808                             struct decode_state *decode_state,
809                             struct gen7_mfd_context *gen7_mfd_context)
810 {
811     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
812     VAPictureParameterBufferH264 *pic_param;
813     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
814     dri_bo *slice_data_bo;
815     int i, j;
816
817     assert(decode_state->pic_param && decode_state->pic_param->buffer);
818     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
819     gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
820
821     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
822     intel_batchbuffer_emit_mi_flush(batch);
823     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
824     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
825     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
826     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
827     gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
828     gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
829
830     for (j = 0; j < decode_state->num_slice_params; j++) {
831         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
832         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
833         slice_data_bo = decode_state->slice_datas[j]->bo;
834         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
835
836         if (j == decode_state->num_slice_params - 1)
837             next_slice_group_param = NULL;
838         else
839             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
840
841         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
842             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
843             assert((slice_param->slice_type == SLICE_TYPE_I) ||
844                    (slice_param->slice_type == SLICE_TYPE_SI) ||
845                    (slice_param->slice_type == SLICE_TYPE_P) ||
846                    (slice_param->slice_type == SLICE_TYPE_SP) ||
847                    (slice_param->slice_type == SLICE_TYPE_B));
848
849             if (i < decode_state->slice_params[j]->num_elements - 1)
850                 next_slice_param = slice_param + 1;
851             else
852                 next_slice_param = next_slice_group_param;
853
854             gen7_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
855             gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
856             gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
857             gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
858             gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
859             slice_param++;
860         }
861     }
862
863     intel_batchbuffer_end_atomic(batch);
864     intel_batchbuffer_flush(batch);
865 }
866
867 static void
868 gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
869                            struct decode_state *decode_state,
870                            struct gen7_mfd_context *gen7_mfd_context)
871 {
872     VAPictureParameterBufferMPEG2 *pic_param;
873     struct i965_driver_data *i965 = i965_driver_data(ctx);
874     struct object_surface *obj_surface;
875     dri_bo *bo;
876     unsigned int width_in_mbs;
877
878     assert(decode_state->pic_param && decode_state->pic_param->buffer);
879     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
880     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
881
882     mpeg2_set_reference_surfaces(
883         ctx,
884         gen7_mfd_context->reference_surface,
885         decode_state,
886         pic_param
887     );
888
889     /* Current decoded picture */
890     obj_surface = decode_state->render_object;
891     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
892
893     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
894     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
895     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
896     gen7_mfd_context->pre_deblocking_output.valid = 1;
897
898     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
899     bo = dri_bo_alloc(i965->intel.bufmgr,
900                       "bsd mpc row store",
901                       width_in_mbs * 96,
902                       0x1000);
903     assert(bo);
904     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
905     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
906
907     gen7_mfd_context->post_deblocking_output.valid = 0;
908     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
909     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
910     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
911     gen7_mfd_context->bitplane_read_buffer.valid = 0;
912 }
913
914 static void
915 gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
916                          struct decode_state *decode_state,
917                          struct gen7_mfd_context *gen7_mfd_context)
918 {
919     struct i965_driver_data * const i965 = i965_driver_data(ctx);
920     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
921     VAPictureParameterBufferMPEG2 *pic_param;
922     unsigned int slice_concealment_disable_bit = 0;
923
924     assert(decode_state->pic_param && decode_state->pic_param->buffer);
925     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
926
927     if (IS_HASWELL(i965->intel.device_info)) {
928         /* XXX: disable concealment for now */
929         slice_concealment_disable_bit = 1;
930     }
931
932     BEGIN_BCS_BATCH(batch, 13);
933     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
934     OUT_BCS_BATCH(batch,
935                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
936                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
937                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
938                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
939                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
940                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
941                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
942                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
943                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
944                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
945                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
946                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
947     OUT_BCS_BATCH(batch,
948                   pic_param->picture_coding_type << 9);
949     OUT_BCS_BATCH(batch,
950                   (slice_concealment_disable_bit << 31) |
951                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
952                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
953     OUT_BCS_BATCH(batch, 0);
954     OUT_BCS_BATCH(batch, 0);
955     OUT_BCS_BATCH(batch, 0);
956     OUT_BCS_BATCH(batch, 0);
957     OUT_BCS_BATCH(batch, 0);
958     OUT_BCS_BATCH(batch, 0);
959     OUT_BCS_BATCH(batch, 0);
960     OUT_BCS_BATCH(batch, 0);
961     OUT_BCS_BATCH(batch, 0);
962     ADVANCE_BCS_BATCH(batch);
963 }
964
965 static void
966 gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
967                         struct decode_state *decode_state,
968                         struct gen7_mfd_context *gen7_mfd_context)
969 {
970     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
971     int i, j;
972
973     /* Update internal QM state */
974     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
975         VAIQMatrixBufferMPEG2 * const iq_matrix =
976             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
977
978         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
979             iq_matrix->load_intra_quantiser_matrix) {
980             gen_iq_matrix->load_intra_quantiser_matrix =
981                 iq_matrix->load_intra_quantiser_matrix;
982             if (iq_matrix->load_intra_quantiser_matrix) {
983                 for (j = 0; j < 64; j++)
984                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
985                         iq_matrix->intra_quantiser_matrix[j];
986             }
987         }
988
989         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
990             iq_matrix->load_non_intra_quantiser_matrix) {
991             gen_iq_matrix->load_non_intra_quantiser_matrix =
992                 iq_matrix->load_non_intra_quantiser_matrix;
993             if (iq_matrix->load_non_intra_quantiser_matrix) {
994                 for (j = 0; j < 64; j++)
995                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
996                         iq_matrix->non_intra_quantiser_matrix[j];
997             }
998         }
999     }
1000
1001     /* Commit QM state to HW */
1002     for (i = 0; i < 2; i++) {
1003         unsigned char *qm = NULL;
1004         int qm_type;
1005
1006         if (i == 0) {
1007             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1008                 qm = gen_iq_matrix->intra_quantiser_matrix;
1009                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1010             }
1011         } else {
1012             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1013                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1014                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1015             }
1016         }
1017
1018         if (!qm)
1019             continue;
1020
1021         gen7_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1022     }
1023 }
1024
1025 uint32_t mpeg2_get_slice_data_length(dri_bo *slice_data_bo, VASliceParameterBufferMPEG2 *slice_param)
1026 {
1027     uint8_t *buf;
1028     uint32_t buf_offset = slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3);
1029     uint32_t buf_size = slice_param->slice_data_size - (slice_param->macroblock_offset >> 3);
1030     uint32_t i;
1031
1032     dri_bo_map(slice_data_bo, 0);
1033     buf = (uint8_t *)slice_data_bo->virtual + buf_offset;
1034
1035     for (i = 3; i < buf_size; i++) {
1036         if (buf[i - 3] &&
1037             !buf[i - 2] &&
1038             !buf[i - 1] &&
1039             !buf[i]) {
1040             dri_bo_unmap(slice_data_bo);
1041             return i - 3 + 1;
1042         }
1043     }
1044
1045     dri_bo_unmap(slice_data_bo);
1046     return buf_size;
1047 }
1048
1049 static void
1050 gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1051                           VAPictureParameterBufferMPEG2 *pic_param,
1052                           VASliceParameterBufferMPEG2 *slice_param,
1053                           dri_bo *slice_data_bo,
1054                           VASliceParameterBufferMPEG2 *next_slice_param,
1055                           struct gen7_mfd_context *gen7_mfd_context)
1056 {
1057     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1058     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1059     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1060     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1061
1062     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1063         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1064         is_field_pic = 1;
1065     is_field_pic_wa = is_field_pic &&
1066         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1067
1068     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1069     hpos0 = slice_param->slice_horizontal_position;
1070
1071     if (next_slice_param == NULL) {
1072         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1073         hpos1 = 0;
1074     } else {
1075         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1076         hpos1 = next_slice_param->slice_horizontal_position;
1077     }
1078
1079     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1080
1081     BEGIN_BCS_BATCH(batch, 5);
1082     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1083     OUT_BCS_BATCH(batch, 
1084                   mpeg2_get_slice_data_length(slice_data_bo, slice_param));
1085     OUT_BCS_BATCH(batch, 
1086                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1087     OUT_BCS_BATCH(batch,
1088                   hpos0 << 24 |
1089                   vpos0 << 16 |
1090                   mb_count << 8 |
1091                   (next_slice_param == NULL) << 5 |
1092                   (next_slice_param == NULL) << 3 |
1093                   (slice_param->macroblock_offset & 0x7));
1094     OUT_BCS_BATCH(batch,
1095                   (slice_param->quantiser_scale_code << 24) |
1096                   (IS_HASWELL(i965->intel.device_info) ? (vpos1 << 8 | hpos1) : 0));
1097     ADVANCE_BCS_BATCH(batch);
1098 }
1099
1100 static void
1101 gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1102                               struct decode_state *decode_state,
1103                               struct gen7_mfd_context *gen7_mfd_context)
1104 {
1105     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1106     VAPictureParameterBufferMPEG2 *pic_param;
1107     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1108     dri_bo *slice_data_bo;
1109     int i, j;
1110
1111     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1112     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1113
1114     gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1115     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1116     intel_batchbuffer_emit_mi_flush(batch);
1117     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1118     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1119     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1120     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1121     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1122     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1123
1124     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1125         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1126             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1127
1128     for (j = 0; j < decode_state->num_slice_params; j++) {
1129         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1130         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1131         slice_data_bo = decode_state->slice_datas[j]->bo;
1132         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1133
1134         if (j == decode_state->num_slice_params - 1)
1135             next_slice_group_param = NULL;
1136         else
1137             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1138
1139         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1140             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1141
1142             if (i < decode_state->slice_params[j]->num_elements - 1)
1143                 next_slice_param = slice_param + 1;
1144             else
1145                 next_slice_param = next_slice_group_param;
1146
1147             gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1148             slice_param++;
1149         }
1150     }
1151
1152     intel_batchbuffer_end_atomic(batch);
1153     intel_batchbuffer_flush(batch);
1154 }
1155
1156 static const int va_to_gen7_vc1_pic_type[5] = {
1157     GEN7_VC1_I_PICTURE,
1158     GEN7_VC1_P_PICTURE,
1159     GEN7_VC1_B_PICTURE,
1160     GEN7_VC1_BI_PICTURE,
1161     GEN7_VC1_P_PICTURE,
1162 };
1163
/*
 * GEN7 unified MV mode, indexed by the mv_mode (or mv_mode2) value taken
 * from the VA picture parameters; callers check the index is below 4.
 */
static const int va_to_gen7_vc1_mv[4] = {
    [0] = 1, /* 1-MV */
    [1] = 2, /* 1-MV half-pel */
    [2] = 3, /* 1-MV half-pel bilinear */
    [3] = 0, /* Mixed MV */
};
1170
/*
 * Scale factor lookup, indexed by the b_picture_fraction value of the VA
 * picture parameters (only values below 21 are looked up).
 */
static const int b_picture_scale_factor[21] = {
    /*  0 ..  6 */ 128,  85, 170,  64, 192,  51, 102,
    /*  7 .. 13 */ 153, 204,  43, 215,  37,  74, 111,
    /* 14 .. 20 */ 148, 185, 222,  32,  96, 160, 224,
};
1178
/*
 * Three-entry translation table for the VC-1 conditional overlap setting.
 * NOTE(review): presumably indexed by the VA conditional_overlap_flag;
 * the lookup is not part of this section, confirm at the use site.
 */
static const int va_to_gen7_vc1_condover[3] = {
    [0] = 0,
    [1] = 2,
    [2] = 3,
};
1184
1185 static const int va_to_gen7_vc1_profile[4] = {
1186     GEN7_VC1_SIMPLE_PROFILE,
1187     GEN7_VC1_MAIN_PROFILE,
1188     GEN7_VC1_RESERVED_PROFILE,
1189     GEN7_VC1_ADVANCED_PROFILE
1190 };
1191
1192 static void 
1193 gen7_mfd_free_vc1_surface(void **data)
1194 {
1195     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1196
1197     if (!gen7_vc1_surface)
1198         return;
1199
1200     dri_bo_unreference(gen7_vc1_surface->dmv);
1201     free(gen7_vc1_surface);
1202     *data = NULL;
1203 }
1204
1205 static void
1206 gen7_mfd_init_vc1_surface(VADriverContextP ctx, 
1207                           VAPictureParameterBufferVC1 *pic_param,
1208                           struct object_surface *obj_surface)
1209 {
1210     struct i965_driver_data *i965 = i965_driver_data(ctx);
1211     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1212     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1213     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1214
1215     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
1216
1217     if (!gen7_vc1_surface) {
1218         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1219         assert((obj_surface->size & 0x3f) == 0);
1220         obj_surface->private_data = gen7_vc1_surface;
1221     }
1222
1223     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1224
1225     if (gen7_vc1_surface->dmv == NULL) {
1226         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1227                                              "direct mv w/r buffer",
1228                                              width_in_mbs * height_in_mbs * 64,
1229                                              0x1000);
1230     }
1231 }
1232
1233 static void
1234 gen7_mfd_vc1_decode_init(VADriverContextP ctx,
1235                          struct decode_state *decode_state,
1236                          struct gen7_mfd_context *gen7_mfd_context)
1237 {
1238     VAPictureParameterBufferVC1 *pic_param;
1239     struct i965_driver_data *i965 = i965_driver_data(ctx);
1240     struct object_surface *obj_surface;
1241     dri_bo *bo;
1242     int width_in_mbs;
1243     int picture_type;
1244  
1245     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1246     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1247     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1248     picture_type = pic_param->picture_fields.bits.picture_type;
1249  
1250     intel_update_vc1_frame_store_index(ctx,
1251                                        decode_state,
1252                                        pic_param,
1253                                        gen7_mfd_context->reference_surface);
1254
1255     /* Current decoded picture */
1256     obj_surface = decode_state->render_object;
1257     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1258     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1259
1260     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1261     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1262     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1263     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1264
1265     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1266     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1267     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1268     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1269
1270     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1271     bo = dri_bo_alloc(i965->intel.bufmgr,
1272                       "intra row store",
1273                       width_in_mbs * 64,
1274                       0x1000);
1275     assert(bo);
1276     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1277     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1278
1279     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1280     bo = dri_bo_alloc(i965->intel.bufmgr,
1281                       "deblocking filter row store",
1282                       width_in_mbs * 7 * 64,
1283                       0x1000);
1284     assert(bo);
1285     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1286     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1287
1288     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1289     bo = dri_bo_alloc(i965->intel.bufmgr,
1290                       "bsd mpc row store",
1291                       width_in_mbs * 96,
1292                       0x1000);
1293     assert(bo);
1294     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1295     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1296
1297     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1298
1299     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1300     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1301     
1302     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1303         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1304         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1305         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1306         int src_w, src_h;
1307         uint8_t *src = NULL, *dst = NULL;
1308
1309         assert(decode_state->bit_plane->buffer);
1310         src = decode_state->bit_plane->buffer;
1311
1312         bo = dri_bo_alloc(i965->intel.bufmgr,
1313                           "VC-1 Bitplane",
1314                           bitplane_width * height_in_mbs,
1315                           0x1000);
1316         assert(bo);
1317         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1318
1319         dri_bo_map(bo, True);
1320         assert(bo->virtual);
1321         dst = bo->virtual;
1322
1323         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1324             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1325                 int src_index, dst_index;
1326                 int src_shift;
1327                 uint8_t src_value;
1328
1329                 src_index = (src_h * width_in_mbs + src_w) / 2;
1330                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1331                 src_value = ((src[src_index] >> src_shift) & 0xf);
1332
1333                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1334                     src_value |= 0x2;
1335                 }
1336
1337                 dst_index = src_w / 2;
1338                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1339             }
1340
1341             if (src_w & 1)
1342                 dst[src_w / 2] >>= 4;
1343
1344             dst += bitplane_width;
1345         }
1346
1347         dri_bo_unmap(bo);
1348     } else
1349         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1350 }
1351
1352 static void
1353 gen7_mfd_vc1_pic_state(VADriverContextP ctx,
1354                        struct decode_state *decode_state,
1355                        struct gen7_mfd_context *gen7_mfd_context)
1356 {
1357     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1358     VAPictureParameterBufferVC1 *pic_param;
1359     struct object_surface *obj_surface;
1360     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1361     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1362     int unified_mv_mode;
1363     int ref_field_pic_polarity = 0;
1364     int scale_factor = 0;
1365     int trans_ac_y = 0;
1366     int dmv_surface_valid = 0;
1367     int brfd = 0;
1368     int fcm = 0;
1369     int picture_type;
1370     int profile;
1371     int overlap;
1372     int interpolation_mode = 0;
1373
1374     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1375     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1376
1377     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1378     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1379     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1380     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1381     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1382     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1383     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1384     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1385
1386     if (dquant == 0) {
1387         alt_pquant_config = 0;
1388         alt_pquant_edge_mask = 0;
1389     } else if (dquant == 2) {
1390         alt_pquant_config = 1;
1391         alt_pquant_edge_mask = 0xf;
1392     } else {
1393         assert(dquant == 1);
1394         if (dquantfrm == 0) {
1395             alt_pquant_config = 0;
1396             alt_pquant_edge_mask = 0;
1397             alt_pq = 0;
1398         } else {
1399             assert(dquantfrm == 1);
1400             alt_pquant_config = 1;
1401
1402             switch (dqprofile) {
1403             case 3:
1404                 if (dqbilevel == 0) {
1405                     alt_pquant_config = 2;
1406                     alt_pquant_edge_mask = 0;
1407                 } else {
1408                     assert(dqbilevel == 1);
1409                     alt_pquant_config = 3;
1410                     alt_pquant_edge_mask = 0;
1411                 }
1412                 break;
1413                 
1414             case 0:
1415                 alt_pquant_edge_mask = 0xf;
1416                 break;
1417
1418             case 1:
1419                 if (dqdbedge == 3)
1420                     alt_pquant_edge_mask = 0x9;
1421                 else
1422                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1423
1424                 break;
1425
1426             case 2:
1427                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1428                 break;
1429
1430             default:
1431                 assert(0);
1432             }
1433         }
1434     }
1435
1436     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1437         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1438         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1439     } else {
1440         assert(pic_param->mv_fields.bits.mv_mode < 4);
1441         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1442     }
1443
1444     if (pic_param->sequence_fields.bits.interlace == 1 &&
1445         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1446         /* FIXME: calculate reference field picture polarity */
1447         assert(0);
1448         ref_field_pic_polarity = 0;
1449     }
1450
1451     if (pic_param->b_picture_fraction < 21)
1452         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1453
1454     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1455     
1456     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1457         picture_type == GEN7_VC1_I_PICTURE)
1458         picture_type = GEN7_VC1_BI_PICTURE;
1459
1460     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1461         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1462     else {
1463         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1464         /*
1465          * 8.3.6.2.1 Transform Type Selection
1466          * If variable-sized transform coding is not enabled,
1467          * then the 8x8 transform shall be used for all blocks.
1468          * it is also MFX_VC1_PIC_STATE requirement.
1469          */
1470         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1471             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1472             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1473         }
1474     }
1475
1476
1477     if (picture_type == GEN7_VC1_B_PICTURE) {
1478         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1479
1480         obj_surface = decode_state->reference_objects[1];
1481
1482         if (obj_surface)
1483             gen7_vc1_surface = obj_surface->private_data;
1484
1485         if (!gen7_vc1_surface || 
1486             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1487              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1488             dmv_surface_valid = 0;
1489         else
1490             dmv_surface_valid = 1;
1491     }
1492
1493     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1494
1495     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1496         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1497     else {
1498         if (pic_param->picture_fields.bits.top_field_first)
1499             fcm = 2;
1500         else
1501             fcm = 3;
1502     }
1503
1504     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1505         brfd = pic_param->reference_fields.bits.reference_distance;
1506         brfd = (scale_factor * brfd) >> 8;
1507         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1508
1509         if (brfd < 0)
1510             brfd = 0;
1511     }
1512
1513     overlap = 0;
1514     if (profile != GEN7_VC1_ADVANCED_PROFILE){
1515         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1516             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1517             overlap = 1; 
1518         }
1519     }else {
1520         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1521              pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1522               overlap = 1; 
1523         }
1524         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1525             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1526              if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1527                 overlap = 1; 
1528              } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1529                         va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1530                  overlap = 1;
1531              }
1532         }
1533     } 
1534
1535     assert(pic_param->conditional_overlap_flag < 3);
1536     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1537
1538     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1539         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1540          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1541         interpolation_mode = 9; /* Half-pel bilinear */
1542     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1543              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1544               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1545         interpolation_mode = 1; /* Half-pel bicubic */
1546     else
1547         interpolation_mode = 0; /* Quarter-pel bicubic */
1548
1549     BEGIN_BCS_BATCH(batch, 6);
1550     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1551     OUT_BCS_BATCH(batch,
1552                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1553                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1554     OUT_BCS_BATCH(batch,
1555                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1556                   dmv_surface_valid << 15 |
1557                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1558                   pic_param->rounding_control << 13 |
1559                   pic_param->sequence_fields.bits.syncmarker << 12 |
1560                   interpolation_mode << 8 |
1561                   0 << 7 | /* FIXME: scale up or down ??? */
1562                   pic_param->range_reduction_frame << 6 |
1563                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1564                   overlap << 4 |
1565                   !pic_param->picture_fields.bits.is_first_field << 3 |
1566                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1567     OUT_BCS_BATCH(batch,
1568                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1569                   picture_type << 26 |
1570                   fcm << 24 |
1571                   alt_pq << 16 |
1572                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1573                   scale_factor << 0);
1574     OUT_BCS_BATCH(batch,
1575                   unified_mv_mode << 28 |
1576                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1577                   pic_param->fast_uvmc_flag << 26 |
1578                   ref_field_pic_polarity << 25 |
1579                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1580                   pic_param->reference_fields.bits.reference_distance << 20 |
1581                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1582                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1583                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1584                   alt_pquant_edge_mask << 4 |
1585                   alt_pquant_config << 2 |
1586                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1587                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1588     OUT_BCS_BATCH(batch,
1589                   !!pic_param->bitplane_present.value << 31 |
1590                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1591                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1592                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1593                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1594                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1595                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1596                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1597                   pic_param->mv_fields.bits.mv_table << 20 |
1598                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1599                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1600                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1601                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1602                   pic_param->mb_mode_table << 8 |
1603                   trans_ac_y << 6 |
1604                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1605                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1606                   pic_param->cbp_table << 0);
1607     ADVANCE_BCS_BATCH(batch);
1608 }
1609
1610 static void
1611 gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1612                              struct decode_state *decode_state,
1613                              struct gen7_mfd_context *gen7_mfd_context)
1614 {
1615     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1616     VAPictureParameterBufferVC1 *pic_param;
1617     int intensitycomp_single;
1618
1619     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1620     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1621
1622     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1623     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1624     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1625
1626     BEGIN_BCS_BATCH(batch, 6);
1627     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1628     OUT_BCS_BATCH(batch,
1629                   0 << 14 | /* FIXME: double ??? */
1630                   0 << 12 |
1631                   intensitycomp_single << 10 |
1632                   intensitycomp_single << 8 |
1633                   0 << 4 | /* FIXME: interlace mode */
1634                   0);
1635     OUT_BCS_BATCH(batch,
1636                   pic_param->luma_shift << 16 |
1637                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1638     OUT_BCS_BATCH(batch, 0);
1639     OUT_BCS_BATCH(batch, 0);
1640     OUT_BCS_BATCH(batch, 0);
1641     ADVANCE_BCS_BATCH(batch);
1642 }
1643
1644
1645 static void
1646 gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
1647                               struct decode_state *decode_state,
1648                               struct gen7_mfd_context *gen7_mfd_context)
1649 {
1650     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1651     struct object_surface *obj_surface;
1652     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1653
1654     obj_surface = decode_state->render_object;
1655
1656     if (obj_surface && obj_surface->private_data) {
1657         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1658     }
1659
1660     obj_surface = decode_state->reference_objects[1];
1661
1662     if (obj_surface && obj_surface->private_data) {
1663         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1664     }
1665
1666     BEGIN_BCS_BATCH(batch, 3);
1667     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1668
1669     if (dmv_write_buffer)
1670         OUT_BCS_RELOC(batch, dmv_write_buffer,
1671                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1672                       0);
1673     else
1674         OUT_BCS_BATCH(batch, 0);
1675
1676     if (dmv_read_buffer)
1677         OUT_BCS_RELOC(batch, dmv_read_buffer,
1678                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1679                       0);
1680     else
1681         OUT_BCS_BATCH(batch, 0);
1682                   
1683     ADVANCE_BCS_BATCH(batch);
1684 }
1685
/*
 * Translate a macroblock bit offset into an offset within the raw buffer.
 *
 * For any profile other than 3 the input offset is returned unchanged.
 * For profile 3 the whole header bytes (in_slice_data_bit_offset / 8) are
 * walked; each time the raw bytes at the current position are
 * 00 00 03 followed by a byte below 4, two header bytes are accounted for
 * while three raw bytes are consumed.  The result is the raw byte position
 * reached, in bits, plus the sub-byte remainder of the input offset.
 *
 * NOTE(review): buf is read up to three bytes ahead of the current position
 * with no length check; the caller must guarantee those bytes are readable.
 */
static int
gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    const int header_bytes = in_slice_data_bit_offset / 8;
    int consumed = 0;   /* header bytes accounted for so far */
    int raw_pos = 0;    /* current byte position in buf */

    if (profile != 3)
        return in_slice_data_bit_offset;

    while (consumed < header_bytes) {
        if (buf[raw_pos] == 0 &&
            buf[raw_pos + 1] == 0 &&
            buf[raw_pos + 2] == 3 &&
            buf[raw_pos + 3] < 4) {
            /* 00 00 03 0x: two header bytes occupy three raw bytes */
            consumed += 2;
            raw_pos += 3;
        } else {
            consumed += 1;
            raw_pos += 1;
        }
    }

    return 8 * raw_pos + in_slice_data_bit_offset % 8;
}
1707
1708 static void
1709 gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
1710                         VAPictureParameterBufferVC1 *pic_param,
1711                         VASliceParameterBufferVC1 *slice_param,
1712                         VASliceParameterBufferVC1 *next_slice_param,
1713                         dri_bo *slice_data_bo,
1714                         struct gen7_mfd_context *gen7_mfd_context)
1715 {
1716     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1717     int next_slice_start_vert_pos;
1718     int macroblock_offset;
1719     uint8_t *slice_data = NULL;
1720
1721     dri_bo_map(slice_data_bo, 0);
1722     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1723     macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1724                                                                slice_param->macroblock_offset,
1725                                                                pic_param->sequence_fields.bits.profile);
1726     dri_bo_unmap(slice_data_bo);
1727
1728     if (next_slice_param)
1729         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1730     else
1731         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1732
1733     BEGIN_BCS_BATCH(batch, 5);
1734     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1735     OUT_BCS_BATCH(batch, 
1736                   slice_param->slice_data_size - (macroblock_offset >> 3));
1737     OUT_BCS_BATCH(batch, 
1738                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1739     OUT_BCS_BATCH(batch,
1740                   slice_param->slice_vertical_position << 16 |
1741                   next_slice_start_vert_pos << 0);
1742     OUT_BCS_BATCH(batch,
1743                   (macroblock_offset & 0x7));
1744     ADVANCE_BCS_BATCH(batch);
1745 }
1746
1747 static void
1748 gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
1749                             struct decode_state *decode_state,
1750                             struct gen7_mfd_context *gen7_mfd_context)
1751 {
1752     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1753     VAPictureParameterBufferVC1 *pic_param;
1754     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1755     dri_bo *slice_data_bo;
1756     int i, j;
1757
1758     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1759     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1760
1761     gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1762     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1763     intel_batchbuffer_emit_mi_flush(batch);
1764     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1765     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1766     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1767     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1768     gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1769     gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1770     gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1771
1772     for (j = 0; j < decode_state->num_slice_params; j++) {
1773         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1774         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1775         slice_data_bo = decode_state->slice_datas[j]->bo;
1776         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1777
1778         if (j == decode_state->num_slice_params - 1)
1779             next_slice_group_param = NULL;
1780         else
1781             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1782
1783         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1784             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1785
1786             if (i < decode_state->slice_params[j]->num_elements - 1)
1787                 next_slice_param = slice_param + 1;
1788             else
1789                 next_slice_param = next_slice_group_param;
1790
1791             gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1792             slice_param++;
1793         }
1794     }
1795
1796     intel_batchbuffer_end_atomic(batch);
1797     intel_batchbuffer_flush(batch);
1798 }
1799
1800 static void
1801 gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
1802                           struct decode_state *decode_state,
1803                           struct gen7_mfd_context *gen7_mfd_context)
1804 {
1805     struct object_surface *obj_surface;
1806     VAPictureParameterBufferJPEGBaseline *pic_param;
1807     int subsampling = SUBSAMPLE_YUV420;
1808     int fourcc = VA_FOURCC_IMC3;
1809
1810     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1811
1812     if (pic_param->num_components == 1) {
1813         subsampling = SUBSAMPLE_YUV400;
1814         fourcc = VA_FOURCC_Y800;
1815     } else if (pic_param->num_components == 3) {
1816         int h1 = pic_param->components[0].h_sampling_factor;
1817         int h2 = pic_param->components[1].h_sampling_factor;
1818         int h3 = pic_param->components[2].h_sampling_factor;
1819         int v1 = pic_param->components[0].v_sampling_factor;
1820         int v2 = pic_param->components[1].v_sampling_factor;
1821         int v3 = pic_param->components[2].v_sampling_factor;
1822
1823         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1824             v1 == 2 && v2 == 1 && v3 == 1) {
1825             subsampling = SUBSAMPLE_YUV420;
1826             fourcc = VA_FOURCC_IMC3;
1827         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1828                    v1 == 1 && v2 == 1 && v3 == 1) {
1829             subsampling = SUBSAMPLE_YUV422H;
1830             fourcc = VA_FOURCC_422H;
1831         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1832                    v1 == 1 && v2 == 1 && v3 == 1) {
1833             subsampling = SUBSAMPLE_YUV444;
1834             fourcc = VA_FOURCC_444P;
1835         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1836                    v1 == 1 && v2 == 1 && v3 == 1) {
1837             subsampling = SUBSAMPLE_YUV411;
1838             fourcc = VA_FOURCC_411P;
1839         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1840                    v1 == 2 && v2 == 1 && v3 == 1) {
1841             subsampling = SUBSAMPLE_YUV422V;
1842             fourcc = VA_FOURCC_422V;
1843         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1844                    v1 == 2 && v2 == 2 && v3 == 2) {
1845             subsampling = SUBSAMPLE_YUV422H;
1846             fourcc = VA_FOURCC_422H;
1847         } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1848                    v1 == 2 && v2 == 1 && v3 == 1) {
1849             subsampling = SUBSAMPLE_YUV422V;
1850             fourcc = VA_FOURCC_422V;
1851         } else
1852             assert(0);
1853     } else {
1854         assert(0);
1855     }
1856
1857     /* Current decoded picture */
1858     obj_surface = decode_state->render_object;
1859     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1860
1861     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1862     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1863     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1864     gen7_mfd_context->pre_deblocking_output.valid = 1;
1865
1866     gen7_mfd_context->post_deblocking_output.bo = NULL;
1867     gen7_mfd_context->post_deblocking_output.valid = 0;
1868
1869     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1870     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1871
1872     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1873     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1874
1875     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1876     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1877
1878     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1879     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1880
1881     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1882     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1883 }
1884
1885 static const int va_to_gen7_jpeg_rotation[4] = {
1886     GEN7_JPEG_ROTATION_0,
1887     GEN7_JPEG_ROTATION_90,
1888     GEN7_JPEG_ROTATION_180,
1889     GEN7_JPEG_ROTATION_270
1890 };
1891
1892 static void
1893 gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
1894                         struct decode_state *decode_state,
1895                         struct gen7_mfd_context *gen7_mfd_context)
1896 {
1897     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1898     VAPictureParameterBufferJPEGBaseline *pic_param;
1899     int chroma_type = GEN7_YUV420;
1900     int frame_width_in_blks;
1901     int frame_height_in_blks;
1902
1903     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1904     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1905
1906     if (pic_param->num_components == 1)
1907         chroma_type = GEN7_YUV400;
1908     else if (pic_param->num_components == 3) {
1909         int h1 = pic_param->components[0].h_sampling_factor;
1910         int h2 = pic_param->components[1].h_sampling_factor;
1911         int h3 = pic_param->components[2].h_sampling_factor;
1912         int v1 = pic_param->components[0].v_sampling_factor;
1913         int v2 = pic_param->components[1].v_sampling_factor;
1914         int v3 = pic_param->components[2].v_sampling_factor;
1915
1916         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1917             v1 == 2 && v2 == 1 && v3 == 1)
1918             chroma_type = GEN7_YUV420;
1919         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1920                  v1 == 1 && v2 == 1 && v3 == 1)
1921             chroma_type = GEN7_YUV422H_2Y;
1922         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1923                  v1 == 1 && v2 == 1 && v3 == 1)
1924             chroma_type = GEN7_YUV444;
1925         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1926                  v1 == 1 && v2 == 1 && v3 == 1)
1927             chroma_type = GEN7_YUV411;
1928         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1929                  v1 == 2 && v2 == 1 && v3 == 1)
1930             chroma_type = GEN7_YUV422V_2Y;
1931         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1932                  v1 == 2 && v2 == 2 && v3 == 2)
1933             chroma_type = GEN7_YUV422H_4Y;
1934         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1935                  v1 == 2 && v2 == 1 && v3 == 1)
1936             chroma_type = GEN7_YUV422V_4Y;
1937         else
1938             assert(0);
1939     }
1940
1941     if (chroma_type == GEN7_YUV400 ||
1942         chroma_type == GEN7_YUV444 ||
1943         chroma_type == GEN7_YUV422V_2Y) {
1944         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
1945         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
1946     } else if (chroma_type == GEN7_YUV411) {
1947         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
1948         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
1949     } else {
1950         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
1951         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
1952     }
1953
1954     BEGIN_BCS_BATCH(batch, 3);
1955     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
1956     OUT_BCS_BATCH(batch,
1957                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
1958                   (chroma_type << 0));
1959     OUT_BCS_BATCH(batch,
1960                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
1961                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
1962     ADVANCE_BCS_BATCH(batch);
1963 }
1964
1965 static const int va_to_gen7_jpeg_hufftable[2] = {
1966     MFX_HUFFTABLE_ID_Y,
1967     MFX_HUFFTABLE_ID_UV
1968 };
1969
1970 static void
1971 gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
1972                                struct decode_state *decode_state,
1973                                struct gen7_mfd_context *gen7_mfd_context,
1974                                int num_tables)
1975 {
1976     VAHuffmanTableBufferJPEGBaseline *huffman_table;
1977     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1978     int index;
1979
1980     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
1981         return;
1982
1983     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
1984
1985     for (index = 0; index < num_tables; index++) {
1986         int id = va_to_gen7_jpeg_hufftable[index];
1987         if (!huffman_table->load_huffman_table[index])
1988             continue;
1989         BEGIN_BCS_BATCH(batch, 53);
1990         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
1991         OUT_BCS_BATCH(batch, id);
1992         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
1993         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
1994         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
1995         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
1996         ADVANCE_BCS_BATCH(batch);
1997     }
1998 }
1999
2000 static const int va_to_gen7_jpeg_qm[5] = {
2001     -1,
2002     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2003     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2004     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2005     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2006 };
2007
2008 static void
2009 gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
2010                        struct decode_state *decode_state,
2011                        struct gen7_mfd_context *gen7_mfd_context)
2012 {
2013     VAPictureParameterBufferJPEGBaseline *pic_param;
2014     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2015     int index;
2016
2017     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2018         return;
2019
2020     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2021     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2022
2023     assert(pic_param->num_components <= 3);
2024
2025     for (index = 0; index < pic_param->num_components; index++) {
2026         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2027         int qm_type;
2028         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2029         unsigned char raster_qm[64];
2030         int j;
2031
2032         if (id > 4 || id < 1)
2033             continue;
2034
2035         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2036             continue;
2037
2038         qm_type = va_to_gen7_jpeg_qm[id];
2039
2040         for (j = 0; j < 64; j++)
2041             raster_qm[zigzag_direct[j]] = qm[j];
2042
2043         gen7_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2044     }
2045 }
2046
2047 static void
2048 gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
2049                          VAPictureParameterBufferJPEGBaseline *pic_param,
2050                          VASliceParameterBufferJPEGBaseline *slice_param,
2051                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2052                          dri_bo *slice_data_bo,
2053                          struct gen7_mfd_context *gen7_mfd_context)
2054 {
2055     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2056     int scan_component_mask = 0;
2057     int i;
2058
2059     assert(slice_param->num_components > 0);
2060     assert(slice_param->num_components < 4);
2061     assert(slice_param->num_components <= pic_param->num_components);
2062
2063     for (i = 0; i < slice_param->num_components; i++) {
2064         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2065         case 1:
2066             scan_component_mask |= (1 << 0);
2067             break;
2068         case 2:
2069             scan_component_mask |= (1 << 1);
2070             break;
2071         case 3:
2072             scan_component_mask |= (1 << 2);
2073             break;
2074         default:
2075             assert(0);
2076             break;
2077         }
2078     }
2079
2080     BEGIN_BCS_BATCH(batch, 6);
2081     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2082     OUT_BCS_BATCH(batch, 
2083                   slice_param->slice_data_size);
2084     OUT_BCS_BATCH(batch, 
2085                   slice_param->slice_data_offset);
2086     OUT_BCS_BATCH(batch,
2087                   slice_param->slice_horizontal_position << 16 |
2088                   slice_param->slice_vertical_position << 0);
2089     OUT_BCS_BATCH(batch,
2090                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2091                   (scan_component_mask << 27) |                 /* scan components */
2092                   (0 << 26) |   /* disable interrupt allowed */
2093                   (slice_param->num_mcus << 0));                /* MCU count */
2094     OUT_BCS_BATCH(batch,
2095                   (slice_param->restart_interval << 0));    /* RestartInterval */
2096     ADVANCE_BCS_BATCH(batch);
2097 }
2098
2099 /* Workaround for JPEG decoding on Ivybridge */
2100
/*
 * Fixed parameters of the tiny clip used by the JPEG workaround:
 * width/height size the workaround surface, and data (data_size bytes of
 * it) is uploaded into the workaround slice-data BO.
 * NOTE(review): data_bit_offset and qp are presumably consumed by the
 * workaround's slice/picture programming; confirm at their use sites.
 */
static struct {
    int width;
    int height;
    unsigned char data[32];
    int data_size;
    int data_bit_offset;
    int qp;
} gen7_jpeg_wa_clip = {
    .width = 16,
    .height = 16,
    .data = {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    .data_size = 14,
    .data_bit_offset = 40,
    .qp = 28,
};
2119
2120 static void
2121 gen7_jpeg_wa_init(VADriverContextP ctx,
2122                   struct gen7_mfd_context *gen7_mfd_context)
2123 {
2124     struct i965_driver_data *i965 = i965_driver_data(ctx);
2125     VAStatus status;
2126     struct object_surface *obj_surface;
2127
2128     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2129         i965_DestroySurfaces(ctx,
2130                              &gen7_mfd_context->jpeg_wa_surface_id,
2131                              1);
2132
2133     status = i965_CreateSurfaces(ctx,
2134                                  gen7_jpeg_wa_clip.width,
2135                                  gen7_jpeg_wa_clip.height,
2136                                  VA_RT_FORMAT_YUV420,
2137                                  1,
2138                                  &gen7_mfd_context->jpeg_wa_surface_id);
2139     assert(status == VA_STATUS_SUCCESS);
2140
2141     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2142     assert(obj_surface);
2143     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2144     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2145
2146     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2147         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2148                                                                "JPEG WA data",
2149                                                                0x1000,
2150                                                                0x1000);
2151         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2152                        0,
2153                        gen7_jpeg_wa_clip.data_size,
2154                        gen7_jpeg_wa_clip.data);
2155     }
2156 }
2157
2158 static void
2159 gen7_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2160                               struct gen7_mfd_context *gen7_mfd_context)
2161 {
2162     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2163
2164     BEGIN_BCS_BATCH(batch, 5);
2165     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2166     OUT_BCS_BATCH(batch,
2167                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2168                   (MFD_MODE_VLD << 15) | /* VLD mode */
2169                   (0 << 10) | /* disable Stream-Out */
2170                   (0 << 9)  | /* Post Deblocking Output */
2171                   (1 << 8)  | /* Pre Deblocking Output */
2172                   (0 << 5)  | /* not in stitch mode */
2173                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2174                   (MFX_FORMAT_AVC << 0));
2175     OUT_BCS_BATCH(batch,
2176                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2177                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2178                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2179                   (0 << 1)  |
2180                   (0 << 0));
2181     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2182     OUT_BCS_BATCH(batch, 0); /* reserved */
2183     ADVANCE_BCS_BATCH(batch);
2184 }
2185
2186 static void
2187 gen7_jpeg_wa_surface_state(VADriverContextP ctx,
2188                            struct gen7_mfd_context *gen7_mfd_context)
2189 {
2190     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2191     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2192
2193     BEGIN_BCS_BATCH(batch, 6);
2194     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2195     OUT_BCS_BATCH(batch, 0);
2196     OUT_BCS_BATCH(batch,
2197                   ((obj_surface->orig_width - 1) << 18) |
2198                   ((obj_surface->orig_height - 1) << 4));
2199     OUT_BCS_BATCH(batch,
2200                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2201                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2202                   (0 << 22) | /* surface object control state, ignored */
2203                   ((obj_surface->width - 1) << 3) | /* pitch */
2204                   (0 << 2)  | /* must be 0 */
2205                   (1 << 1)  | /* must be tiled */
2206                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2207     OUT_BCS_BATCH(batch,
2208                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2209                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2210     OUT_BCS_BATCH(batch,
2211                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2212                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2213     ADVANCE_BCS_BATCH(batch);
2214 }
2215
2216 static void
2217 gen7_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2218                                  struct gen7_mfd_context *gen7_mfd_context)
2219 {
2220     struct i965_driver_data *i965 = i965_driver_data(ctx);
2221     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2222     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2223     dri_bo *intra_bo;
2224     int i;
2225
2226     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2227                             "intra row store",
2228                             128 * 64,
2229                             0x1000);
2230
2231     BEGIN_BCS_BATCH(batch, 24);
2232     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
2233     OUT_BCS_RELOC(batch,
2234                   obj_surface->bo,
2235                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2236                   0);
2237     
2238     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2239
2240     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2241     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2242
2243     OUT_BCS_RELOC(batch,
2244                   intra_bo,
2245                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2246                   0);
2247
2248     OUT_BCS_BATCH(batch, 0);
2249
2250     /* DW 7..22 */
2251     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2252         OUT_BCS_BATCH(batch, 0);
2253     }
2254
2255     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2256     ADVANCE_BCS_BATCH(batch);
2257
2258     dri_bo_unreference(intra_bo);
2259 }
2260
2261 static void
2262 gen7_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2263                                      struct gen7_mfd_context *gen7_mfd_context)
2264 {
2265     struct i965_driver_data *i965 = i965_driver_data(ctx);
2266     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2267     dri_bo *bsd_mpc_bo, *mpr_bo;
2268
2269     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2270                               "bsd mpc row store",
2271                               11520, /* 1.5 * 120 * 64 */
2272                               0x1000);
2273
2274     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2275                           "mpr row store",
2276                           7680, /* 1. 0 * 120 * 64 */
2277                           0x1000);
2278
2279     BEGIN_BCS_BATCH(batch, 4);
2280     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2281
2282     OUT_BCS_RELOC(batch,
2283                   bsd_mpc_bo,
2284                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2285                   0);
2286
2287     OUT_BCS_RELOC(batch,
2288                   mpr_bo,
2289                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2290                   0);
2291     OUT_BCS_BATCH(batch, 0);
2292
2293     ADVANCE_BCS_BATCH(batch);
2294
2295     dri_bo_unreference(bsd_mpc_bo);
2296     dri_bo_unreference(mpr_bo);
2297 }
2298
2299 static void
2300 gen7_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2301                           struct gen7_mfd_context *gen7_mfd_context)
2302 {
2303
2304 }
2305
2306 static void
2307 gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
2308                            struct gen7_mfd_context *gen7_mfd_context)
2309 {
2310     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2311     int img_struct = 0;
2312     int mbaff_frame_flag = 0;
2313     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2314
2315     BEGIN_BCS_BATCH(batch, 16);
2316     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2317     OUT_BCS_BATCH(batch, 
2318                   (width_in_mbs * height_in_mbs - 1));
2319     OUT_BCS_BATCH(batch, 
2320                   ((height_in_mbs - 1) << 16) | 
2321                   ((width_in_mbs - 1) << 0));
2322     OUT_BCS_BATCH(batch, 
2323                   (0 << 24) |
2324                   (0 << 16) |
2325                   (0 << 14) |
2326                   (0 << 13) |
2327                   (0 << 12) | /* differ from GEN6 */
2328                   (0 << 10) |
2329                   (img_struct << 8));
2330     OUT_BCS_BATCH(batch,
2331                   (1 << 10) | /* 4:2:0 */
2332                   (1 << 7) |  /* CABAC */
2333                   (0 << 6) |
2334                   (0 << 5) |
2335                   (0 << 4) |
2336                   (0 << 3) |
2337                   (1 << 2) |
2338                   (mbaff_frame_flag << 1) |
2339                   (0 << 0));
2340     OUT_BCS_BATCH(batch, 0);
2341     OUT_BCS_BATCH(batch, 0);
2342     OUT_BCS_BATCH(batch, 0);
2343     OUT_BCS_BATCH(batch, 0);
2344     OUT_BCS_BATCH(batch, 0);
2345     OUT_BCS_BATCH(batch, 0);
2346     OUT_BCS_BATCH(batch, 0);
2347     OUT_BCS_BATCH(batch, 0);
2348     OUT_BCS_BATCH(batch, 0);
2349     OUT_BCS_BATCH(batch, 0);
2350     OUT_BCS_BATCH(batch, 0);
2351     ADVANCE_BCS_BATCH(batch);
2352 }
2353
2354 static void
2355 gen7_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2356                                   struct gen7_mfd_context *gen7_mfd_context)
2357 {
2358     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2359     int i;
2360
2361     BEGIN_BCS_BATCH(batch, 69);
2362     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2363
2364     /* reference surfaces 0..15 */
2365     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2366         OUT_BCS_BATCH(batch, 0); /* top */
2367         OUT_BCS_BATCH(batch, 0); /* bottom */
2368     }
2369
2370     /* the current decoding frame/field */
2371     OUT_BCS_BATCH(batch, 0); /* top */
2372     OUT_BCS_BATCH(batch, 0); /* bottom */
2373
2374     /* POC List */
2375     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2376         OUT_BCS_BATCH(batch, 0);
2377         OUT_BCS_BATCH(batch, 0);
2378     }
2379
2380     OUT_BCS_BATCH(batch, 0);
2381     OUT_BCS_BATCH(batch, 0);
2382
2383     ADVANCE_BCS_BATCH(batch);
2384 }
2385
2386 static void
2387 gen7_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2388                                      struct gen7_mfd_context *gen7_mfd_context)
2389 {
2390     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2391
2392     BEGIN_BCS_BATCH(batch, 11);
2393     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2394     OUT_BCS_RELOC(batch,
2395                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2396                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2397                   0);
2398     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2399     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2400     OUT_BCS_BATCH(batch, 0);
2401     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2402     OUT_BCS_BATCH(batch, 0);
2403     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2404     OUT_BCS_BATCH(batch, 0);
2405     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2406     OUT_BCS_BATCH(batch, 0);
2407     ADVANCE_BCS_BATCH(batch);
2408 }
2409
2410 static void
2411 gen7_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2412                             struct gen7_mfd_context *gen7_mfd_context)
2413 {
2414     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2415
2416     /* the input bitsteam format on GEN7 differs from GEN6 */
2417     BEGIN_BCS_BATCH(batch, 6);
2418     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2419     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2420     OUT_BCS_BATCH(batch, 0);
2421     OUT_BCS_BATCH(batch,
2422                   (0 << 31) |
2423                   (0 << 14) |
2424                   (0 << 12) |
2425                   (0 << 10) |
2426                   (0 << 8));
2427     OUT_BCS_BATCH(batch,
2428                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2429                   (0 << 5)  |
2430                   (0 << 4)  |
2431                   (1 << 3) | /* LastSlice Flag */
2432                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2433     OUT_BCS_BATCH(batch, 0);
2434     ADVANCE_BCS_BATCH(batch);
2435 }
2436
2437 static void
2438 gen7_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2439                              struct gen7_mfd_context *gen7_mfd_context)
2440 {
2441     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2442     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2443     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2444     int first_mb_in_slice = 0;
2445     int slice_type = SLICE_TYPE_I;
2446
2447     BEGIN_BCS_BATCH(batch, 11);
2448     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2449     OUT_BCS_BATCH(batch, slice_type);
2450     OUT_BCS_BATCH(batch, 
2451                   (num_ref_idx_l1 << 24) |
2452                   (num_ref_idx_l0 << 16) |
2453                   (0 << 8) |
2454                   (0 << 0));
2455     OUT_BCS_BATCH(batch, 
2456                   (0 << 29) |
2457                   (1 << 27) |   /* disable Deblocking */
2458                   (0 << 24) |
2459                   (gen7_jpeg_wa_clip.qp << 16) |
2460                   (0 << 8) |
2461                   (0 << 0));
2462     OUT_BCS_BATCH(batch, 
2463                   (slice_ver_pos << 24) |
2464                   (slice_hor_pos << 16) | 
2465                   (first_mb_in_slice << 0));
2466     OUT_BCS_BATCH(batch,
2467                   (next_slice_ver_pos << 16) |
2468                   (next_slice_hor_pos << 0));
2469     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2470     OUT_BCS_BATCH(batch, 0);
2471     OUT_BCS_BATCH(batch, 0);
2472     OUT_BCS_BATCH(batch, 0);
2473     OUT_BCS_BATCH(batch, 0);
2474     ADVANCE_BCS_BATCH(batch);
2475 }
2476
2477 static void
2478 gen7_mfd_jpeg_wa(VADriverContextP ctx,
2479                  struct gen7_mfd_context *gen7_mfd_context)
2480 {
2481     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2482     gen7_jpeg_wa_init(ctx, gen7_mfd_context);
2483     intel_batchbuffer_emit_mi_flush(batch);
2484     gen7_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2485     gen7_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2486     gen7_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2487     gen7_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2488     gen7_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2489     gen7_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2490     gen7_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2491
2492     gen7_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2493     gen7_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2494     gen7_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2495 }
2496
2497 void
2498 gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
2499                              struct decode_state *decode_state,
2500                              struct gen7_mfd_context *gen7_mfd_context)
2501 {
2502     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2503     VAPictureParameterBufferJPEGBaseline *pic_param;
2504     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2505     dri_bo *slice_data_bo;
2506     int i, j, max_selector = 0;
2507
2508     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2509     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2510
2511     /* Currently only support Baseline DCT */
2512     gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2513     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2514     gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
2515     intel_batchbuffer_emit_mi_flush(batch);
2516     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2517     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2518     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2519     gen7_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2520     gen7_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2521
2522     for (j = 0; j < decode_state->num_slice_params; j++) {
2523         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2524         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2525         slice_data_bo = decode_state->slice_datas[j]->bo;
2526         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2527
2528         if (j == decode_state->num_slice_params - 1)
2529             next_slice_group_param = NULL;
2530         else
2531             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2532
2533         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2534             int component;
2535
2536             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2537
2538             if (i < decode_state->slice_params[j]->num_elements - 1)
2539                 next_slice_param = slice_param + 1;
2540             else
2541                 next_slice_param = next_slice_group_param;
2542
2543             for (component = 0; component < slice_param->num_components; component++) {
2544                 if (max_selector < slice_param->components[component].dc_table_selector)
2545                     max_selector = slice_param->components[component].dc_table_selector;
2546
2547                 if (max_selector < slice_param->components[component].ac_table_selector)
2548                     max_selector = slice_param->components[component].ac_table_selector;
2549             }
2550
2551             slice_param++;
2552         }
2553     }
2554
2555     assert(max_selector < 2);
2556     gen7_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2557
2558     for (j = 0; j < decode_state->num_slice_params; j++) {
2559         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2560         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2561         slice_data_bo = decode_state->slice_datas[j]->bo;
2562         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2563
2564         if (j == decode_state->num_slice_params - 1)
2565             next_slice_group_param = NULL;
2566         else
2567             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2568
2569         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2570             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2571
2572             if (i < decode_state->slice_params[j]->num_elements - 1)
2573                 next_slice_param = slice_param + 1;
2574             else
2575                 next_slice_param = next_slice_group_param;
2576
2577             gen7_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2578             slice_param++;
2579         }
2580     }
2581
2582     intel_batchbuffer_end_atomic(batch);
2583     intel_batchbuffer_flush(batch);
2584 }
2585
2586 static VAStatus
2587 gen7_mfd_decode_picture(VADriverContextP ctx, 
2588                         VAProfile profile, 
2589                         union codec_state *codec_state,
2590                         struct hw_context *hw_context)
2591
2592 {
2593     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2594     struct decode_state *decode_state = &codec_state->decode;
2595     VAStatus vaStatus;
2596
2597     assert(gen7_mfd_context);
2598
2599     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
2600
2601     if (vaStatus != VA_STATUS_SUCCESS)
2602         goto out;
2603
2604     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2605
2606     switch (profile) {
2607     case VAProfileMPEG2Simple:
2608     case VAProfileMPEG2Main:
2609         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
2610         break;
2611         
2612     case VAProfileH264ConstrainedBaseline:
2613     case VAProfileH264Main:
2614     case VAProfileH264High:
2615     case VAProfileH264StereoHigh:
2616         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
2617         break;
2618
2619     case VAProfileVC1Simple:
2620     case VAProfileVC1Main:
2621     case VAProfileVC1Advanced:
2622         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
2623         break;
2624
2625     case VAProfileJPEGBaseline:
2626         gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
2627         break;
2628
2629     default:
2630         assert(0);
2631         break;
2632     }
2633
2634     vaStatus = VA_STATUS_SUCCESS;
2635
2636 out:
2637     return vaStatus;
2638 }
2639
2640 static void
2641 gen7_mfd_context_destroy(void *hw_context)
2642 {
2643     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2644
2645     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2646     gen7_mfd_context->post_deblocking_output.bo = NULL;
2647
2648     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2649     gen7_mfd_context->pre_deblocking_output.bo = NULL;
2650
2651     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2652     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2653
2654     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2655     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2656
2657     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2658     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2659
2660     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2661     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2662
2663     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
2664     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2665
2666     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
2667
2668     intel_batchbuffer_free(gen7_mfd_context->base.batch);
2669     free(gen7_mfd_context);
2670 }
2671
2672 static void gen7_mfd_mpeg2_context_init(VADriverContextP ctx,
2673                                     struct gen7_mfd_context *gen7_mfd_context)
2674 {
2675     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
2676     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
2677     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
2678     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
2679 }
2680
2681 struct hw_context *
2682 gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
2683 {
2684     struct intel_driver_data *intel = intel_driver_data(ctx);
2685     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
2686     int i;
2687
2688     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
2689     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
2690     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
2691
2692     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
2693         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2694         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
2695         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
2696     }
2697
2698     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
2699     gen7_mfd_context->jpeg_wa_surface_object = NULL;
2700
2701     switch (obj_config->profile) {
2702     case VAProfileMPEG2Simple:
2703     case VAProfileMPEG2Main:
2704         gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
2705         break;
2706
2707     case VAProfileH264ConstrainedBaseline:
2708     case VAProfileH264Main:
2709     case VAProfileH264High:
2710     case VAProfileH264StereoHigh:
2711         gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
2712         break;
2713     default:
2714         break;
2715     }
2716     return (struct hw_context *)gen7_mfd_context;
2717 }