Use the right parameters to initialize bit rate context
[platform/upstream/libva-intel-driver.git] / src / gen7_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include "sysdeps.h"
30
31 #include <va/va_dec_jpeg.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_decoder_utils.h"
38
39 #include "gen7_mfd.h"
40 #include "intel_media.h"
41
/*
 * Zigzag scan table for an 8x8 block: entry k holds the raster (row-major)
 * index of the k-th coefficient in zigzag order.
 */
static const uint32_t zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63,
};
52
53 static void
54 gen7_mfd_init_avc_surface(VADriverContextP ctx, 
55                           VAPictureParameterBufferH264 *pic_param,
56                           struct object_surface *obj_surface)
57 {
58     struct i965_driver_data *i965 = i965_driver_data(ctx);
59     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
60     int width_in_mbs, height_in_mbs;
61
62     obj_surface->free_private_data = gen_free_avc_surface;
63     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
64     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
65
66     if (!gen7_avc_surface) {
67         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
68         assert((obj_surface->size & 0x3f) == 0);
69         obj_surface->private_data = gen7_avc_surface;
70     }
71
72     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
73                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
74
75     if (gen7_avc_surface->dmv_top == NULL) {
76         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
77                                                  "direct mv w/r buffer",
78                                                  width_in_mbs * (height_in_mbs + 1) * 64,
79                                                  0x1000);
80         assert(gen7_avc_surface->dmv_top);
81     }
82
83     if (gen7_avc_surface->dmv_bottom_flag &&
84         gen7_avc_surface->dmv_bottom == NULL) {
85         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
86                                                     "direct mv w/r buffer",
87                                                     width_in_mbs * (height_in_mbs + 1) * 64,
88                                                     0x1000);
89         assert(gen7_avc_surface->dmv_bottom);
90     }
91 }
92
93 static void
94 gen7_mfd_pipe_mode_select(VADriverContextP ctx,
95                           struct decode_state *decode_state,
96                           int standard_select,
97                           struct gen7_mfd_context *gen7_mfd_context)
98 {
99     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
100
101     assert(standard_select == MFX_FORMAT_MPEG2 ||
102            standard_select == MFX_FORMAT_AVC ||
103            standard_select == MFX_FORMAT_VC1 ||
104            standard_select == MFX_FORMAT_JPEG);
105
106     BEGIN_BCS_BATCH(batch, 5);
107     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
108     OUT_BCS_BATCH(batch,
109                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
110                   (MFD_MODE_VLD << 15) | /* VLD mode */
111                   (0 << 10) | /* disable Stream-Out */
112                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
113                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
114                   (0 << 5)  | /* not in stitch mode */
115                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
116                   (standard_select << 0));
117     OUT_BCS_BATCH(batch,
118                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
119                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
120                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
121                   (0 << 1)  |
122                   (0 << 0));
123     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
124     OUT_BCS_BATCH(batch, 0); /* reserved */
125     ADVANCE_BCS_BATCH(batch);
126 }
127
128 static void
129 gen7_mfd_surface_state(VADriverContextP ctx,
130                        struct decode_state *decode_state,
131                        int standard_select,
132                        struct gen7_mfd_context *gen7_mfd_context)
133 {
134     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
135     struct object_surface *obj_surface = decode_state->render_object;
136     unsigned int y_cb_offset;
137     unsigned int y_cr_offset;
138
139     assert(obj_surface);
140
141     y_cb_offset = obj_surface->y_cb_offset;
142     y_cr_offset = obj_surface->y_cr_offset;
143
144     BEGIN_BCS_BATCH(batch, 6);
145     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
146     OUT_BCS_BATCH(batch, 0);
147     OUT_BCS_BATCH(batch,
148                   ((obj_surface->orig_height - 1) << 18) |
149                   ((obj_surface->orig_width - 1) << 4));
150     OUT_BCS_BATCH(batch,
151                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
152                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
153                   (0 << 22) | /* surface object control state, ignored */
154                   ((obj_surface->width - 1) << 3) | /* pitch */
155                   (0 << 2)  | /* must be 0 */
156                   (1 << 1)  | /* must be tiled */
157                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
158     OUT_BCS_BATCH(batch,
159                   (0 << 16) | /* X offset for U(Cb), must be 0 */
160                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
161     OUT_BCS_BATCH(batch,
162                   (0 << 16) | /* X offset for V(Cr), must be 0 */
163                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
164     ADVANCE_BCS_BATCH(batch);
165 }
166
167 static void
168 gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
169                              struct decode_state *decode_state,
170                              int standard_select,
171                              struct gen7_mfd_context *gen7_mfd_context)
172 {
173     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
174     int i;
175
176     BEGIN_BCS_BATCH(batch, 24);
177     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
178     if (gen7_mfd_context->pre_deblocking_output.valid)
179         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
180                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
181                       0);
182     else
183         OUT_BCS_BATCH(batch, 0);
184
185     if (gen7_mfd_context->post_deblocking_output.valid)
186         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
187                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
188                       0);
189     else
190         OUT_BCS_BATCH(batch, 0);
191
192     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
193     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
194
195     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
196         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
197                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
198                       0);
199     else
200         OUT_BCS_BATCH(batch, 0);
201
202     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
203         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
204                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
205                       0);
206     else
207         OUT_BCS_BATCH(batch, 0);
208
209     /* DW 7..22 */
210     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
211         struct object_surface *obj_surface;
212
213         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
214             gen7_mfd_context->reference_surface[i].obj_surface &&
215             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
216             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
217
218             OUT_BCS_RELOC(batch, obj_surface->bo,
219                           I915_GEM_DOMAIN_INSTRUCTION, 0,
220                           0);
221         } else {
222             OUT_BCS_BATCH(batch, 0);
223         }
224     }
225
226     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
227     ADVANCE_BCS_BATCH(batch);
228 }
229
230 static void
231 gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
232                                  dri_bo *slice_data_bo,
233                                  int standard_select,
234                                  struct gen7_mfd_context *gen7_mfd_context)
235 {
236     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
237
238     BEGIN_BCS_BATCH(batch, 11);
239     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
240     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
241     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
242     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
243     OUT_BCS_BATCH(batch, 0);
244     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
245     OUT_BCS_BATCH(batch, 0);
246     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
247     OUT_BCS_BATCH(batch, 0);
248     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
249     OUT_BCS_BATCH(batch, 0);
250     ADVANCE_BCS_BATCH(batch);
251 }
252
253 static void
254 gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
255                                  struct decode_state *decode_state,
256                                  int standard_select,
257                                  struct gen7_mfd_context *gen7_mfd_context)
258 {
259     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
260
261     BEGIN_BCS_BATCH(batch, 4);
262     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
263
264     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
265         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
266                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
267                       0);
268     else
269         OUT_BCS_BATCH(batch, 0);
270
271     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
272         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
273                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
274                       0);
275     else
276         OUT_BCS_BATCH(batch, 0);
277
278     if (gen7_mfd_context->bitplane_read_buffer.valid)
279         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
280                       I915_GEM_DOMAIN_INSTRUCTION, 0,
281                       0);
282     else
283         OUT_BCS_BATCH(batch, 0);
284
285     ADVANCE_BCS_BATCH(batch);
286 }
287
288 static void
289 gen7_mfd_qm_state(VADriverContextP ctx,
290                   int qm_type,
291                   unsigned char *qm,
292                   int qm_length,
293                   struct gen7_mfd_context *gen7_mfd_context)
294 {
295     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
296     unsigned int qm_buffer[16];
297
298     assert(qm_length <= 16 * 4);
299     memcpy(qm_buffer, qm, qm_length);
300
301     BEGIN_BCS_BATCH(batch, 18);
302     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
303     OUT_BCS_BATCH(batch, qm_type << 0);
304     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
305     ADVANCE_BCS_BATCH(batch);
306 }
307
308 static void
309 gen7_mfd_avc_img_state(VADriverContextP ctx,
310                        struct decode_state *decode_state,
311                        struct gen7_mfd_context *gen7_mfd_context)
312 {
313     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
314     int img_struct;
315     int mbaff_frame_flag;
316     unsigned int width_in_mbs, height_in_mbs;
317     VAPictureParameterBufferH264 *pic_param;
318
319     assert(decode_state->pic_param && decode_state->pic_param->buffer);
320     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
321
322     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
323         img_struct = 1;
324     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
325         img_struct = 3;
326     else
327         img_struct = 0;
328
329     if ((img_struct & 0x1) == 0x1) {
330         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
331     } else {
332         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
333     }
334
335     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
336         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
337         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
338     } else {
339         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
340     }
341
342     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
343                         !pic_param->pic_fields.bits.field_pic_flag);
344
345     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
346     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
347
348     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
349     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
350            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
351     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
352
353     BEGIN_BCS_BATCH(batch, 16);
354     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
355     OUT_BCS_BATCH(batch, 
356                   (width_in_mbs * height_in_mbs - 1));
357     OUT_BCS_BATCH(batch, 
358                   ((height_in_mbs - 1) << 16) | 
359                   ((width_in_mbs - 1) << 0));
360     OUT_BCS_BATCH(batch, 
361                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
362                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
363                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
364                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
365                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
366                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
367                   (img_struct << 8));
368     OUT_BCS_BATCH(batch,
369                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
370                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
371                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
372                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
373                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
374                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
375                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
376                   (mbaff_frame_flag << 1) |
377                   (pic_param->pic_fields.bits.field_pic_flag << 0));
378     OUT_BCS_BATCH(batch, 0);
379     OUT_BCS_BATCH(batch, 0);
380     OUT_BCS_BATCH(batch, 0);
381     OUT_BCS_BATCH(batch, 0);
382     OUT_BCS_BATCH(batch, 0);
383     OUT_BCS_BATCH(batch, 0);
384     OUT_BCS_BATCH(batch, 0);
385     OUT_BCS_BATCH(batch, 0);
386     OUT_BCS_BATCH(batch, 0);
387     OUT_BCS_BATCH(batch, 0);
388     OUT_BCS_BATCH(batch, 0);
389     ADVANCE_BCS_BATCH(batch);
390 }
391
392 static void
393 gen7_mfd_avc_qm_state(VADriverContextP ctx,
394                       struct decode_state *decode_state,
395                       struct gen7_mfd_context *gen7_mfd_context)
396 {
397     VAIQMatrixBufferH264 *iq_matrix;
398     VAPictureParameterBufferH264 *pic_param;
399
400     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
401         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
402     else
403         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
404
405     assert(decode_state->pic_param && decode_state->pic_param->buffer);
406     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
407
408     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
409     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
410
411     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
412         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
413         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
414     }
415 }
416
417 static void
418 gen7_mfd_avc_directmode_state(VADriverContextP ctx,
419                               struct decode_state *decode_state,
420                               VAPictureParameterBufferH264 *pic_param,
421                               VASliceParameterBufferH264 *slice_param,
422                               struct gen7_mfd_context *gen7_mfd_context)
423 {
424     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
425     struct object_surface *obj_surface;
426     GenAvcSurface *gen7_avc_surface;
427     VAPictureH264 *va_pic;
428     int i, j;
429
430     BEGIN_BCS_BATCH(batch, 69);
431     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
432
433     /* reference surfaces 0..15 */
434     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
435         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
436             gen7_mfd_context->reference_surface[i].obj_surface &&
437             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
438
439             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
440             gen7_avc_surface = obj_surface->private_data;
441             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
442                           I915_GEM_DOMAIN_INSTRUCTION, 0,
443                           0);
444
445             if (gen7_avc_surface->dmv_bottom_flag == 1)
446                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
447                               I915_GEM_DOMAIN_INSTRUCTION, 0,
448                               0);
449             else
450                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
451                               I915_GEM_DOMAIN_INSTRUCTION, 0,
452                               0);
453         } else {
454             OUT_BCS_BATCH(batch, 0);
455             OUT_BCS_BATCH(batch, 0);
456         }
457     }
458
459     /* the current decoding frame/field */
460     va_pic = &pic_param->CurrPic;
461     obj_surface = decode_state->render_object;
462     assert(obj_surface->bo && obj_surface->private_data);
463     gen7_avc_surface = obj_surface->private_data;
464
465     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
466                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
467                   0);
468
469     if (gen7_avc_surface->dmv_bottom_flag == 1)
470         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
471                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
472                       0);
473     else
474         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
475                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
476                       0);
477
478     /* POC List */
479     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
480         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
481             int found = 0;
482
483             assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
484
485             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
486                 va_pic = &pic_param->ReferenceFrames[j];
487                 
488                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
489                     continue;
490
491                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
492                     found = 1;
493                     break;
494                 }
495             }
496
497             assert(found == 1);
498             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
499             
500             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
501             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
502         } else {
503             OUT_BCS_BATCH(batch, 0);
504             OUT_BCS_BATCH(batch, 0);
505         }
506     }
507
508     va_pic = &pic_param->CurrPic;
509     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
510     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
511
512     ADVANCE_BCS_BATCH(batch);
513 }
514
515 static void
516 gen7_mfd_avc_slice_state(VADriverContextP ctx,
517                          VAPictureParameterBufferH264 *pic_param,
518                          VASliceParameterBufferH264 *slice_param,
519                          VASliceParameterBufferH264 *next_slice_param,
520                          struct gen7_mfd_context *gen7_mfd_context)
521 {
522     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
523     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
524     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
525     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
526     int num_ref_idx_l0, num_ref_idx_l1;
527     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
528                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
529     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
530     int slice_type;
531
532     if (slice_param->slice_type == SLICE_TYPE_I ||
533         slice_param->slice_type == SLICE_TYPE_SI) {
534         slice_type = SLICE_TYPE_I;
535     } else if (slice_param->slice_type == SLICE_TYPE_P ||
536                slice_param->slice_type == SLICE_TYPE_SP) {
537         slice_type = SLICE_TYPE_P;
538     } else { 
539         assert(slice_param->slice_type == SLICE_TYPE_B);
540         slice_type = SLICE_TYPE_B;
541     }
542
543     if (slice_type == SLICE_TYPE_I) {
544         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
545         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
546         num_ref_idx_l0 = 0;
547         num_ref_idx_l1 = 0;
548     } else if (slice_type == SLICE_TYPE_P) {
549         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
550         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
551         num_ref_idx_l1 = 0;
552     } else {
553         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
554         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
555     }
556
557     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
558     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
559     slice_ver_pos = first_mb_in_slice / width_in_mbs;
560
561     if (next_slice_param) {
562         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
563         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
564         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
565     } else {
566         next_slice_hor_pos = 0;
567         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
568     }
569
570     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
571     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
572     OUT_BCS_BATCH(batch, slice_type);
573     OUT_BCS_BATCH(batch, 
574                   (num_ref_idx_l1 << 24) |
575                   (num_ref_idx_l0 << 16) |
576                   (slice_param->chroma_log2_weight_denom << 8) |
577                   (slice_param->luma_log2_weight_denom << 0));
578     OUT_BCS_BATCH(batch, 
579                   (slice_param->direct_spatial_mv_pred_flag << 29) |
580                   (slice_param->disable_deblocking_filter_idc << 27) |
581                   (slice_param->cabac_init_idc << 24) |
582                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
583                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
584                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
585     OUT_BCS_BATCH(batch, 
586                   (slice_ver_pos << 24) |
587                   (slice_hor_pos << 16) | 
588                   (first_mb_in_slice << 0));
589     OUT_BCS_BATCH(batch,
590                   (next_slice_ver_pos << 16) |
591                   (next_slice_hor_pos << 0));
592     OUT_BCS_BATCH(batch, 
593                   (next_slice_param == NULL) << 19); /* last slice flag */
594     OUT_BCS_BATCH(batch, 0);
595     OUT_BCS_BATCH(batch, 0);
596     OUT_BCS_BATCH(batch, 0);
597     OUT_BCS_BATCH(batch, 0);
598     ADVANCE_BCS_BATCH(batch);
599 }
600
601 static inline void
602 gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
603                            VAPictureParameterBufferH264 *pic_param,
604                            VASliceParameterBufferH264 *slice_param,
605                            struct gen7_mfd_context *gen7_mfd_context)
606 {
607     gen6_send_avc_ref_idx_state(
608         gen7_mfd_context->base.batch,
609         slice_param,
610         gen7_mfd_context->reference_surface
611     );
612 }
613
614 static void
615 gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
616                                 VAPictureParameterBufferH264 *pic_param,
617                                 VASliceParameterBufferH264 *slice_param,
618                                 struct gen7_mfd_context *gen7_mfd_context)
619 {
620     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
621     int i, j, num_weight_offset_table = 0;
622     short weightoffsets[32 * 6];
623
624     if ((slice_param->slice_type == SLICE_TYPE_P ||
625          slice_param->slice_type == SLICE_TYPE_SP) &&
626         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
627         num_weight_offset_table = 1;
628     }
629     
630     if ((slice_param->slice_type == SLICE_TYPE_B) &&
631         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
632         num_weight_offset_table = 2;
633     }
634
635     for (i = 0; i < num_weight_offset_table; i++) {
636         BEGIN_BCS_BATCH(batch, 98);
637         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
638         OUT_BCS_BATCH(batch, i);
639
640         if (i == 0) {
641             for (j = 0; j < 32; j++) {
642                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
643                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
644                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
645                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
646                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
647                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
648             }
649         } else {
650             for (j = 0; j < 32; j++) {
651                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
652                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
653                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
654                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
655                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
656                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
657             }
658         }
659
660         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
661         ADVANCE_BCS_BATCH(batch);
662     }
663 }
664
665 static void
666 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
667                         VAPictureParameterBufferH264 *pic_param,
668                         VASliceParameterBufferH264 *slice_param,
669                         dri_bo *slice_data_bo,
670                         VASliceParameterBufferH264 *next_slice_param,
671                         struct gen7_mfd_context *gen7_mfd_context)
672 {
673     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
674     unsigned int slice_data_bit_offset;
675
676     slice_data_bit_offset = avc_get_first_mb_bit_offset(
677         slice_data_bo,
678         slice_param,
679         pic_param->pic_fields.bits.entropy_coding_mode_flag
680     );
681
682     /* the input bitsteam format on GEN7 differs from GEN6 */
683     BEGIN_BCS_BATCH(batch, 6);
684     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
685     OUT_BCS_BATCH(batch, 
686                   (slice_param->slice_data_size - slice_param->slice_data_offset));
687     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
688     OUT_BCS_BATCH(batch,
689                   (0 << 31) |
690                   (0 << 14) |
691                   (0 << 12) |
692                   (0 << 10) |
693                   (0 << 8));
694     OUT_BCS_BATCH(batch,
695                   ((slice_data_bit_offset >> 3) << 16) |
696                   (1 << 7)  |
697                   (0 << 5)  |
698                   (0 << 4)  |
699                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
700                   (slice_data_bit_offset & 0x7));
701     OUT_BCS_BATCH(batch, 0);
702     ADVANCE_BCS_BATCH(batch);
703 }
704
705 static inline void
706 gen7_mfd_avc_context_init(
707     VADriverContextP         ctx,
708     struct gen7_mfd_context *gen7_mfd_context
709 )
710 {
711     /* Initialize flat scaling lists */
712     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
713 }
714
715 static void
716 gen7_mfd_avc_decode_init(VADriverContextP ctx,
717                          struct decode_state *decode_state,
718                          struct gen7_mfd_context *gen7_mfd_context)
719 {
720     VAPictureParameterBufferH264 *pic_param;
721     VASliceParameterBufferH264 *slice_param;
722     struct i965_driver_data *i965 = i965_driver_data(ctx);
723     struct object_surface *obj_surface;
724     dri_bo *bo;
725     int i, j, enable_avc_ildb = 0;
726     unsigned int width_in_mbs, height_in_mbs;
727
728     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
729         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
730         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
731
732         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
733             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
734             assert((slice_param->slice_type == SLICE_TYPE_I) ||
735                    (slice_param->slice_type == SLICE_TYPE_SI) ||
736                    (slice_param->slice_type == SLICE_TYPE_P) ||
737                    (slice_param->slice_type == SLICE_TYPE_SP) ||
738                    (slice_param->slice_type == SLICE_TYPE_B));
739
740             if (slice_param->disable_deblocking_filter_idc != 1) {
741                 enable_avc_ildb = 1;
742                 break;
743             }
744
745             slice_param++;
746         }
747     }
748
749     assert(decode_state->pic_param && decode_state->pic_param->buffer);
750     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
751     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
752     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
753     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
754     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
755     assert(height_in_mbs > 0 && height_in_mbs <= 256);
756
757     /* Current decoded picture */
758     obj_surface = decode_state->render_object;
759     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
760     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
761     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
762
763     /* initial uv component for YUV400 case */
764     if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
765          unsigned int uv_offset = obj_surface->width * obj_surface->height;
766          unsigned int uv_size   = obj_surface->width * obj_surface->height / 2;
767
768          drm_intel_gem_bo_map_gtt(obj_surface->bo);
769          memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
770          drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
771     }
772
773     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
774
775     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
776     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
777     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
778     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
779
780     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
781     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
782     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
783     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
784
785     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
786     bo = dri_bo_alloc(i965->intel.bufmgr,
787                       "intra row store",
788                       width_in_mbs * 64,
789                       0x1000);
790     assert(bo);
791     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
792     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
793
794     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
795     bo = dri_bo_alloc(i965->intel.bufmgr,
796                       "deblocking filter row store",
797                       width_in_mbs * 64 * 4,
798                       0x1000);
799     assert(bo);
800     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
801     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
802
803     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
804     bo = dri_bo_alloc(i965->intel.bufmgr,
805                       "bsd mpc row store",
806                       width_in_mbs * 64 * 2,
807                       0x1000);
808     assert(bo);
809     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
810     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
811
812     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
813     bo = dri_bo_alloc(i965->intel.bufmgr,
814                       "mpr row store",
815                       width_in_mbs * 64 * 2,
816                       0x1000);
817     assert(bo);
818     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
819     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
820
821     gen7_mfd_context->bitplane_read_buffer.valid = 0;
822 }
823
824 static void
825 gen7_mfd_avc_decode_picture(VADriverContextP ctx,
826                             struct decode_state *decode_state,
827                             struct gen7_mfd_context *gen7_mfd_context)
828 {
829     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
830     VAPictureParameterBufferH264 *pic_param;
831     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
832     dri_bo *slice_data_bo;
833     int i, j;
834
835     assert(decode_state->pic_param && decode_state->pic_param->buffer);
836     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
837     gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
838
839     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
840     intel_batchbuffer_emit_mi_flush(batch);
841     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
842     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
843     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
844     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
845     gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
846     gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
847
848     for (j = 0; j < decode_state->num_slice_params; j++) {
849         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
850         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
851         slice_data_bo = decode_state->slice_datas[j]->bo;
852         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
853
854         if (j == decode_state->num_slice_params - 1)
855             next_slice_group_param = NULL;
856         else
857             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
858
859         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
860             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
861             assert((slice_param->slice_type == SLICE_TYPE_I) ||
862                    (slice_param->slice_type == SLICE_TYPE_SI) ||
863                    (slice_param->slice_type == SLICE_TYPE_P) ||
864                    (slice_param->slice_type == SLICE_TYPE_SP) ||
865                    (slice_param->slice_type == SLICE_TYPE_B));
866
867             if (i < decode_state->slice_params[j]->num_elements - 1)
868                 next_slice_param = slice_param + 1;
869             else
870                 next_slice_param = next_slice_group_param;
871
872             gen7_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
873             gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
874             gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
875             gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
876             gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
877             slice_param++;
878         }
879     }
880
881     intel_batchbuffer_end_atomic(batch);
882     intel_batchbuffer_flush(batch);
883 }
884
885 static void
886 gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
887                            struct decode_state *decode_state,
888                            struct gen7_mfd_context *gen7_mfd_context)
889 {
890     VAPictureParameterBufferMPEG2 *pic_param;
891     struct i965_driver_data *i965 = i965_driver_data(ctx);
892     struct object_surface *obj_surface;
893     dri_bo *bo;
894     unsigned int width_in_mbs;
895
896     assert(decode_state->pic_param && decode_state->pic_param->buffer);
897     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
898     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
899
900     mpeg2_set_reference_surfaces(
901         ctx,
902         gen7_mfd_context->reference_surface,
903         decode_state,
904         pic_param
905     );
906
907     /* Current decoded picture */
908     obj_surface = decode_state->render_object;
909     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
910
911     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
912     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
913     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
914     gen7_mfd_context->pre_deblocking_output.valid = 1;
915
916     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
917     bo = dri_bo_alloc(i965->intel.bufmgr,
918                       "bsd mpc row store",
919                       width_in_mbs * 96,
920                       0x1000);
921     assert(bo);
922     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
923     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
924
925     gen7_mfd_context->post_deblocking_output.valid = 0;
926     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
927     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
928     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
929     gen7_mfd_context->bitplane_read_buffer.valid = 0;
930 }
931
932 static void
933 gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
934                          struct decode_state *decode_state,
935                          struct gen7_mfd_context *gen7_mfd_context)
936 {
937     struct i965_driver_data * const i965 = i965_driver_data(ctx);
938     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
939     VAPictureParameterBufferMPEG2 *pic_param;
940     unsigned int slice_concealment_disable_bit = 0;
941
942     assert(decode_state->pic_param && decode_state->pic_param->buffer);
943     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
944
945     if (IS_HASWELL(i965->intel.device_id)) {
946         /* XXX: disable concealment for now */
947         slice_concealment_disable_bit = 1;
948     }
949
950     BEGIN_BCS_BATCH(batch, 13);
951     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
952     OUT_BCS_BATCH(batch,
953                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
954                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
955                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
956                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
957                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
958                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
959                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
960                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
961                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
962                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
963                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
964                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
965     OUT_BCS_BATCH(batch,
966                   pic_param->picture_coding_type << 9);
967     OUT_BCS_BATCH(batch,
968                   (slice_concealment_disable_bit << 31) |
969                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
970                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
971     OUT_BCS_BATCH(batch, 0);
972     OUT_BCS_BATCH(batch, 0);
973     OUT_BCS_BATCH(batch, 0);
974     OUT_BCS_BATCH(batch, 0);
975     OUT_BCS_BATCH(batch, 0);
976     OUT_BCS_BATCH(batch, 0);
977     OUT_BCS_BATCH(batch, 0);
978     OUT_BCS_BATCH(batch, 0);
979     OUT_BCS_BATCH(batch, 0);
980     ADVANCE_BCS_BATCH(batch);
981 }
982
983 static void
984 gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
985                         struct decode_state *decode_state,
986                         struct gen7_mfd_context *gen7_mfd_context)
987 {
988     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
989     int i, j;
990
991     /* Update internal QM state */
992     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
993         VAIQMatrixBufferMPEG2 * const iq_matrix =
994             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
995
996         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
997             iq_matrix->load_intra_quantiser_matrix) {
998             gen_iq_matrix->load_intra_quantiser_matrix =
999                 iq_matrix->load_intra_quantiser_matrix;
1000             if (iq_matrix->load_intra_quantiser_matrix) {
1001                 for (j = 0; j < 64; j++)
1002                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1003                         iq_matrix->intra_quantiser_matrix[j];
1004             }
1005         }
1006
1007         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1008             iq_matrix->load_non_intra_quantiser_matrix) {
1009             gen_iq_matrix->load_non_intra_quantiser_matrix =
1010                 iq_matrix->load_non_intra_quantiser_matrix;
1011             if (iq_matrix->load_non_intra_quantiser_matrix) {
1012                 for (j = 0; j < 64; j++)
1013                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1014                         iq_matrix->non_intra_quantiser_matrix[j];
1015             }
1016         }
1017     }
1018
1019     /* Commit QM state to HW */
1020     for (i = 0; i < 2; i++) {
1021         unsigned char *qm = NULL;
1022         int qm_type;
1023
1024         if (i == 0) {
1025             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1026                 qm = gen_iq_matrix->intra_quantiser_matrix;
1027                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1028             }
1029         } else {
1030             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1031                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1032                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1033             }
1034         }
1035
1036         if (!qm)
1037             continue;
1038
1039         gen7_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1040     }
1041 }
1042
1043 static void
1044 gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1045                           VAPictureParameterBufferMPEG2 *pic_param,
1046                           VASliceParameterBufferMPEG2 *slice_param,
1047                           VASliceParameterBufferMPEG2 *next_slice_param,
1048                           struct gen7_mfd_context *gen7_mfd_context)
1049 {
1050     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1051     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1052     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1053     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1054
1055     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1056         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1057         is_field_pic = 1;
1058     is_field_pic_wa = is_field_pic &&
1059         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1060
1061     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1062     hpos0 = slice_param->slice_horizontal_position;
1063
1064     if (next_slice_param == NULL) {
1065         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1066         hpos1 = 0;
1067     } else {
1068         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1069         hpos1 = next_slice_param->slice_horizontal_position;
1070     }
1071
1072     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1073
1074     BEGIN_BCS_BATCH(batch, 5);
1075     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1076     OUT_BCS_BATCH(batch, 
1077                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1078     OUT_BCS_BATCH(batch, 
1079                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1080     OUT_BCS_BATCH(batch,
1081                   hpos0 << 24 |
1082                   vpos0 << 16 |
1083                   mb_count << 8 |
1084                   (next_slice_param == NULL) << 5 |
1085                   (next_slice_param == NULL) << 3 |
1086                   (slice_param->macroblock_offset & 0x7));
1087     OUT_BCS_BATCH(batch,
1088                   (slice_param->quantiser_scale_code << 24) |
1089                   (IS_HASWELL(i965->intel.device_id) ? (vpos1 << 8 | hpos1) : 0));
1090     ADVANCE_BCS_BATCH(batch);
1091 }
1092
1093 static void
1094 gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1095                               struct decode_state *decode_state,
1096                               struct gen7_mfd_context *gen7_mfd_context)
1097 {
1098     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1099     VAPictureParameterBufferMPEG2 *pic_param;
1100     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1101     dri_bo *slice_data_bo;
1102     int i, j;
1103
1104     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1105     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1106
1107     gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1108     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1109     intel_batchbuffer_emit_mi_flush(batch);
1110     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1111     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1112     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1113     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1114     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1115     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1116
1117     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1118         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1119             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1120
1121     for (j = 0; j < decode_state->num_slice_params; j++) {
1122         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1123         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1124         slice_data_bo = decode_state->slice_datas[j]->bo;
1125         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1126
1127         if (j == decode_state->num_slice_params - 1)
1128             next_slice_group_param = NULL;
1129         else
1130             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1131
1132         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1133             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1134
1135             if (i < decode_state->slice_params[j]->num_elements - 1)
1136                 next_slice_param = slice_param + 1;
1137             else
1138                 next_slice_param = next_slice_group_param;
1139
1140             gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1141             slice_param++;
1142         }
1143     }
1144
1145     intel_batchbuffer_end_atomic(batch);
1146     intel_batchbuffer_flush(batch);
1147 }
1148
1149 static const int va_to_gen7_vc1_pic_type[5] = {
1150     GEN7_VC1_I_PICTURE,
1151     GEN7_VC1_P_PICTURE,
1152     GEN7_VC1_B_PICTURE,
1153     GEN7_VC1_BI_PICTURE,
1154     GEN7_VC1_P_PICTURE,
1155 };
1156
/* Hardware unified MV mode, indexed by the VA-API VC-1 mv_mode / mv_mode2 value. */
static const int va_to_gen7_vc1_mv[4] = {
    [0] = 1, /* 1-MV */
    [1] = 2, /* 1-MV half-pel */
    [2] = 3, /* 1-MV half-pel bilinear */
    [3] = 0, /* Mixed MV */
};
1163
/*
 * B picture scale factors, looked up with pic_param->b_picture_fraction
 * (only values below 21 are valid indices).
 */
static const int b_picture_scale_factor[21] = {
    128,  85, 170,  64, 192,  51, 102,   /* entries  0 ..  6 */
    153, 204,  43, 215,  37,  74, 111,   /* entries  7 .. 13 */
    148, 185, 222,  32,  96, 160, 224,   /* entries 14 .. 20 */
};
1171
/* Hardware conditional-overlap value, indexed by the VA-API VC-1 value. */
static const int va_to_gen7_vc1_condover[3] = {
    [0] = 0,
    [1] = 2,
    [2] = 3,
};
1177
1178 static const int va_to_gen7_vc1_profile[4] = {
1179     GEN7_VC1_SIMPLE_PROFILE,
1180     GEN7_VC1_MAIN_PROFILE,
1181     GEN7_VC1_RESERVED_PROFILE,
1182     GEN7_VC1_ADVANCED_PROFILE
1183 };
1184
1185 static void 
1186 gen7_mfd_free_vc1_surface(void **data)
1187 {
1188     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1189
1190     if (!gen7_vc1_surface)
1191         return;
1192
1193     dri_bo_unreference(gen7_vc1_surface->dmv);
1194     free(gen7_vc1_surface);
1195     *data = NULL;
1196 }
1197
1198 static void
1199 gen7_mfd_init_vc1_surface(VADriverContextP ctx, 
1200                           VAPictureParameterBufferVC1 *pic_param,
1201                           struct object_surface *obj_surface)
1202 {
1203     struct i965_driver_data *i965 = i965_driver_data(ctx);
1204     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1205     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1206     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1207
1208     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
1209
1210     if (!gen7_vc1_surface) {
1211         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1212         assert((obj_surface->size & 0x3f) == 0);
1213         obj_surface->private_data = gen7_vc1_surface;
1214     }
1215
1216     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1217
1218     if (gen7_vc1_surface->dmv == NULL) {
1219         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1220                                              "direct mv w/r buffer",
1221                                              width_in_mbs * height_in_mbs * 64,
1222                                              0x1000);
1223     }
1224 }
1225
1226 static void
1227 gen7_mfd_vc1_decode_init(VADriverContextP ctx,
1228                          struct decode_state *decode_state,
1229                          struct gen7_mfd_context *gen7_mfd_context)
1230 {
1231     VAPictureParameterBufferVC1 *pic_param;
1232     struct i965_driver_data *i965 = i965_driver_data(ctx);
1233     struct object_surface *obj_surface;
1234     dri_bo *bo;
1235     int width_in_mbs;
1236     int picture_type;
1237  
1238     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1239     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1240     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1241     picture_type = pic_param->picture_fields.bits.picture_type;
1242  
1243     intel_update_vc1_frame_store_index(ctx,
1244                                        decode_state,
1245                                        pic_param,
1246                                        gen7_mfd_context->reference_surface);
1247
1248     /* Current decoded picture */
1249     obj_surface = decode_state->render_object;
1250     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1251     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1252
1253     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1254     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1255     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1256     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1257
1258     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1259     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1260     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1261     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1262
1263     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1264     bo = dri_bo_alloc(i965->intel.bufmgr,
1265                       "intra row store",
1266                       width_in_mbs * 64,
1267                       0x1000);
1268     assert(bo);
1269     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1270     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1271
1272     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1273     bo = dri_bo_alloc(i965->intel.bufmgr,
1274                       "deblocking filter row store",
1275                       width_in_mbs * 7 * 64,
1276                       0x1000);
1277     assert(bo);
1278     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1279     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1280
1281     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1282     bo = dri_bo_alloc(i965->intel.bufmgr,
1283                       "bsd mpc row store",
1284                       width_in_mbs * 96,
1285                       0x1000);
1286     assert(bo);
1287     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1288     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1289
1290     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1291
1292     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1293     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1294     
1295     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1296         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1297         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1298         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1299         int src_w, src_h;
1300         uint8_t *src = NULL, *dst = NULL;
1301
1302         assert(decode_state->bit_plane->buffer);
1303         src = decode_state->bit_plane->buffer;
1304
1305         bo = dri_bo_alloc(i965->intel.bufmgr,
1306                           "VC-1 Bitplane",
1307                           bitplane_width * height_in_mbs,
1308                           0x1000);
1309         assert(bo);
1310         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1311
1312         dri_bo_map(bo, True);
1313         assert(bo->virtual);
1314         dst = bo->virtual;
1315
1316         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1317             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1318                 int src_index, dst_index;
1319                 int src_shift;
1320                 uint8_t src_value;
1321
1322                 src_index = (src_h * width_in_mbs + src_w) / 2;
1323                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1324                 src_value = ((src[src_index] >> src_shift) & 0xf);
1325
1326                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1327                     src_value |= 0x2;
1328                 }
1329
1330                 dst_index = src_w / 2;
1331                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1332             }
1333
1334             if (src_w & 1)
1335                 dst[src_w / 2] >>= 4;
1336
1337             dst += bitplane_width;
1338         }
1339
1340         dri_bo_unmap(bo);
1341     } else
1342         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1343 }
1344
1345 static void
1346 gen7_mfd_vc1_pic_state(VADriverContextP ctx,
1347                        struct decode_state *decode_state,
1348                        struct gen7_mfd_context *gen7_mfd_context)
1349 {
1350     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1351     VAPictureParameterBufferVC1 *pic_param;
1352     struct object_surface *obj_surface;
1353     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1354     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1355     int unified_mv_mode;
1356     int ref_field_pic_polarity = 0;
1357     int scale_factor = 0;
1358     int trans_ac_y = 0;
1359     int dmv_surface_valid = 0;
1360     int brfd = 0;
1361     int fcm = 0;
1362     int picture_type;
1363     int profile;
1364     int overlap;
1365     int interpolation_mode = 0;
1366
1367     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1368     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1369
1370     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1371     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1372     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1373     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1374     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1375     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1376     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1377     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1378
1379     if (dquant == 0) {
1380         alt_pquant_config = 0;
1381         alt_pquant_edge_mask = 0;
1382     } else if (dquant == 2) {
1383         alt_pquant_config = 1;
1384         alt_pquant_edge_mask = 0xf;
1385     } else {
1386         assert(dquant == 1);
1387         if (dquantfrm == 0) {
1388             alt_pquant_config = 0;
1389             alt_pquant_edge_mask = 0;
1390             alt_pq = 0;
1391         } else {
1392             assert(dquantfrm == 1);
1393             alt_pquant_config = 1;
1394
1395             switch (dqprofile) {
1396             case 3:
1397                 if (dqbilevel == 0) {
1398                     alt_pquant_config = 2;
1399                     alt_pquant_edge_mask = 0;
1400                 } else {
1401                     assert(dqbilevel == 1);
1402                     alt_pquant_config = 3;
1403                     alt_pquant_edge_mask = 0;
1404                 }
1405                 break;
1406                 
1407             case 0:
1408                 alt_pquant_edge_mask = 0xf;
1409                 break;
1410
1411             case 1:
1412                 if (dqdbedge == 3)
1413                     alt_pquant_edge_mask = 0x9;
1414                 else
1415                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1416
1417                 break;
1418
1419             case 2:
1420                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1421                 break;
1422
1423             default:
1424                 assert(0);
1425             }
1426         }
1427     }
1428
1429     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1430         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1431         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1432     } else {
1433         assert(pic_param->mv_fields.bits.mv_mode < 4);
1434         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1435     }
1436
1437     if (pic_param->sequence_fields.bits.interlace == 1 &&
1438         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1439         /* FIXME: calculate reference field picture polarity */
1440         assert(0);
1441         ref_field_pic_polarity = 0;
1442     }
1443
1444     if (pic_param->b_picture_fraction < 21)
1445         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1446
1447     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1448     
1449     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1450         picture_type == GEN7_VC1_I_PICTURE)
1451         picture_type = GEN7_VC1_BI_PICTURE;
1452
1453     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1454         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1455     else {
1456         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1457         /*
1458          * 8.3.6.2.1 Transform Type Selection
1459          * If variable-sized transform coding is not enabled,
1460          * then the 8x8 transform shall be used for all blocks.
1461          * it is also MFX_VC1_PIC_STATE requirement.
1462          */
1463         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1464             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1465             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1466         }
1467     }
1468
1469
1470     if (picture_type == GEN7_VC1_B_PICTURE) {
1471         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1472
1473         obj_surface = decode_state->reference_objects[1];
1474
1475         if (obj_surface)
1476             gen7_vc1_surface = obj_surface->private_data;
1477
1478         if (!gen7_vc1_surface || 
1479             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1480              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1481             dmv_surface_valid = 0;
1482         else
1483             dmv_surface_valid = 1;
1484     }
1485
1486     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1487
1488     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1489         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1490     else {
1491         if (pic_param->picture_fields.bits.top_field_first)
1492             fcm = 2;
1493         else
1494             fcm = 3;
1495     }
1496
1497     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1498         brfd = pic_param->reference_fields.bits.reference_distance;
1499         brfd = (scale_factor * brfd) >> 8;
1500         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1501
1502         if (brfd < 0)
1503             brfd = 0;
1504     }
1505
1506     overlap = 0;
1507     if (profile != GEN7_VC1_ADVANCED_PROFILE){
1508         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1509             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1510             overlap = 1; 
1511         }
1512     }else {
1513         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1514              pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1515               overlap = 1; 
1516         }
1517         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1518             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1519              if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1520                 overlap = 1; 
1521              } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1522                         va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1523                  overlap = 1;
1524              }
1525         }
1526     } 
1527
1528     assert(pic_param->conditional_overlap_flag < 3);
1529     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1530
1531     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1532         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1533          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1534         interpolation_mode = 9; /* Half-pel bilinear */
1535     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1536              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1537               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1538         interpolation_mode = 1; /* Half-pel bicubic */
1539     else
1540         interpolation_mode = 0; /* Quarter-pel bicubic */
1541
1542     BEGIN_BCS_BATCH(batch, 6);
1543     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1544     OUT_BCS_BATCH(batch,
1545                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1546                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1547     OUT_BCS_BATCH(batch,
1548                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1549                   dmv_surface_valid << 15 |
1550                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1551                   pic_param->rounding_control << 13 |
1552                   pic_param->sequence_fields.bits.syncmarker << 12 |
1553                   interpolation_mode << 8 |
1554                   0 << 7 | /* FIXME: scale up or down ??? */
1555                   pic_param->range_reduction_frame << 6 |
1556                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1557                   overlap << 4 |
1558                   !pic_param->picture_fields.bits.is_first_field << 3 |
1559                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1560     OUT_BCS_BATCH(batch,
1561                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1562                   picture_type << 26 |
1563                   fcm << 24 |
1564                   alt_pq << 16 |
1565                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1566                   scale_factor << 0);
1567     OUT_BCS_BATCH(batch,
1568                   unified_mv_mode << 28 |
1569                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1570                   pic_param->fast_uvmc_flag << 26 |
1571                   ref_field_pic_polarity << 25 |
1572                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1573                   pic_param->reference_fields.bits.reference_distance << 20 |
1574                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1575                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1576                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1577                   alt_pquant_edge_mask << 4 |
1578                   alt_pquant_config << 2 |
1579                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1580                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1581     OUT_BCS_BATCH(batch,
1582                   !!pic_param->bitplane_present.value << 31 |
1583                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1584                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1585                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1586                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1587                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1588                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1589                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1590                   pic_param->mv_fields.bits.mv_table << 20 |
1591                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1592                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1593                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1594                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1595                   pic_param->mb_mode_table << 8 |
1596                   trans_ac_y << 6 |
1597                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1598                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1599                   pic_param->cbp_table << 0);
1600     ADVANCE_BCS_BATCH(batch);
1601 }
1602
1603 static void
1604 gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1605                              struct decode_state *decode_state,
1606                              struct gen7_mfd_context *gen7_mfd_context)
1607 {
1608     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1609     VAPictureParameterBufferVC1 *pic_param;
1610     int intensitycomp_single;
1611
1612     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1613     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1614
1615     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1616     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1617     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1618
1619     BEGIN_BCS_BATCH(batch, 6);
1620     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1621     OUT_BCS_BATCH(batch,
1622                   0 << 14 | /* FIXME: double ??? */
1623                   0 << 12 |
1624                   intensitycomp_single << 10 |
1625                   intensitycomp_single << 8 |
1626                   0 << 4 | /* FIXME: interlace mode */
1627                   0);
1628     OUT_BCS_BATCH(batch,
1629                   pic_param->luma_shift << 16 |
1630                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1631     OUT_BCS_BATCH(batch, 0);
1632     OUT_BCS_BATCH(batch, 0);
1633     OUT_BCS_BATCH(batch, 0);
1634     ADVANCE_BCS_BATCH(batch);
1635 }
1636
1637
1638 static void
1639 gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
1640                               struct decode_state *decode_state,
1641                               struct gen7_mfd_context *gen7_mfd_context)
1642 {
1643     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1644     struct object_surface *obj_surface;
1645     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1646
1647     obj_surface = decode_state->render_object;
1648
1649     if (obj_surface && obj_surface->private_data) {
1650         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1651     }
1652
1653     obj_surface = decode_state->reference_objects[1];
1654
1655     if (obj_surface && obj_surface->private_data) {
1656         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1657     }
1658
1659     BEGIN_BCS_BATCH(batch, 3);
1660     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1661
1662     if (dmv_write_buffer)
1663         OUT_BCS_RELOC(batch, dmv_write_buffer,
1664                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1665                       0);
1666     else
1667         OUT_BCS_BATCH(batch, 0);
1668
1669     if (dmv_read_buffer)
1670         OUT_BCS_RELOC(batch, dmv_read_buffer,
1671                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1672                       0);
1673     else
1674         OUT_BCS_BATCH(batch, 0);
1675                   
1676     ADVANCE_BCS_BATCH(batch);
1677 }
1678
/*
 * Translate a macroblock bit offset into an offset within the raw buffer.
 *
 * buf                       start of the slice data
 * in_slice_data_bit_offset  bit offset of the macroblock data as supplied
 *                           by the caller
 * profile                   VA sequence profile value; only profile 3 is
 *                           adjusted, every other value returns the input
 *                           offset unchanged
 *
 * For profile 3 the whole-byte part of the offset is walked through buf.
 * Each 4-byte pattern 00 00 03 xx (xx < 4) found at the current position
 * accounts for two header bytes but advances three buffer bytes, so the
 * returned offset grows by 8 bits per pattern.  The sub-byte remainder of
 * the input offset is carried over unchanged.
 *
 * NOTE(review): there is no length argument; the scan reads up to three
 * bytes past the current position, so callers must supply enough data.
 */
static int
gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    const int header_bytes = in_slice_data_bit_offset / 8;
    int counted = 0;    /* header bytes accounted for so far */
    int pos = 0;        /* current byte position in buf */

    if (profile != 3)
        return in_slice_data_bit_offset;

    while (counted < header_bytes) {
        if (buf[pos] == 0 && buf[pos + 1] == 0 && buf[pos + 2] == 3 && buf[pos + 3] < 4) {
            /* two header bytes plus one extra byte in the buffer */
            counted += 2;
            pos += 3;
        } else {
            counted += 1;
            pos += 1;
        }
    }

    return 8 * pos + in_slice_data_bit_offset % 8;
}
1700
1701 static void
1702 gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
1703                         VAPictureParameterBufferVC1 *pic_param,
1704                         VASliceParameterBufferVC1 *slice_param,
1705                         VASliceParameterBufferVC1 *next_slice_param,
1706                         dri_bo *slice_data_bo,
1707                         struct gen7_mfd_context *gen7_mfd_context)
1708 {
1709     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1710     int next_slice_start_vert_pos;
1711     int macroblock_offset;
1712     uint8_t *slice_data = NULL;
1713
1714     dri_bo_map(slice_data_bo, 0);
1715     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1716     macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1717                                                                slice_param->macroblock_offset,
1718                                                                pic_param->sequence_fields.bits.profile);
1719     dri_bo_unmap(slice_data_bo);
1720
1721     if (next_slice_param)
1722         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1723     else
1724         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1725
1726     BEGIN_BCS_BATCH(batch, 5);
1727     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1728     OUT_BCS_BATCH(batch, 
1729                   slice_param->slice_data_size - (macroblock_offset >> 3));
1730     OUT_BCS_BATCH(batch, 
1731                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1732     OUT_BCS_BATCH(batch,
1733                   slice_param->slice_vertical_position << 16 |
1734                   next_slice_start_vert_pos << 0);
1735     OUT_BCS_BATCH(batch,
1736                   (macroblock_offset & 0x7));
1737     ADVANCE_BCS_BATCH(batch);
1738 }
1739
1740 static void
1741 gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
1742                             struct decode_state *decode_state,
1743                             struct gen7_mfd_context *gen7_mfd_context)
1744 {
1745     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1746     VAPictureParameterBufferVC1 *pic_param;
1747     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1748     dri_bo *slice_data_bo;
1749     int i, j;
1750
1751     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1752     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1753
1754     gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1755     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1756     intel_batchbuffer_emit_mi_flush(batch);
1757     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1758     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1759     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1760     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1761     gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1762     gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1763     gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1764
1765     for (j = 0; j < decode_state->num_slice_params; j++) {
1766         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1767         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1768         slice_data_bo = decode_state->slice_datas[j]->bo;
1769         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1770
1771         if (j == decode_state->num_slice_params - 1)
1772             next_slice_group_param = NULL;
1773         else
1774             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1775
1776         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1777             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1778
1779             if (i < decode_state->slice_params[j]->num_elements - 1)
1780                 next_slice_param = slice_param + 1;
1781             else
1782                 next_slice_param = next_slice_group_param;
1783
1784             gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1785             slice_param++;
1786         }
1787     }
1788
1789     intel_batchbuffer_end_atomic(batch);
1790     intel_batchbuffer_flush(batch);
1791 }
1792
1793 static void
1794 gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
1795                           struct decode_state *decode_state,
1796                           struct gen7_mfd_context *gen7_mfd_context)
1797 {
1798     struct object_surface *obj_surface;
1799     VAPictureParameterBufferJPEGBaseline *pic_param;
1800     int subsampling = SUBSAMPLE_YUV420;
1801     int fourcc = VA_FOURCC('I', 'M', 'C', '3');
1802
1803     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1804
1805     if (pic_param->num_components == 1) {
1806         subsampling = SUBSAMPLE_YUV400;
1807         fourcc = VA_FOURCC('Y', '8', '0', '0');
1808     } else if (pic_param->num_components == 3) {
1809         int h1 = pic_param->components[0].h_sampling_factor;
1810         int h2 = pic_param->components[1].h_sampling_factor;
1811         int h3 = pic_param->components[2].h_sampling_factor;
1812         int v1 = pic_param->components[0].v_sampling_factor;
1813         int v2 = pic_param->components[1].v_sampling_factor;
1814         int v3 = pic_param->components[2].v_sampling_factor;
1815
1816         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1817             v1 == 2 && v2 == 1 && v3 == 1) {
1818             subsampling = SUBSAMPLE_YUV420;
1819             fourcc = VA_FOURCC('I', 'M', 'C', '3');
1820         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1821                    v1 == 1 && v2 == 1 && v3 == 1) {
1822             subsampling = SUBSAMPLE_YUV422H;
1823             fourcc = VA_FOURCC('4', '2', '2', 'H');
1824         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1825                    v1 == 1 && v2 == 1 && v3 == 1) {
1826             subsampling = SUBSAMPLE_YUV444;
1827             fourcc = VA_FOURCC('4', '4', '4', 'P');
1828         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1829                    v1 == 1 && v2 == 1 && v3 == 1) {
1830             subsampling = SUBSAMPLE_YUV411;
1831             fourcc = VA_FOURCC('4', '1', '1', 'P');
1832         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1833                    v1 == 2 && v2 == 1 && v3 == 1) {
1834             subsampling = SUBSAMPLE_YUV422V;
1835             fourcc = VA_FOURCC('4', '2', '2', 'V');
1836         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1837                    v1 == 2 && v2 == 2 && v3 == 2) {
1838             subsampling = SUBSAMPLE_YUV422H;
1839             fourcc = VA_FOURCC('4', '2', '2', 'H');
1840         } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1841                    v1 == 2 && v2 == 1 && v3 == 1) {
1842             subsampling = SUBSAMPLE_YUV422V;
1843             fourcc = VA_FOURCC('4', '2', '2', 'V');
1844         } else
1845             assert(0);
1846     } else {
1847         assert(0);
1848     }
1849
1850     /* Current decoded picture */
1851     obj_surface = decode_state->render_object;
1852     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1853
1854     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1855     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1856     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1857     gen7_mfd_context->pre_deblocking_output.valid = 1;
1858
1859     gen7_mfd_context->post_deblocking_output.bo = NULL;
1860     gen7_mfd_context->post_deblocking_output.valid = 0;
1861
1862     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1863     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1864
1865     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1866     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1867
1868     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1869     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1870
1871     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1872     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1873
1874     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1875     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1876 }
1877
1878 static const int va_to_gen7_jpeg_rotation[4] = {
1879     GEN7_JPEG_ROTATION_0,
1880     GEN7_JPEG_ROTATION_90,
1881     GEN7_JPEG_ROTATION_180,
1882     GEN7_JPEG_ROTATION_270
1883 };
1884
1885 static void
1886 gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
1887                         struct decode_state *decode_state,
1888                         struct gen7_mfd_context *gen7_mfd_context)
1889 {
1890     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1891     VAPictureParameterBufferJPEGBaseline *pic_param;
1892     int chroma_type = GEN7_YUV420;
1893     int frame_width_in_blks;
1894     int frame_height_in_blks;
1895
1896     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1897     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1898
1899     if (pic_param->num_components == 1)
1900         chroma_type = GEN7_YUV400;
1901     else if (pic_param->num_components == 3) {
1902         int h1 = pic_param->components[0].h_sampling_factor;
1903         int h2 = pic_param->components[1].h_sampling_factor;
1904         int h3 = pic_param->components[2].h_sampling_factor;
1905         int v1 = pic_param->components[0].v_sampling_factor;
1906         int v2 = pic_param->components[1].v_sampling_factor;
1907         int v3 = pic_param->components[2].v_sampling_factor;
1908
1909         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1910             v1 == 2 && v2 == 1 && v3 == 1)
1911             chroma_type = GEN7_YUV420;
1912         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1913                  v1 == 1 && v2 == 1 && v3 == 1)
1914             chroma_type = GEN7_YUV422H_2Y;
1915         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1916                  v1 == 1 && v2 == 1 && v3 == 1)
1917             chroma_type = GEN7_YUV444;
1918         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1919                  v1 == 1 && v2 == 1 && v3 == 1)
1920             chroma_type = GEN7_YUV411;
1921         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1922                  v1 == 2 && v2 == 1 && v3 == 1)
1923             chroma_type = GEN7_YUV422V_2Y;
1924         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1925                  v1 == 2 && v2 == 2 && v3 == 2)
1926             chroma_type = GEN7_YUV422H_4Y;
1927         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1928                  v1 == 2 && v2 == 1 && v3 == 1)
1929             chroma_type = GEN7_YUV422V_4Y;
1930         else
1931             assert(0);
1932     }
1933
1934     if (chroma_type == GEN7_YUV400 ||
1935         chroma_type == GEN7_YUV444 ||
1936         chroma_type == GEN7_YUV422V_2Y) {
1937         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
1938         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
1939     } else if (chroma_type == GEN7_YUV411) {
1940         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
1941         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
1942     } else {
1943         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
1944         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
1945     }
1946
1947     BEGIN_BCS_BATCH(batch, 3);
1948     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
1949     OUT_BCS_BATCH(batch,
1950                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
1951                   (chroma_type << 0));
1952     OUT_BCS_BATCH(batch,
1953                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
1954                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
1955     ADVANCE_BCS_BATCH(batch);
1956 }
1957
1958 static const int va_to_gen7_jpeg_hufftable[2] = {
1959     MFX_HUFFTABLE_ID_Y,
1960     MFX_HUFFTABLE_ID_UV
1961 };
1962
1963 static void
1964 gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
1965                                struct decode_state *decode_state,
1966                                struct gen7_mfd_context *gen7_mfd_context,
1967                                int num_tables)
1968 {
1969     VAHuffmanTableBufferJPEGBaseline *huffman_table;
1970     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1971     int index;
1972
1973     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
1974         return;
1975
1976     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
1977
1978     for (index = 0; index < num_tables; index++) {
1979         int id = va_to_gen7_jpeg_hufftable[index];
1980         if (!huffman_table->load_huffman_table[index])
1981             continue;
1982         BEGIN_BCS_BATCH(batch, 53);
1983         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
1984         OUT_BCS_BATCH(batch, id);
1985         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
1986         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
1987         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
1988         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
1989         ADVANCE_BCS_BATCH(batch);
1990     }
1991 }
1992
1993 static const int va_to_gen7_jpeg_qm[5] = {
1994     -1,
1995     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
1996     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
1997     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
1998     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
1999 };
2000
2001 static void
2002 gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
2003                        struct decode_state *decode_state,
2004                        struct gen7_mfd_context *gen7_mfd_context)
2005 {
2006     VAPictureParameterBufferJPEGBaseline *pic_param;
2007     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2008     int index;
2009
2010     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2011         return;
2012
2013     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2014     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2015
2016     assert(pic_param->num_components <= 3);
2017
2018     for (index = 0; index < pic_param->num_components; index++) {
2019         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2020         int qm_type;
2021         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2022         unsigned char raster_qm[64];
2023         int j;
2024
2025         if (id > 4 || id < 1)
2026             continue;
2027
2028         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2029             continue;
2030
2031         qm_type = va_to_gen7_jpeg_qm[id];
2032
2033         for (j = 0; j < 64; j++)
2034             raster_qm[zigzag_direct[j]] = qm[j];
2035
2036         gen7_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2037     }
2038 }
2039
2040 static void
2041 gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
2042                          VAPictureParameterBufferJPEGBaseline *pic_param,
2043                          VASliceParameterBufferJPEGBaseline *slice_param,
2044                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2045                          dri_bo *slice_data_bo,
2046                          struct gen7_mfd_context *gen7_mfd_context)
2047 {
2048     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2049     int scan_component_mask = 0;
2050     int i;
2051
2052     assert(slice_param->num_components > 0);
2053     assert(slice_param->num_components < 4);
2054     assert(slice_param->num_components <= pic_param->num_components);
2055
2056     for (i = 0; i < slice_param->num_components; i++) {
2057         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2058         case 1:
2059             scan_component_mask |= (1 << 0);
2060             break;
2061         case 2:
2062             scan_component_mask |= (1 << 1);
2063             break;
2064         case 3:
2065             scan_component_mask |= (1 << 2);
2066             break;
2067         default:
2068             assert(0);
2069             break;
2070         }
2071     }
2072
2073     BEGIN_BCS_BATCH(batch, 6);
2074     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2075     OUT_BCS_BATCH(batch, 
2076                   slice_param->slice_data_size);
2077     OUT_BCS_BATCH(batch, 
2078                   slice_param->slice_data_offset);
2079     OUT_BCS_BATCH(batch,
2080                   slice_param->slice_horizontal_position << 16 |
2081                   slice_param->slice_vertical_position << 0);
2082     OUT_BCS_BATCH(batch,
2083                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2084                   (scan_component_mask << 27) |                 /* scan components */
2085                   (0 << 26) |   /* disable interrupt allowed */
2086                   (slice_param->num_mcus << 0));                /* MCU count */
2087     OUT_BCS_BATCH(batch,
2088                   (slice_param->restart_interval << 0));    /* RestartInterval */
2089     ADVANCE_BCS_BATCH(batch);
2090 }
2091
2092 /* Workaround for JPEG decoding on Ivybridge */
2093
2094 VAStatus 
2095 i965_DestroySurfaces(VADriverContextP ctx,
2096                      VASurfaceID *surface_list,
2097                      int num_surfaces);
2098 VAStatus 
2099 i965_CreateSurfaces(VADriverContextP ctx,
2100                     int width,
2101                     int height,
2102                     int format,
2103                     int num_surfaces,
2104                     VASurfaceID *surfaces);
2105
/*
 * Built-in clip used by the JPEG workaround: picture dimensions, a small
 * payload, and the parameters that go with it.  The width and height are
 * also used to size the workaround surface.
 */
static struct {
    int width;
    int height;
    unsigned char data[32];
    int data_size;
    int data_bit_offset;
    int qp;
} gen7_jpeg_wa_clip = {
    16,     /* width */
    16,     /* height */
    {       /* data: 14 bytes given, the rest of the array is zero-filled */
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06,
        0x6c, 0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,     /* data_size */
    40,     /* data_bit_offset */
    28,     /* qp */
};
2124
2125 static void
2126 gen7_jpeg_wa_init(VADriverContextP ctx,
2127                   struct gen7_mfd_context *gen7_mfd_context)
2128 {
2129     struct i965_driver_data *i965 = i965_driver_data(ctx);
2130     VAStatus status;
2131     struct object_surface *obj_surface;
2132
2133     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2134         i965_DestroySurfaces(ctx,
2135                              &gen7_mfd_context->jpeg_wa_surface_id,
2136                              1);
2137
2138     status = i965_CreateSurfaces(ctx,
2139                                  gen7_jpeg_wa_clip.width,
2140                                  gen7_jpeg_wa_clip.height,
2141                                  VA_RT_FORMAT_YUV420,
2142                                  1,
2143                                  &gen7_mfd_context->jpeg_wa_surface_id);
2144     assert(status == VA_STATUS_SUCCESS);
2145
2146     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2147     assert(obj_surface);
2148     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2149     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2150
2151     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2152         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2153                                                                "JPEG WA data",
2154                                                                0x1000,
2155                                                                0x1000);
2156         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2157                        0,
2158                        gen7_jpeg_wa_clip.data_size,
2159                        gen7_jpeg_wa_clip.data);
2160     }
2161 }
2162
2163 static void
2164 gen7_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2165                               struct gen7_mfd_context *gen7_mfd_context)
2166 {
2167     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2168
2169     BEGIN_BCS_BATCH(batch, 5);
2170     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2171     OUT_BCS_BATCH(batch,
2172                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2173                   (MFD_MODE_VLD << 15) | /* VLD mode */
2174                   (0 << 10) | /* disable Stream-Out */
2175                   (0 << 9)  | /* Post Deblocking Output */
2176                   (1 << 8)  | /* Pre Deblocking Output */
2177                   (0 << 5)  | /* not in stitch mode */
2178                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2179                   (MFX_FORMAT_AVC << 0));
2180     OUT_BCS_BATCH(batch,
2181                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2182                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2183                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2184                   (0 << 1)  |
2185                   (0 << 0));
2186     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2187     OUT_BCS_BATCH(batch, 0); /* reserved */
2188     ADVANCE_BCS_BATCH(batch);
2189 }
2190
2191 static void
2192 gen7_jpeg_wa_surface_state(VADriverContextP ctx,
2193                            struct gen7_mfd_context *gen7_mfd_context)
2194 {
2195     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2196     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2197
2198     BEGIN_BCS_BATCH(batch, 6);
2199     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2200     OUT_BCS_BATCH(batch, 0);
2201     OUT_BCS_BATCH(batch,
2202                   ((obj_surface->orig_width - 1) << 18) |
2203                   ((obj_surface->orig_height - 1) << 4));
2204     OUT_BCS_BATCH(batch,
2205                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2206                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2207                   (0 << 22) | /* surface object control state, ignored */
2208                   ((obj_surface->width - 1) << 3) | /* pitch */
2209                   (0 << 2)  | /* must be 0 */
2210                   (1 << 1)  | /* must be tiled */
2211                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2212     OUT_BCS_BATCH(batch,
2213                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2214                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2215     OUT_BCS_BATCH(batch,
2216                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2217                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2218     ADVANCE_BCS_BATCH(batch);
2219 }
2220
2221 static void
2222 gen7_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2223                                  struct gen7_mfd_context *gen7_mfd_context)
2224 {
2225     struct i965_driver_data *i965 = i965_driver_data(ctx);
2226     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2227     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2228     dri_bo *intra_bo;
2229     int i;
2230
2231     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2232                             "intra row store",
2233                             128 * 64,
2234                             0x1000);
2235
2236     BEGIN_BCS_BATCH(batch, 24);
2237     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
2238     OUT_BCS_RELOC(batch,
2239                   obj_surface->bo,
2240                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2241                   0);
2242     
2243     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2244
2245     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2246     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2247
2248     OUT_BCS_RELOC(batch,
2249                   intra_bo,
2250                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2251                   0);
2252
2253     OUT_BCS_BATCH(batch, 0);
2254
2255     /* DW 7..22 */
2256     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2257         OUT_BCS_BATCH(batch, 0);
2258     }
2259
2260     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2261     ADVANCE_BCS_BATCH(batch);
2262
2263     dri_bo_unreference(intra_bo);
2264 }
2265
2266 static void
2267 gen7_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2268                                      struct gen7_mfd_context *gen7_mfd_context)
2269 {
2270     struct i965_driver_data *i965 = i965_driver_data(ctx);
2271     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2272     dri_bo *bsd_mpc_bo, *mpr_bo;
2273
2274     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2275                               "bsd mpc row store",
2276                               11520, /* 1.5 * 120 * 64 */
2277                               0x1000);
2278
2279     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2280                           "mpr row store",
2281                           7680, /* 1. 0 * 120 * 64 */
2282                           0x1000);
2283
2284     BEGIN_BCS_BATCH(batch, 4);
2285     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2286
2287     OUT_BCS_RELOC(batch,
2288                   bsd_mpc_bo,
2289                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2290                   0);
2291
2292     OUT_BCS_RELOC(batch,
2293                   mpr_bo,
2294                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2295                   0);
2296     OUT_BCS_BATCH(batch, 0);
2297
2298     ADVANCE_BCS_BATCH(batch);
2299
2300     dri_bo_unreference(bsd_mpc_bo);
2301     dri_bo_unreference(mpr_bo);
2302 }
2303
2304 static void
2305 gen7_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2306                           struct gen7_mfd_context *gen7_mfd_context)
2307 {
2308
2309 }
2310
2311 static void
2312 gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
2313                            struct gen7_mfd_context *gen7_mfd_context)
2314 {
2315     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2316     int img_struct = 0;
2317     int mbaff_frame_flag = 0;
2318     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2319
2320     BEGIN_BCS_BATCH(batch, 16);
2321     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2322     OUT_BCS_BATCH(batch, 
2323                   (width_in_mbs * height_in_mbs - 1));
2324     OUT_BCS_BATCH(batch, 
2325                   ((height_in_mbs - 1) << 16) | 
2326                   ((width_in_mbs - 1) << 0));
2327     OUT_BCS_BATCH(batch, 
2328                   (0 << 24) |
2329                   (0 << 16) |
2330                   (0 << 14) |
2331                   (0 << 13) |
2332                   (0 << 12) | /* differ from GEN6 */
2333                   (0 << 10) |
2334                   (img_struct << 8));
2335     OUT_BCS_BATCH(batch,
2336                   (1 << 10) | /* 4:2:0 */
2337                   (1 << 7) |  /* CABAC */
2338                   (0 << 6) |
2339                   (0 << 5) |
2340                   (0 << 4) |
2341                   (0 << 3) |
2342                   (1 << 2) |
2343                   (mbaff_frame_flag << 1) |
2344                   (0 << 0));
2345     OUT_BCS_BATCH(batch, 0);
2346     OUT_BCS_BATCH(batch, 0);
2347     OUT_BCS_BATCH(batch, 0);
2348     OUT_BCS_BATCH(batch, 0);
2349     OUT_BCS_BATCH(batch, 0);
2350     OUT_BCS_BATCH(batch, 0);
2351     OUT_BCS_BATCH(batch, 0);
2352     OUT_BCS_BATCH(batch, 0);
2353     OUT_BCS_BATCH(batch, 0);
2354     OUT_BCS_BATCH(batch, 0);
2355     OUT_BCS_BATCH(batch, 0);
2356     ADVANCE_BCS_BATCH(batch);
2357 }
2358
2359 static void
2360 gen7_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2361                                   struct gen7_mfd_context *gen7_mfd_context)
2362 {
2363     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2364     int i;
2365
2366     BEGIN_BCS_BATCH(batch, 69);
2367     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2368
2369     /* reference surfaces 0..15 */
2370     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2371         OUT_BCS_BATCH(batch, 0); /* top */
2372         OUT_BCS_BATCH(batch, 0); /* bottom */
2373     }
2374
2375     /* the current decoding frame/field */
2376     OUT_BCS_BATCH(batch, 0); /* top */
2377     OUT_BCS_BATCH(batch, 0); /* bottom */
2378
2379     /* POC List */
2380     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2381         OUT_BCS_BATCH(batch, 0);
2382         OUT_BCS_BATCH(batch, 0);
2383     }
2384
2385     OUT_BCS_BATCH(batch, 0);
2386     OUT_BCS_BATCH(batch, 0);
2387
2388     ADVANCE_BCS_BATCH(batch);
2389 }
2390
2391 static void
2392 gen7_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2393                                      struct gen7_mfd_context *gen7_mfd_context)
2394 {
2395     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2396
2397     BEGIN_BCS_BATCH(batch, 11);
2398     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2399     OUT_BCS_RELOC(batch,
2400                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2401                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2402                   0);
2403     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2404     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2405     OUT_BCS_BATCH(batch, 0);
2406     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2407     OUT_BCS_BATCH(batch, 0);
2408     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2409     OUT_BCS_BATCH(batch, 0);
2410     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2411     OUT_BCS_BATCH(batch, 0);
2412     ADVANCE_BCS_BATCH(batch);
2413 }
2414
2415 static void
2416 gen7_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2417                             struct gen7_mfd_context *gen7_mfd_context)
2418 {
2419     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2420
2421     /* the input bitsteam format on GEN7 differs from GEN6 */
2422     BEGIN_BCS_BATCH(batch, 6);
2423     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2424     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2425     OUT_BCS_BATCH(batch, 0);
2426     OUT_BCS_BATCH(batch,
2427                   (0 << 31) |
2428                   (0 << 14) |
2429                   (0 << 12) |
2430                   (0 << 10) |
2431                   (0 << 8));
2432     OUT_BCS_BATCH(batch,
2433                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2434                   (0 << 5)  |
2435                   (0 << 4)  |
2436                   (1 << 3) | /* LastSlice Flag */
2437                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2438     OUT_BCS_BATCH(batch, 0);
2439     ADVANCE_BCS_BATCH(batch);
2440 }
2441
2442 static void
2443 gen7_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2444                              struct gen7_mfd_context *gen7_mfd_context)
2445 {
2446     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2447     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2448     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2449     int first_mb_in_slice = 0;
2450     int slice_type = SLICE_TYPE_I;
2451
2452     BEGIN_BCS_BATCH(batch, 11);
2453     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2454     OUT_BCS_BATCH(batch, slice_type);
2455     OUT_BCS_BATCH(batch, 
2456                   (num_ref_idx_l1 << 24) |
2457                   (num_ref_idx_l0 << 16) |
2458                   (0 << 8) |
2459                   (0 << 0));
2460     OUT_BCS_BATCH(batch, 
2461                   (0 << 29) |
2462                   (1 << 27) |   /* disable Deblocking */
2463                   (0 << 24) |
2464                   (gen7_jpeg_wa_clip.qp << 16) |
2465                   (0 << 8) |
2466                   (0 << 0));
2467     OUT_BCS_BATCH(batch, 
2468                   (slice_ver_pos << 24) |
2469                   (slice_hor_pos << 16) | 
2470                   (first_mb_in_slice << 0));
2471     OUT_BCS_BATCH(batch,
2472                   (next_slice_ver_pos << 16) |
2473                   (next_slice_hor_pos << 0));
2474     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2475     OUT_BCS_BATCH(batch, 0);
2476     OUT_BCS_BATCH(batch, 0);
2477     OUT_BCS_BATCH(batch, 0);
2478     OUT_BCS_BATCH(batch, 0);
2479     ADVANCE_BCS_BATCH(batch);
2480 }
2481
2482 static void
2483 gen7_mfd_jpeg_wa(VADriverContextP ctx,
2484                  struct gen7_mfd_context *gen7_mfd_context)
2485 {
2486     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2487     gen7_jpeg_wa_init(ctx, gen7_mfd_context);
2488     intel_batchbuffer_emit_mi_flush(batch);
2489     gen7_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2490     gen7_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2491     gen7_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2492     gen7_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2493     gen7_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2494     gen7_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2495     gen7_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2496
2497     gen7_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2498     gen7_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2499     gen7_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2500 }
2501
2502 void
2503 gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
2504                              struct decode_state *decode_state,
2505                              struct gen7_mfd_context *gen7_mfd_context)
2506 {
2507     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2508     VAPictureParameterBufferJPEGBaseline *pic_param;
2509     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2510     dri_bo *slice_data_bo;
2511     int i, j, max_selector = 0;
2512
2513     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2514     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2515
2516     /* Currently only support Baseline DCT */
2517     gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2518     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2519     gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
2520     intel_batchbuffer_emit_mi_flush(batch);
2521     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2522     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2523     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2524     gen7_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2525     gen7_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2526
2527     for (j = 0; j < decode_state->num_slice_params; j++) {
2528         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2529         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2530         slice_data_bo = decode_state->slice_datas[j]->bo;
2531         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2532
2533         if (j == decode_state->num_slice_params - 1)
2534             next_slice_group_param = NULL;
2535         else
2536             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2537
2538         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2539             int component;
2540
2541             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2542
2543             if (i < decode_state->slice_params[j]->num_elements - 1)
2544                 next_slice_param = slice_param + 1;
2545             else
2546                 next_slice_param = next_slice_group_param;
2547
2548             for (component = 0; component < slice_param->num_components; component++) {
2549                 if (max_selector < slice_param->components[component].dc_table_selector)
2550                     max_selector = slice_param->components[component].dc_table_selector;
2551
2552                 if (max_selector < slice_param->components[component].ac_table_selector)
2553                     max_selector = slice_param->components[component].ac_table_selector;
2554             }
2555
2556             slice_param++;
2557         }
2558     }
2559
2560     assert(max_selector < 2);
2561     gen7_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2562
2563     for (j = 0; j < decode_state->num_slice_params; j++) {
2564         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2565         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2566         slice_data_bo = decode_state->slice_datas[j]->bo;
2567         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2568
2569         if (j == decode_state->num_slice_params - 1)
2570             next_slice_group_param = NULL;
2571         else
2572             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2573
2574         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2575             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2576
2577             if (i < decode_state->slice_params[j]->num_elements - 1)
2578                 next_slice_param = slice_param + 1;
2579             else
2580                 next_slice_param = next_slice_group_param;
2581
2582             gen7_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2583             slice_param++;
2584         }
2585     }
2586
2587     intel_batchbuffer_end_atomic(batch);
2588     intel_batchbuffer_flush(batch);
2589 }
2590
2591 static VAStatus
2592 gen7_mfd_decode_picture(VADriverContextP ctx, 
2593                         VAProfile profile, 
2594                         union codec_state *codec_state,
2595                         struct hw_context *hw_context)
2596
2597 {
2598     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2599     struct decode_state *decode_state = &codec_state->decode;
2600     VAStatus vaStatus;
2601
2602     assert(gen7_mfd_context);
2603
2604     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
2605
2606     if (vaStatus != VA_STATUS_SUCCESS)
2607         goto out;
2608
2609     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2610
2611     switch (profile) {
2612     case VAProfileMPEG2Simple:
2613     case VAProfileMPEG2Main:
2614         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
2615         break;
2616         
2617     case VAProfileH264ConstrainedBaseline:
2618     case VAProfileH264Main:
2619     case VAProfileH264High:
2620         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
2621         break;
2622
2623     case VAProfileVC1Simple:
2624     case VAProfileVC1Main:
2625     case VAProfileVC1Advanced:
2626         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
2627         break;
2628
2629     case VAProfileJPEGBaseline:
2630         gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
2631         break;
2632
2633     default:
2634         assert(0);
2635         break;
2636     }
2637
2638     vaStatus = VA_STATUS_SUCCESS;
2639
2640 out:
2641     return vaStatus;
2642 }
2643
2644 static void
2645 gen7_mfd_context_destroy(void *hw_context)
2646 {
2647     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2648
2649     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2650     gen7_mfd_context->post_deblocking_output.bo = NULL;
2651
2652     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2653     gen7_mfd_context->pre_deblocking_output.bo = NULL;
2654
2655     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2656     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2657
2658     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2659     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2660
2661     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2662     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2663
2664     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2665     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2666
2667     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
2668     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2669
2670     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
2671
2672     intel_batchbuffer_free(gen7_mfd_context->base.batch);
2673     free(gen7_mfd_context);
2674 }
2675
2676 static void gen7_mfd_mpeg2_context_init(VADriverContextP ctx,
2677                                     struct gen7_mfd_context *gen7_mfd_context)
2678 {
2679     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
2680     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
2681     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
2682     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
2683 }
2684
2685 struct hw_context *
2686 gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
2687 {
2688     struct intel_driver_data *intel = intel_driver_data(ctx);
2689     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
2690     int i;
2691
2692     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
2693     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
2694     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
2695
2696     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
2697         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2698         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
2699         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
2700     }
2701
2702     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
2703     gen7_mfd_context->jpeg_wa_surface_object = NULL;
2704
2705     switch (obj_config->profile) {
2706     case VAProfileMPEG2Simple:
2707     case VAProfileMPEG2Main:
2708         gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
2709         break;
2710
2711     case VAProfileH264ConstrainedBaseline:
2712     case VAProfileH264Main:
2713     case VAProfileH264High:
2714         gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
2715         break;
2716     default:
2717         break;
2718     }
2719     return (struct hw_context *)gen7_mfd_context;
2720 }