Always set Fix_Prev_Mb_skipped in AVC_BSD_OBJECT command
[platform/upstream/libva-intel-driver.git] / src / gen7_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #ifndef HAVE_GEN_AVC_SURFACE
30 #define HAVE_GEN_AVC_SURFACE 1
31 #endif
32
33 #include "sysdeps.h"
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36 #include "i965_defines.h"
37 #include "i965_drv_video.h"
38 #include "i965_decoder_utils.h"
39
40 #include "gen7_mfd.h"
41
/*
 * 8x8 zig-zag scan table: entry k holds the raster (row-major) position of
 * the k-th coefficient when walking the block in zig-zag order.
 */
static const uint32_t zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
};
52
53 static void
54 gen7_mfd_avc_frame_store_index(VADriverContextP ctx,
55                                VAPictureParameterBufferH264 *pic_param,
56                                struct gen7_mfd_context *gen7_mfd_context)
57 {
58     struct i965_driver_data *i965 = i965_driver_data(ctx);
59     int i, j;
60
61     assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
62
63     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
64         int found = 0;
65
66         if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
67             continue;
68
69         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
70             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
71             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
72                 continue;
73
74             if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
75                 found = 1;
76                 break;
77             }
78         }
79
80         if (!found) {
81             struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
82             obj_surface->flags &= ~SURFACE_REFERENCED;
83
84             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
85                 dri_bo_unreference(obj_surface->bo);
86                 obj_surface->bo = NULL;
87                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
88             }
89
90             if (obj_surface->free_private_data)
91                 obj_surface->free_private_data(&obj_surface->private_data);
92
93             gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
94             gen7_mfd_context->reference_surface[i].frame_store_id = -1;
95         }
96     }
97
98     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
99         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
100         int found = 0;
101
102         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
103             continue;
104
105         for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
106             if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
107                 continue;
108             
109             if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
110                 found = 1;
111                 break;
112             }
113         }
114
115         if (!found) {
116             int frame_idx;
117             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
118             
119             assert(obj_surface);
120             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
121
122             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
123                 for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
124                     if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
125                         continue;
126
127                     if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
128                         break;
129                 }
130
131                 if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
132                     break;
133             }
134
135             assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
136
137             for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
138                 if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
139                     gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
140                     gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
141                     break;
142                 }
143             }
144         }
145     }
146
147     /* sort */
148     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
149         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
150             gen7_mfd_context->reference_surface[i].frame_store_id == i)
151             continue;
152
153         for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
154             if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
155                 gen7_mfd_context->reference_surface[j].frame_store_id == i) {
156                 VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
157                 int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
158
159                 gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
160                 gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
161                 gen7_mfd_context->reference_surface[j].surface_id = id;
162                 gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
163                 break;
164             }
165         }
166     }
167 }
168
169 static void
170 gen7_mfd_init_avc_surface(VADriverContextP ctx, 
171                           VAPictureParameterBufferH264 *pic_param,
172                           struct object_surface *obj_surface)
173 {
174     struct i965_driver_data *i965 = i965_driver_data(ctx);
175     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
176     int width_in_mbs, height_in_mbs;
177
178     obj_surface->free_private_data = gen_free_avc_surface;
179     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
180     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
181
182     if (!gen7_avc_surface) {
183         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
184         assert((obj_surface->size & 0x3f) == 0);
185         obj_surface->private_data = gen7_avc_surface;
186     }
187
188     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
189                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
190
191     if (gen7_avc_surface->dmv_top == NULL) {
192         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
193                                                  "direct mv w/r buffer",
194                                                  width_in_mbs * height_in_mbs * 64,
195                                                  0x1000);
196         assert(gen7_avc_surface->dmv_top);
197     }
198
199     if (gen7_avc_surface->dmv_bottom_flag &&
200         gen7_avc_surface->dmv_bottom == NULL) {
201         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
202                                                     "direct mv w/r buffer",
203                                                     width_in_mbs * height_in_mbs * 64,                                                    
204                                                     0x1000);
205         assert(gen7_avc_surface->dmv_bottom);
206     }
207 }
208
209 static void
210 gen7_mfd_pipe_mode_select(VADriverContextP ctx,
211                           struct decode_state *decode_state,
212                           int standard_select,
213                           struct gen7_mfd_context *gen7_mfd_context)
214 {
215     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
216
217     assert(standard_select == MFX_FORMAT_MPEG2 ||
218            standard_select == MFX_FORMAT_AVC ||
219            standard_select == MFX_FORMAT_VC1 ||
220            standard_select == MFX_FORMAT_JPEG);
221
222     BEGIN_BCS_BATCH(batch, 5);
223     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
224     OUT_BCS_BATCH(batch,
225                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
226                   (MFD_MODE_VLD << 15) | /* VLD mode */
227                   (0 << 10) | /* disable Stream-Out */
228                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
229                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
230                   (0 << 5)  | /* not in stitch mode */
231                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
232                   (standard_select << 0));
233     OUT_BCS_BATCH(batch,
234                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
235                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
236                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
237                   (0 << 1)  |
238                   (0 << 0));
239     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
240     OUT_BCS_BATCH(batch, 0); /* reserved */
241     ADVANCE_BCS_BATCH(batch);
242 }
243
244 static void
245 gen7_mfd_surface_state(VADriverContextP ctx,
246                        struct decode_state *decode_state,
247                        int standard_select,
248                        struct gen7_mfd_context *gen7_mfd_context)
249 {
250     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
251     struct i965_driver_data *i965 = i965_driver_data(ctx);
252     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
253     unsigned int y_cb_offset;
254     unsigned int y_cr_offset;
255
256     assert(obj_surface);
257
258     y_cb_offset = obj_surface->y_cb_offset;
259     y_cr_offset = obj_surface->y_cr_offset;
260
261     BEGIN_BCS_BATCH(batch, 6);
262     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch,
265                   ((obj_surface->orig_height - 1) << 18) |
266                   ((obj_surface->orig_width - 1) << 4));
267     OUT_BCS_BATCH(batch,
268                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
269                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
270                   (0 << 22) | /* surface object control state, ignored */
271                   ((obj_surface->width - 1) << 3) | /* pitch */
272                   (0 << 2)  | /* must be 0 */
273                   (1 << 1)  | /* must be tiled */
274                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
275     OUT_BCS_BATCH(batch,
276                   (0 << 16) | /* X offset for U(Cb), must be 0 */
277                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
278     OUT_BCS_BATCH(batch,
279                   (0 << 16) | /* X offset for V(Cr), must be 0 */
280                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
281     ADVANCE_BCS_BATCH(batch);
282 }
283
284 static void
285 gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
286                              struct decode_state *decode_state,
287                              int standard_select,
288                              struct gen7_mfd_context *gen7_mfd_context)
289 {
290     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
291     struct i965_driver_data *i965 = i965_driver_data(ctx);
292     int i;
293
294     BEGIN_BCS_BATCH(batch, 24);
295     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
296     if (gen7_mfd_context->pre_deblocking_output.valid)
297         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
298                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
299                       0);
300     else
301         OUT_BCS_BATCH(batch, 0);
302
303     if (gen7_mfd_context->post_deblocking_output.valid)
304         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
305                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
306                       0);
307     else
308         OUT_BCS_BATCH(batch, 0);
309
310     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
311     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
312
313     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
314         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
315                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
316                       0);
317     else
318         OUT_BCS_BATCH(batch, 0);
319
320     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
321         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
322                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
323                       0);
324     else
325         OUT_BCS_BATCH(batch, 0);
326
327     /* DW 7..22 */
328     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
329         struct object_surface *obj_surface;
330
331         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
332             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
333             assert(obj_surface && obj_surface->bo);
334
335             OUT_BCS_RELOC(batch, obj_surface->bo,
336                           I915_GEM_DOMAIN_INSTRUCTION, 0,
337                           0);
338         } else {
339             OUT_BCS_BATCH(batch, 0);
340         }
341     }
342
343     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
344     ADVANCE_BCS_BATCH(batch);
345 }
346
347 static void
348 gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
349                                  dri_bo *slice_data_bo,
350                                  int standard_select,
351                                  struct gen7_mfd_context *gen7_mfd_context)
352 {
353     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
354
355     BEGIN_BCS_BATCH(batch, 11);
356     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
357     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
358     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
359     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
360     OUT_BCS_BATCH(batch, 0);
361     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
364     OUT_BCS_BATCH(batch, 0);
365     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
366     OUT_BCS_BATCH(batch, 0);
367     ADVANCE_BCS_BATCH(batch);
368 }
369
370 static void
371 gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
372                                  struct decode_state *decode_state,
373                                  int standard_select,
374                                  struct gen7_mfd_context *gen7_mfd_context)
375 {
376     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
377
378     BEGIN_BCS_BATCH(batch, 4);
379     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
380
381     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
382         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
383                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
384                       0);
385     else
386         OUT_BCS_BATCH(batch, 0);
387
388     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
389         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
390                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
391                       0);
392     else
393         OUT_BCS_BATCH(batch, 0);
394
395     if (gen7_mfd_context->bitplane_read_buffer.valid)
396         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
397                       I915_GEM_DOMAIN_INSTRUCTION, 0,
398                       0);
399     else
400         OUT_BCS_BATCH(batch, 0);
401
402     ADVANCE_BCS_BATCH(batch);
403 }
404
405 static void
406 gen7_mfd_qm_state(VADriverContextP ctx,
407                   int qm_type,
408                   unsigned char *qm,
409                   int qm_length,
410                   struct gen7_mfd_context *gen7_mfd_context)
411 {
412     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
413     unsigned int qm_buffer[16];
414
415     assert(qm_length <= 16 * 4);
416     memcpy(qm_buffer, qm, qm_length);
417
418     BEGIN_BCS_BATCH(batch, 18);
419     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
420     OUT_BCS_BATCH(batch, qm_type << 0);
421     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
422     ADVANCE_BCS_BATCH(batch);
423 }
424
425 static void
426 gen7_mfd_avc_img_state(VADriverContextP ctx,
427                        struct decode_state *decode_state,
428                        struct gen7_mfd_context *gen7_mfd_context)
429 {
430     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
431     int img_struct;
432     int mbaff_frame_flag;
433     unsigned int width_in_mbs, height_in_mbs;
434     VAPictureParameterBufferH264 *pic_param;
435
436     assert(decode_state->pic_param && decode_state->pic_param->buffer);
437     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
438     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
439
440     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
441         img_struct = 1;
442     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
443         img_struct = 3;
444     else
445         img_struct = 0;
446
447     if ((img_struct & 0x1) == 0x1) {
448         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
449     } else {
450         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
451     }
452
453     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
454         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
455         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
456     } else {
457         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
458     }
459
460     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
461                         !pic_param->pic_fields.bits.field_pic_flag);
462
463     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
464     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
465
466     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
467     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
468            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
469     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
470
471     BEGIN_BCS_BATCH(batch, 16);
472     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
473     OUT_BCS_BATCH(batch, 
474                   width_in_mbs * height_in_mbs);
475     OUT_BCS_BATCH(batch, 
476                   ((height_in_mbs - 1) << 16) | 
477                   ((width_in_mbs - 1) << 0));
478     OUT_BCS_BATCH(batch, 
479                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
480                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
481                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
482                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
483                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
484                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
485                   (img_struct << 8));
486     OUT_BCS_BATCH(batch,
487                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
488                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
489                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
490                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
491                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
492                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
493                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
494                   (mbaff_frame_flag << 1) |
495                   (pic_param->pic_fields.bits.field_pic_flag << 0));
496     OUT_BCS_BATCH(batch, 0);
497     OUT_BCS_BATCH(batch, 0);
498     OUT_BCS_BATCH(batch, 0);
499     OUT_BCS_BATCH(batch, 0);
500     OUT_BCS_BATCH(batch, 0);
501     OUT_BCS_BATCH(batch, 0);
502     OUT_BCS_BATCH(batch, 0);
503     OUT_BCS_BATCH(batch, 0);
504     OUT_BCS_BATCH(batch, 0);
505     OUT_BCS_BATCH(batch, 0);
506     OUT_BCS_BATCH(batch, 0);
507     ADVANCE_BCS_BATCH(batch);
508 }
509
510 static void
511 gen7_mfd_avc_qm_state(VADriverContextP ctx,
512                       struct decode_state *decode_state,
513                       struct gen7_mfd_context *gen7_mfd_context)
514 {
515     VAIQMatrixBufferH264 *iq_matrix;
516     VAPictureParameterBufferH264 *pic_param;
517
518     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
519         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
520     else
521         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
522
523     assert(decode_state->pic_param && decode_state->pic_param->buffer);
524     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
525
526     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
527     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
528
529     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
530         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
531         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
532     }
533 }
534
535 static void
536 gen7_mfd_avc_directmode_state(VADriverContextP ctx,
537                               VAPictureParameterBufferH264 *pic_param,
538                               VASliceParameterBufferH264 *slice_param,
539                               struct gen7_mfd_context *gen7_mfd_context)
540 {
541     struct i965_driver_data *i965 = i965_driver_data(ctx);
542     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
543     struct object_surface *obj_surface;
544     GenAvcSurface *gen7_avc_surface;
545     VAPictureH264 *va_pic;
546     int i, j;
547
548     BEGIN_BCS_BATCH(batch, 69);
549     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
550
551     /* reference surfaces 0..15 */
552     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
553         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
554             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
555             assert(obj_surface);
556             gen7_avc_surface = obj_surface->private_data;
557
558             if (gen7_avc_surface == NULL) {
559                 OUT_BCS_BATCH(batch, 0);
560                 OUT_BCS_BATCH(batch, 0);
561             } else {
562                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563                               I915_GEM_DOMAIN_INSTRUCTION, 0,
564                               0);
565
566                 if (gen7_avc_surface->dmv_bottom_flag == 1)
567                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
568                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
569                                   0);
570                 else
571                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
572                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
573                                   0);
574             }
575         } else {
576             OUT_BCS_BATCH(batch, 0);
577             OUT_BCS_BATCH(batch, 0);
578         }
579     }
580
581     /* the current decoding frame/field */
582     va_pic = &pic_param->CurrPic;
583     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
584     obj_surface = SURFACE(va_pic->picture_id);
585     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
586     gen7_avc_surface = obj_surface->private_data;
587
588     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
589                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
590                   0);
591
592     if (gen7_avc_surface->dmv_bottom_flag == 1)
593         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
594                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
595                       0);
596     else
597         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
598                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
599                       0);
600
601     /* POC List */
602     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
603         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
604             int found = 0;
605             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
606                 va_pic = &pic_param->ReferenceFrames[j];
607                 
608                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
609                     continue;
610
611                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
612                     found = 1;
613                     break;
614                 }
615             }
616
617             assert(found == 1);
618             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
619             
620             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
621             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
622         } else {
623             OUT_BCS_BATCH(batch, 0);
624             OUT_BCS_BATCH(batch, 0);
625         }
626     }
627
628     va_pic = &pic_param->CurrPic;
629     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
630     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
631
632     ADVANCE_BCS_BATCH(batch);
633 }
634
635 static void
636 gen7_mfd_avc_slice_state(VADriverContextP ctx,
637                          VAPictureParameterBufferH264 *pic_param,
638                          VASliceParameterBufferH264 *slice_param,
639                          VASliceParameterBufferH264 *next_slice_param,
640                          struct gen7_mfd_context *gen7_mfd_context)
641 {
642     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
643     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
644     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
645     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
646     int num_ref_idx_l0, num_ref_idx_l1;
647     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
648                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
649     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
650     int slice_type;
651
652     if (slice_param->slice_type == SLICE_TYPE_I ||
653         slice_param->slice_type == SLICE_TYPE_SI) {
654         slice_type = SLICE_TYPE_I;
655     } else if (slice_param->slice_type == SLICE_TYPE_P ||
656                slice_param->slice_type == SLICE_TYPE_SP) {
657         slice_type = SLICE_TYPE_P;
658     } else { 
659         assert(slice_param->slice_type == SLICE_TYPE_B);
660         slice_type = SLICE_TYPE_B;
661     }
662
663     if (slice_type == SLICE_TYPE_I) {
664         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
665         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
666         num_ref_idx_l0 = 0;
667         num_ref_idx_l1 = 0;
668     } else if (slice_type == SLICE_TYPE_P) {
669         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
670         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
671         num_ref_idx_l1 = 0;
672     } else {
673         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
674         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
675     }
676
677     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
678     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
679     slice_ver_pos = first_mb_in_slice / width_in_mbs;
680
681     if (next_slice_param) {
682         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
683         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
684         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
685     } else {
686         next_slice_hor_pos = 0;
687         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
688     }
689
690     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
691     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
692     OUT_BCS_BATCH(batch, slice_type);
693     OUT_BCS_BATCH(batch, 
694                   (num_ref_idx_l1 << 24) |
695                   (num_ref_idx_l0 << 16) |
696                   (slice_param->chroma_log2_weight_denom << 8) |
697                   (slice_param->luma_log2_weight_denom << 0));
698     OUT_BCS_BATCH(batch, 
699                   (slice_param->direct_spatial_mv_pred_flag << 29) |
700                   (slice_param->disable_deblocking_filter_idc << 27) |
701                   (slice_param->cabac_init_idc << 24) |
702                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
703                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
704                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
705     OUT_BCS_BATCH(batch, 
706                   (slice_ver_pos << 24) |
707                   (slice_hor_pos << 16) | 
708                   (first_mb_in_slice << 0));
709     OUT_BCS_BATCH(batch,
710                   (next_slice_ver_pos << 16) |
711                   (next_slice_hor_pos << 0));
712     OUT_BCS_BATCH(batch, 
713                   (next_slice_param == NULL) << 19); /* last slice flag */
714     OUT_BCS_BATCH(batch, 0);
715     OUT_BCS_BATCH(batch, 0);
716     OUT_BCS_BATCH(batch, 0);
717     OUT_BCS_BATCH(batch, 0);
718     ADVANCE_BCS_BATCH(batch);
719 }
720
721 static inline void
722 gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
723                            VAPictureParameterBufferH264 *pic_param,
724                            VASliceParameterBufferH264 *slice_param,
725                            struct gen7_mfd_context *gen7_mfd_context)
726 {
727     gen6_send_avc_ref_idx_state(
728         gen7_mfd_context->base.batch,
729         slice_param,
730         gen7_mfd_context->reference_surface
731     );
732 }
733
734 static void
735 gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
736                                 VAPictureParameterBufferH264 *pic_param,
737                                 VASliceParameterBufferH264 *slice_param,
738                                 struct gen7_mfd_context *gen7_mfd_context)
739 {
740     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
741     int i, j, num_weight_offset_table = 0;
742     short weightoffsets[32 * 6];
743
744     if ((slice_param->slice_type == SLICE_TYPE_P ||
745          slice_param->slice_type == SLICE_TYPE_SP) &&
746         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
747         num_weight_offset_table = 1;
748     }
749     
750     if ((slice_param->slice_type == SLICE_TYPE_B) &&
751         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
752         num_weight_offset_table = 2;
753     }
754
755     for (i = 0; i < num_weight_offset_table; i++) {
756         BEGIN_BCS_BATCH(batch, 98);
757         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
758         OUT_BCS_BATCH(batch, i);
759
760         if (i == 0) {
761             for (j = 0; j < 32; j++) {
762                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
763                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
764                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
765                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
766                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
767                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
768             }
769         } else {
770             for (j = 0; j < 32; j++) {
771                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
772                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
773                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
774                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
775                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
776                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
777             }
778         }
779
780         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
781         ADVANCE_BCS_BATCH(batch);
782     }
783 }
784
785 static void
786 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
787                         VAPictureParameterBufferH264 *pic_param,
788                         VASliceParameterBufferH264 *slice_param,
789                         dri_bo *slice_data_bo,
790                         VASliceParameterBufferH264 *next_slice_param,
791                         struct gen7_mfd_context *gen7_mfd_context)
792 {
793     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
794     unsigned int slice_data_bit_offset;
795
796     slice_data_bit_offset = avc_get_first_mb_bit_offset(
797         slice_data_bo,
798         slice_param,
799         pic_param->pic_fields.bits.entropy_coding_mode_flag
800     );
801
802     /* the input bitsteam format on GEN7 differs from GEN6 */
803     BEGIN_BCS_BATCH(batch, 6);
804     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
805     OUT_BCS_BATCH(batch, 
806                   (slice_param->slice_data_size - slice_param->slice_data_offset));
807     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
808     OUT_BCS_BATCH(batch,
809                   (0 << 31) |
810                   (0 << 14) |
811                   (0 << 12) |
812                   (0 << 10) |
813                   (0 << 8));
814     OUT_BCS_BATCH(batch,
815                   ((slice_data_bit_offset >> 3) << 16) |
816                   (1 << 7)  |
817                   (0 << 5)  |
818                   (0 << 4)  |
819                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
820                   (slice_data_bit_offset & 0x7));
821     OUT_BCS_BATCH(batch, 0);
822     ADVANCE_BCS_BATCH(batch);
823 }
824
825 static inline void
826 gen7_mfd_avc_context_init(
827     VADriverContextP         ctx,
828     struct gen7_mfd_context *gen7_mfd_context
829 )
830 {
831     /* Initialize flat scaling lists */
832     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
833 }
834
835 static void
836 gen7_mfd_avc_decode_init(VADriverContextP ctx,
837                          struct decode_state *decode_state,
838                          struct gen7_mfd_context *gen7_mfd_context)
839 {
840     VAPictureParameterBufferH264 *pic_param;
841     VASliceParameterBufferH264 *slice_param;
842     VAPictureH264 *va_pic;
843     struct i965_driver_data *i965 = i965_driver_data(ctx);
844     struct object_surface *obj_surface;
845     dri_bo *bo;
846     int i, j, enable_avc_ildb = 0;
847     unsigned int width_in_mbs, height_in_mbs;
848
849     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
850         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
851         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
852
853         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
854             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
855             assert((slice_param->slice_type == SLICE_TYPE_I) ||
856                    (slice_param->slice_type == SLICE_TYPE_SI) ||
857                    (slice_param->slice_type == SLICE_TYPE_P) ||
858                    (slice_param->slice_type == SLICE_TYPE_SP) ||
859                    (slice_param->slice_type == SLICE_TYPE_B));
860
861             if (slice_param->disable_deblocking_filter_idc != 1) {
862                 enable_avc_ildb = 1;
863                 break;
864             }
865
866             slice_param++;
867         }
868     }
869
870     assert(decode_state->pic_param && decode_state->pic_param->buffer);
871     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
872     gen7_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
873     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
874     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
875     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
876     assert(height_in_mbs > 0 && height_in_mbs <= 256);
877
878     /* Current decoded picture */
879     va_pic = &pic_param->CurrPic;
880     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
881     obj_surface = SURFACE(va_pic->picture_id);
882     assert(obj_surface);
883     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
884     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
885     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
886     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
887
888     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
889     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
890     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
891     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
892
893     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
894     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
895     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
896     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
897
898     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
899     bo = dri_bo_alloc(i965->intel.bufmgr,
900                       "intra row store",
901                       width_in_mbs * 64,
902                       0x1000);
903     assert(bo);
904     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
905     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
906
907     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
908     bo = dri_bo_alloc(i965->intel.bufmgr,
909                       "deblocking filter row store",
910                       width_in_mbs * 64 * 4,
911                       0x1000);
912     assert(bo);
913     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
914     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
915
916     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
917     bo = dri_bo_alloc(i965->intel.bufmgr,
918                       "bsd mpc row store",
919                       width_in_mbs * 64 * 2,
920                       0x1000);
921     assert(bo);
922     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
923     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
924
925     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
926     bo = dri_bo_alloc(i965->intel.bufmgr,
927                       "mpr row store",
928                       width_in_mbs * 64 * 2,
929                       0x1000);
930     assert(bo);
931     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
932     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
933
934     gen7_mfd_context->bitplane_read_buffer.valid = 0;
935 }
936
937 static void
938 gen7_mfd_avc_decode_picture(VADriverContextP ctx,
939                             struct decode_state *decode_state,
940                             struct gen7_mfd_context *gen7_mfd_context)
941 {
942     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
943     VAPictureParameterBufferH264 *pic_param;
944     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
945     dri_bo *slice_data_bo;
946     int i, j;
947
948     assert(decode_state->pic_param && decode_state->pic_param->buffer);
949     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
950     gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
951
952     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
953     intel_batchbuffer_emit_mi_flush(batch);
954     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
955     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
956     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
957     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
958     gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
959     gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
960
961     for (j = 0; j < decode_state->num_slice_params; j++) {
962         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
963         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
964         slice_data_bo = decode_state->slice_datas[j]->bo;
965         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
966
967         if (j == decode_state->num_slice_params - 1)
968             next_slice_group_param = NULL;
969         else
970             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
971
972         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
973             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
974             assert((slice_param->slice_type == SLICE_TYPE_I) ||
975                    (slice_param->slice_type == SLICE_TYPE_SI) ||
976                    (slice_param->slice_type == SLICE_TYPE_P) ||
977                    (slice_param->slice_type == SLICE_TYPE_SP) ||
978                    (slice_param->slice_type == SLICE_TYPE_B));
979
980             if (i < decode_state->slice_params[j]->num_elements - 1)
981                 next_slice_param = slice_param + 1;
982             else
983                 next_slice_param = next_slice_group_param;
984
985             gen7_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
986             gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
987             gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
988             gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
989             gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
990             slice_param++;
991         }
992     }
993
994     intel_batchbuffer_end_atomic(batch);
995     intel_batchbuffer_flush(batch);
996 }
997
998 static void
999 gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
1000                            struct decode_state *decode_state,
1001                            struct gen7_mfd_context *gen7_mfd_context)
1002 {
1003     VAPictureParameterBufferMPEG2 *pic_param;
1004     struct i965_driver_data *i965 = i965_driver_data(ctx);
1005     struct object_surface *obj_surface;
1006     dri_bo *bo;
1007     unsigned int width_in_mbs;
1008
1009     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1010     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1011     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1012
1013     mpeg2_set_reference_surfaces(
1014         ctx,
1015         gen7_mfd_context->reference_surface,
1016         decode_state,
1017         pic_param
1018     );
1019
1020     /* Current decoded picture */
1021     obj_surface = SURFACE(decode_state->current_render_target);
1022     assert(obj_surface);
1023     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1024
1025     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1026     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1027     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1028     gen7_mfd_context->pre_deblocking_output.valid = 1;
1029
1030     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1031     bo = dri_bo_alloc(i965->intel.bufmgr,
1032                       "bsd mpc row store",
1033                       width_in_mbs * 96,
1034                       0x1000);
1035     assert(bo);
1036     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1037     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1038
1039     gen7_mfd_context->post_deblocking_output.valid = 0;
1040     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1041     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1042     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1043     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1044 }
1045
1046 static void
1047 gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
1048                          struct decode_state *decode_state,
1049                          struct gen7_mfd_context *gen7_mfd_context)
1050 {
1051     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1052     VAPictureParameterBufferMPEG2 *pic_param;
1053
1054     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1055     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1056
1057     BEGIN_BCS_BATCH(batch, 13);
1058     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1059     OUT_BCS_BATCH(batch,
1060                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1061                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1062                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1063                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1064                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1065                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1066                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1067                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1068                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1069                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1070                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1071                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1072     OUT_BCS_BATCH(batch,
1073                   pic_param->picture_coding_type << 9);
1074     OUT_BCS_BATCH(batch,
1075                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1076                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1077     OUT_BCS_BATCH(batch, 0);
1078     OUT_BCS_BATCH(batch, 0);
1079     OUT_BCS_BATCH(batch, 0);
1080     OUT_BCS_BATCH(batch, 0);
1081     OUT_BCS_BATCH(batch, 0);
1082     OUT_BCS_BATCH(batch, 0);
1083     OUT_BCS_BATCH(batch, 0);
1084     OUT_BCS_BATCH(batch, 0);
1085     OUT_BCS_BATCH(batch, 0);
1086     ADVANCE_BCS_BATCH(batch);
1087 }
1088
1089 static void
1090 gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
1091                         struct decode_state *decode_state,
1092                         struct gen7_mfd_context *gen7_mfd_context)
1093 {
1094     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1095     int i, j;
1096
1097     /* Update internal QM state */
1098     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1099         VAIQMatrixBufferMPEG2 * const iq_matrix =
1100             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1101
1102         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1103             iq_matrix->load_intra_quantiser_matrix) {
1104             gen_iq_matrix->load_intra_quantiser_matrix =
1105                 iq_matrix->load_intra_quantiser_matrix;
1106             if (iq_matrix->load_intra_quantiser_matrix) {
1107                 for (j = 0; j < 64; j++)
1108                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1109                         iq_matrix->intra_quantiser_matrix[j];
1110             }
1111         }
1112
1113         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1114             iq_matrix->load_non_intra_quantiser_matrix) {
1115             gen_iq_matrix->load_non_intra_quantiser_matrix =
1116                 iq_matrix->load_non_intra_quantiser_matrix;
1117             if (iq_matrix->load_non_intra_quantiser_matrix) {
1118                 for (j = 0; j < 64; j++)
1119                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1120                         iq_matrix->non_intra_quantiser_matrix[j];
1121             }
1122         }
1123     }
1124
1125     /* Commit QM state to HW */
1126     for (i = 0; i < 2; i++) {
1127         unsigned char *qm = NULL;
1128         int qm_type;
1129
1130         if (i == 0) {
1131             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1132                 qm = gen_iq_matrix->intra_quantiser_matrix;
1133                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1134             }
1135         } else {
1136             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1137                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1138                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1139             }
1140         }
1141
1142         if (!qm)
1143             continue;
1144
1145         gen7_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1146     }
1147 }
1148
1149 static void
1150 gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1151                           VAPictureParameterBufferMPEG2 *pic_param,
1152                           VASliceParameterBufferMPEG2 *slice_param,
1153                           VASliceParameterBufferMPEG2 *next_slice_param,
1154                           struct gen7_mfd_context *gen7_mfd_context)
1155 {
1156     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1157     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1158     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1159
1160     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1161         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1162         is_field_pic = 1;
1163     is_field_pic_wa = is_field_pic &&
1164         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1165
1166     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1167     hpos0 = slice_param->slice_horizontal_position;
1168
1169     if (next_slice_param == NULL) {
1170         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1171         hpos1 = 0;
1172     } else {
1173         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1174         hpos1 = next_slice_param->slice_horizontal_position;
1175     }
1176
1177     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1178
1179     BEGIN_BCS_BATCH(batch, 5);
1180     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1181     OUT_BCS_BATCH(batch, 
1182                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1183     OUT_BCS_BATCH(batch, 
1184                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1185     OUT_BCS_BATCH(batch,
1186                   hpos0 << 24 |
1187                   vpos0 << 16 |
1188                   mb_count << 8 |
1189                   (next_slice_param == NULL) << 5 |
1190                   (next_slice_param == NULL) << 3 |
1191                   (slice_param->macroblock_offset & 0x7));
1192     OUT_BCS_BATCH(batch,
1193                   slice_param->quantiser_scale_code << 24);
1194     ADVANCE_BCS_BATCH(batch);
1195 }
1196
1197 static void
1198 gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1199                               struct decode_state *decode_state,
1200                               struct gen7_mfd_context *gen7_mfd_context)
1201 {
1202     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1203     VAPictureParameterBufferMPEG2 *pic_param;
1204     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1205     dri_bo *slice_data_bo;
1206     int i, j;
1207
1208     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1209     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1210
1211     gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1212     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1213     intel_batchbuffer_emit_mi_flush(batch);
1214     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1215     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1216     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1217     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1218     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1219     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1220
1221     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1222         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1223             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1224
1225     for (j = 0; j < decode_state->num_slice_params; j++) {
1226         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1227         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1228         slice_data_bo = decode_state->slice_datas[j]->bo;
1229         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1230
1231         if (j == decode_state->num_slice_params - 1)
1232             next_slice_group_param = NULL;
1233         else
1234             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1235
1236         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1237             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1238
1239             if (i < decode_state->slice_params[j]->num_elements - 1)
1240                 next_slice_param = slice_param + 1;
1241             else
1242                 next_slice_param = next_slice_group_param;
1243
1244             gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1245             slice_param++;
1246         }
1247     }
1248
1249     intel_batchbuffer_end_atomic(batch);
1250     intel_batchbuffer_flush(batch);
1251 }
1252
1253 static const int va_to_gen7_vc1_pic_type[5] = {
1254     GEN7_VC1_I_PICTURE,
1255     GEN7_VC1_P_PICTURE,
1256     GEN7_VC1_B_PICTURE,
1257     GEN7_VC1_BI_PICTURE,
1258     GEN7_VC1_P_PICTURE,
1259 };
1260
/*
 * Motion vector mode lookup: index (presumably the VA-API VC-1 mv mode --
 * confirm against callers) -> value programmed into the hardware.
 */
static const int va_to_gen7_vc1_mv[4] = {
    1,      /* index 0: 1-MV */
    2,      /* index 1: 1-MV half-pel */
    3,      /* index 2: 1-MV half-pel bilinear */
    0,      /* index 3: Mixed MV */
};
1267
/*
 * 21 scale factors for B pictures.
 * NOTE(review): the values look like fractions scaled by 256 (128 = 1/2,
 * 85 = 1/3, 170 = 2/3, ...), presumably indexed by the VC-1 B-fraction
 * code -- confirm against the code that reads this table.
 */
static const int b_picture_scale_factor[21] = {
    128,  85, 170,      /*  0 ..  2 */
     64, 192,  51,      /*  3 ..  5 */
    102, 153, 204,      /*  6 ..  8 */
     43, 215,  37,      /*  9 .. 11 */
     74, 111, 148,      /* 12 .. 14 */
    185, 222,  32,      /* 15 .. 17 */
     96, 160, 224,      /* 18 .. 20 */
};
1275
/*
 * Conditional overlap lookup: index (presumably the VA-API VC-1
 * conditional_overlap_flag value -- confirm against callers) -> value
 * programmed into the hardware.  The value 1 is never produced.
 */
static const int va_to_gen7_vc1_condover[3] = {
    0,      /* index 0 */
    2,      /* index 1 */
    3       /* index 2 */
};
1281
1282 static const int va_to_gen7_vc1_profile[4] = {
1283     GEN7_VC1_SIMPLE_PROFILE,
1284     GEN7_VC1_MAIN_PROFILE,
1285     GEN7_VC1_RESERVED_PROFILE,
1286     GEN7_VC1_ADVANCED_PROFILE
1287 };
1288
1289 static void 
1290 gen7_mfd_free_vc1_surface(void **data)
1291 {
1292     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1293
1294     if (!gen7_vc1_surface)
1295         return;
1296
1297     dri_bo_unreference(gen7_vc1_surface->dmv);
1298     free(gen7_vc1_surface);
1299     *data = NULL;
1300 }
1301
1302 static void
1303 gen7_mfd_init_vc1_surface(VADriverContextP ctx, 
1304                           VAPictureParameterBufferVC1 *pic_param,
1305                           struct object_surface *obj_surface)
1306 {
1307     struct i965_driver_data *i965 = i965_driver_data(ctx);
1308     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1309     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1310     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1311
1312     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
1313
1314     if (!gen7_vc1_surface) {
1315         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1316         assert((obj_surface->size & 0x3f) == 0);
1317         obj_surface->private_data = gen7_vc1_surface;
1318     }
1319
1320     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1321
1322     if (gen7_vc1_surface->dmv == NULL) {
1323         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1324                                              "direct mv w/r buffer",
1325                                              width_in_mbs * height_in_mbs * 64,
1326                                              0x1000);
1327     }
1328 }
1329
1330 static void
1331 gen7_mfd_vc1_decode_init(VADriverContextP ctx,
1332                          struct decode_state *decode_state,
1333                          struct gen7_mfd_context *gen7_mfd_context)
1334 {
1335     VAPictureParameterBufferVC1 *pic_param;
1336     struct i965_driver_data *i965 = i965_driver_data(ctx);
1337     struct object_surface *obj_surface;
1338     int i;
1339     dri_bo *bo;
1340     int width_in_mbs;
1341
1342     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1343     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1344     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1345
1346     /* reference picture */
1347     obj_surface = SURFACE(pic_param->forward_reference_picture);
1348
1349     if (obj_surface && obj_surface->bo)
1350         gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1351     else
1352         gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1353
1354     obj_surface = SURFACE(pic_param->backward_reference_picture);
1355
1356     if (obj_surface && obj_surface->bo)
1357         gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1358     else
1359         gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1360
1361     /* must do so !!! */
1362     for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
1363         gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
1364
1365     /* Current decoded picture */
1366     obj_surface = SURFACE(decode_state->current_render_target);
1367     assert(obj_surface);
1368     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1369     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1370
1371     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1372     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1373     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1374     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1375
1376     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1377     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1378     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1379     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1380
1381     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1382     bo = dri_bo_alloc(i965->intel.bufmgr,
1383                       "intra row store",
1384                       width_in_mbs * 64,
1385                       0x1000);
1386     assert(bo);
1387     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1388     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1389
1390     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1391     bo = dri_bo_alloc(i965->intel.bufmgr,
1392                       "deblocking filter row store",
1393                       width_in_mbs * 6 * 64,
1394                       0x1000);
1395     assert(bo);
1396     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1397     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1398
1399     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1400     bo = dri_bo_alloc(i965->intel.bufmgr,
1401                       "bsd mpc row store",
1402                       width_in_mbs * 96,
1403                       0x1000);
1404     assert(bo);
1405     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1406     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1407
1408     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1409
1410     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1411     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1412     
1413     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1414         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1415         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1416         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1417         int src_w, src_h;
1418         uint8_t *src = NULL, *dst = NULL;
1419
1420         assert(decode_state->bit_plane->buffer);
1421         src = decode_state->bit_plane->buffer;
1422
1423         bo = dri_bo_alloc(i965->intel.bufmgr,
1424                           "VC-1 Bitplane",
1425                           bitplane_width * height_in_mbs,
1426                           0x1000);
1427         assert(bo);
1428         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1429
1430         dri_bo_map(bo, True);
1431         assert(bo->virtual);
1432         dst = bo->virtual;
1433
1434         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1435             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1436                 int src_index, dst_index;
1437                 int src_shift;
1438                 uint8_t src_value;
1439
1440                 src_index = (src_h * width_in_mbs + src_w) / 2;
1441                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1442                 src_value = ((src[src_index] >> src_shift) & 0xf);
1443
1444                 dst_index = src_w / 2;
1445                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1446             }
1447
1448             if (src_w & 1)
1449                 dst[src_w / 2] >>= 4;
1450
1451             dst += bitplane_width;
1452         }
1453
1454         dri_bo_unmap(bo);
1455     } else
1456         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1457 }
1458
1459 static void
1460 gen7_mfd_vc1_pic_state(VADriverContextP ctx,
1461                        struct decode_state *decode_state,
1462                        struct gen7_mfd_context *gen7_mfd_context)
1463 {
1464     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1465     VAPictureParameterBufferVC1 *pic_param;
1466     struct i965_driver_data *i965 = i965_driver_data(ctx);
1467     struct object_surface *obj_surface;
1468     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1469     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1470     int unified_mv_mode;
1471     int ref_field_pic_polarity = 0;
1472     int scale_factor = 0;
1473     int trans_ac_y = 0;
1474     int dmv_surface_valid = 0;
1475     int brfd = 0;
1476     int fcm = 0;
1477     int picture_type;
1478     int profile;
1479     int overlap;
1480     int interpolation_mode = 0;
1481
1482     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1483     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1484
1485     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1486     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1487     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1488     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1489     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1490     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1491     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1492     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1493
1494     if (dquant == 0) {
1495         alt_pquant_config = 0;
1496         alt_pquant_edge_mask = 0;
1497     } else if (dquant == 2) {
1498         alt_pquant_config = 1;
1499         alt_pquant_edge_mask = 0xf;
1500     } else {
1501         assert(dquant == 1);
1502         if (dquantfrm == 0) {
1503             alt_pquant_config = 0;
1504             alt_pquant_edge_mask = 0;
1505             alt_pq = 0;
1506         } else {
1507             assert(dquantfrm == 1);
1508             alt_pquant_config = 1;
1509
1510             switch (dqprofile) {
1511             case 3:
1512                 if (dqbilevel == 0) {
1513                     alt_pquant_config = 2;
1514                     alt_pquant_edge_mask = 0;
1515                 } else {
1516                     assert(dqbilevel == 1);
1517                     alt_pquant_config = 3;
1518                     alt_pquant_edge_mask = 0;
1519                 }
1520                 break;
1521                 
1522             case 0:
1523                 alt_pquant_edge_mask = 0xf;
1524                 break;
1525
1526             case 1:
1527                 if (dqdbedge == 3)
1528                     alt_pquant_edge_mask = 0x9;
1529                 else
1530                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1531
1532                 break;
1533
1534             case 2:
1535                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1536                 break;
1537
1538             default:
1539                 assert(0);
1540             }
1541         }
1542     }
1543
1544     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1545         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1546         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1547     } else {
1548         assert(pic_param->mv_fields.bits.mv_mode < 4);
1549         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1550     }
1551
1552     if (pic_param->sequence_fields.bits.interlace == 1 &&
1553         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1554         /* FIXME: calculate reference field picture polarity */
1555         assert(0);
1556         ref_field_pic_polarity = 0;
1557     }
1558
1559     if (pic_param->b_picture_fraction < 21)
1560         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1561
1562     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1563     
1564     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1565         picture_type == GEN7_VC1_I_PICTURE)
1566         picture_type = GEN7_VC1_BI_PICTURE;
1567
1568     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1569         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1570     else {
1571         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1572         /*
1573          * 8.3.6.2.1 Transform Type Selection
1574          * If variable-sized transform coding is not enabled,
1575          * then the 8x8 transform shall be used for all blocks.
1576          * it is also MFX_VC1_PIC_STATE requirement.
1577          */
1578         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1579             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1580             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1581         }
1582     }
1583
1584
1585     if (picture_type == GEN7_VC1_B_PICTURE) {
1586         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1587
1588         obj_surface = SURFACE(pic_param->backward_reference_picture);
1589         assert(obj_surface);
1590         gen7_vc1_surface = obj_surface->private_data;
1591
1592         if (!gen7_vc1_surface || 
1593             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1594              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1595             dmv_surface_valid = 0;
1596         else
1597             dmv_surface_valid = 1;
1598     }
1599
1600     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1601
1602     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1603         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1604     else {
1605         if (pic_param->picture_fields.bits.top_field_first)
1606             fcm = 2;
1607         else
1608             fcm = 3;
1609     }
1610
1611     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1612         brfd = pic_param->reference_fields.bits.reference_distance;
1613         brfd = (scale_factor * brfd) >> 8;
1614         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1615
1616         if (brfd < 0)
1617             brfd = 0;
1618     }
1619
1620     overlap = pic_param->sequence_fields.bits.overlap;
1621     if (profile != GEN7_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
1622         overlap = 0;
1623
1624     assert(pic_param->conditional_overlap_flag < 3);
1625     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1626
1627     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1628         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1629          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1630         interpolation_mode = 9; /* Half-pel bilinear */
1631     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1632              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1633               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1634         interpolation_mode = 1; /* Half-pel bicubic */
1635     else
1636         interpolation_mode = 0; /* Quarter-pel bicubic */
1637
1638     BEGIN_BCS_BATCH(batch, 6);
1639     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1640     OUT_BCS_BATCH(batch,
1641                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1642                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1643     OUT_BCS_BATCH(batch,
1644                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1645                   dmv_surface_valid << 15 |
1646                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1647                   pic_param->rounding_control << 13 |
1648                   pic_param->sequence_fields.bits.syncmarker << 12 |
1649                   interpolation_mode << 8 |
1650                   0 << 7 | /* FIXME: scale up or down ??? */
1651                   pic_param->range_reduction_frame << 6 |
1652                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1653                   overlap << 4 |
1654                   !pic_param->picture_fields.bits.is_first_field << 3 |
1655                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1656     OUT_BCS_BATCH(batch,
1657                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1658                   picture_type << 26 |
1659                   fcm << 24 |
1660                   alt_pq << 16 |
1661                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1662                   scale_factor << 0);
1663     OUT_BCS_BATCH(batch,
1664                   unified_mv_mode << 28 |
1665                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1666                   pic_param->fast_uvmc_flag << 26 |
1667                   ref_field_pic_polarity << 25 |
1668                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1669                   pic_param->reference_fields.bits.reference_distance << 20 |
1670                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1671                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1672                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1673                   alt_pquant_edge_mask << 4 |
1674                   alt_pquant_config << 2 |
1675                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1676                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1677     OUT_BCS_BATCH(batch,
1678                   !!pic_param->bitplane_present.value << 31 |
1679                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1680                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1681                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1682                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1683                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1684                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1685                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1686                   pic_param->mv_fields.bits.mv_table << 20 |
1687                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1688                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1689                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1690                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1691                   pic_param->mb_mode_table << 8 |
1692                   trans_ac_y << 6 |
1693                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1694                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1695                   pic_param->cbp_table << 0);
1696     ADVANCE_BCS_BATCH(batch);
1697 }
1698
1699 static void
1700 gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1701                              struct decode_state *decode_state,
1702                              struct gen7_mfd_context *gen7_mfd_context)
1703 {
1704     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1705     VAPictureParameterBufferVC1 *pic_param;
1706     int intensitycomp_single;
1707
1708     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1709     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1710
1711     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1712     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1713     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1714
1715     BEGIN_BCS_BATCH(batch, 6);
1716     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1717     OUT_BCS_BATCH(batch,
1718                   0 << 14 | /* FIXME: double ??? */
1719                   0 << 12 |
1720                   intensitycomp_single << 10 |
1721                   intensitycomp_single << 8 |
1722                   0 << 4 | /* FIXME: interlace mode */
1723                   0);
1724     OUT_BCS_BATCH(batch,
1725                   pic_param->luma_shift << 16 |
1726                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1727     OUT_BCS_BATCH(batch, 0);
1728     OUT_BCS_BATCH(batch, 0);
1729     OUT_BCS_BATCH(batch, 0);
1730     ADVANCE_BCS_BATCH(batch);
1731 }
1732
1733
1734 static void
1735 gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
1736                               struct decode_state *decode_state,
1737                               struct gen7_mfd_context *gen7_mfd_context)
1738 {
1739     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1740     VAPictureParameterBufferVC1 *pic_param;
1741     struct i965_driver_data *i965 = i965_driver_data(ctx);
1742     struct object_surface *obj_surface;
1743     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1744
1745     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1746     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1747
1748     obj_surface = SURFACE(decode_state->current_render_target);
1749
1750     if (obj_surface && obj_surface->private_data) {
1751         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1752     }
1753
1754     obj_surface = SURFACE(pic_param->backward_reference_picture);
1755
1756     if (obj_surface && obj_surface->private_data) {
1757         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1758     }
1759
1760     BEGIN_BCS_BATCH(batch, 3);
1761     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1762
1763     if (dmv_write_buffer)
1764         OUT_BCS_RELOC(batch, dmv_write_buffer,
1765                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1766                       0);
1767     else
1768         OUT_BCS_BATCH(batch, 0);
1769
1770     if (dmv_read_buffer)
1771         OUT_BCS_RELOC(batch, dmv_read_buffer,
1772                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1773                       0);
1774     else
1775         OUT_BCS_BATCH(batch, 0);
1776                   
1777     ADVANCE_BCS_BATCH(batch);
1778 }
1779
/*
 * Translate a macroblock bit offset into an offset within the raw buffer.
 *
 * For any profile other than 3 the input offset is returned unchanged.
 * For profile 3 the whole-byte part of the offset is walked byte by byte
 * over buf; every time the four bytes at the current position are
 * 00 00 03 xx with xx < 4, two bytes of the offset are accounted for while
 * three raw bytes are stepped over.  The result is the raw byte position
 * reached, in bits, plus the original sub-byte remainder.
 *
 * NOTE(review): the scan reads up to three bytes past the current position
 * and has no buffer length to bound it; the caller must guarantee that much
 * readable data.  When a 00 00 03 xx pattern begins on the last counted
 * byte, the walk still advances three raw bytes -- confirm this is the
 * intended boundary behaviour.
 */
static int
gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    const int header_bytes = in_slice_data_bit_offset / 8;
    int counted = 0;    /* bytes of the input offset accounted for so far */
    int raw_pos = 0;    /* matching position in the raw buffer */

    if (profile != 3)
        return in_slice_data_bit_offset;

    while (counted < header_bytes) {
        if (buf[raw_pos] == 0 && buf[raw_pos + 1] == 0 &&
            buf[raw_pos + 2] == 3 && buf[raw_pos + 3] < 4) {
            /* Two counted bytes are followed by one extra raw byte. */
            counted += 2;
            raw_pos += 3;
        } else {
            counted += 1;
            raw_pos += 1;
        }
    }

    return 8 * raw_pos + in_slice_data_bit_offset % 8;
}
1801
1802 static void
1803 gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
1804                         VAPictureParameterBufferVC1 *pic_param,
1805                         VASliceParameterBufferVC1 *slice_param,
1806                         VASliceParameterBufferVC1 *next_slice_param,
1807                         dri_bo *slice_data_bo,
1808                         struct gen7_mfd_context *gen7_mfd_context)
1809 {
1810     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1811     int next_slice_start_vert_pos;
1812     int macroblock_offset;
1813     uint8_t *slice_data = NULL;
1814
1815     dri_bo_map(slice_data_bo, 0);
1816     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1817     macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1818                                                                slice_param->macroblock_offset,
1819                                                                pic_param->sequence_fields.bits.profile);
1820     dri_bo_unmap(slice_data_bo);
1821
1822     if (next_slice_param)
1823         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1824     else
1825         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1826
1827     BEGIN_BCS_BATCH(batch, 5);
1828     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1829     OUT_BCS_BATCH(batch, 
1830                   slice_param->slice_data_size - (macroblock_offset >> 3));
1831     OUT_BCS_BATCH(batch, 
1832                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1833     OUT_BCS_BATCH(batch,
1834                   slice_param->slice_vertical_position << 16 |
1835                   next_slice_start_vert_pos << 0);
1836     OUT_BCS_BATCH(batch,
1837                   (macroblock_offset & 0x7));
1838     ADVANCE_BCS_BATCH(batch);
1839 }
1840
1841 static void
1842 gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
1843                             struct decode_state *decode_state,
1844                             struct gen7_mfd_context *gen7_mfd_context)
1845 {
1846     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1847     VAPictureParameterBufferVC1 *pic_param;
1848     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1849     dri_bo *slice_data_bo;
1850     int i, j;
1851
1852     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1853     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1854
1855     gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1856     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1857     intel_batchbuffer_emit_mi_flush(batch);
1858     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1859     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1860     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1861     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1862     gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1863     gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1864     gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1865
1866     for (j = 0; j < decode_state->num_slice_params; j++) {
1867         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1868         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1869         slice_data_bo = decode_state->slice_datas[j]->bo;
1870         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1871
1872         if (j == decode_state->num_slice_params - 1)
1873             next_slice_group_param = NULL;
1874         else
1875             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1876
1877         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1878             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1879
1880             if (i < decode_state->slice_params[j]->num_elements - 1)
1881                 next_slice_param = slice_param + 1;
1882             else
1883                 next_slice_param = next_slice_group_param;
1884
1885             gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1886             slice_param++;
1887         }
1888     }
1889
1890     intel_batchbuffer_end_atomic(batch);
1891     intel_batchbuffer_flush(batch);
1892 }
1893
1894 #ifdef HAVE_VA_JPEG_DECODE
1895 static void
1896 gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
1897                           struct decode_state *decode_state,
1898                           struct gen7_mfd_context *gen7_mfd_context)
1899 {
1900     struct i965_driver_data *i965 = i965_driver_data(ctx);
1901     struct object_surface *obj_surface;
1902     VAPictureParameterBufferJPEGBaseline *pic_param;
1903     int subsampling = SUBSAMPLE_YUV420;
1904
1905     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1906
1907     if (pic_param->num_components == 1)
1908         subsampling = SUBSAMPLE_YUV400;
1909     else if (pic_param->num_components == 3) {
1910         int h1 = pic_param->components[0].h_sampling_factor;
1911         int h2 = pic_param->components[1].h_sampling_factor;
1912         int h3 = pic_param->components[2].h_sampling_factor;
1913         int v1 = pic_param->components[0].v_sampling_factor;
1914         int v2 = pic_param->components[1].v_sampling_factor;
1915         int v3 = pic_param->components[2].v_sampling_factor;
1916
1917         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1918             v1 == 2 && v2 == 1 && v3 == 1)
1919             subsampling = SUBSAMPLE_YUV420;
1920         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1921                  v1 == 1 && v2 == 1 && v3 == 1)
1922             subsampling = SUBSAMPLE_YUV422H;
1923         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1924                  v1 == 1 && v2 == 1 && v3 == 1)
1925             subsampling = SUBSAMPLE_YUV444;
1926         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1927                  v1 == 1 && v2 == 1 && v3 == 1)
1928             subsampling = SUBSAMPLE_YUV411;
1929         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1930                  v1 == 2 && v2 == 1 && v3 == 1)
1931             subsampling = SUBSAMPLE_YUV422V;
1932         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1933                  v1 == 2 && v2 == 2 && v3 == 2)
1934             subsampling = SUBSAMPLE_YUV422H;
1935         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1936                  v1 == 2 && v2 == 1 && v3 == 1)
1937             subsampling = SUBSAMPLE_YUV422V;
1938         else
1939             assert(0);
1940     } else {
1941         assert(0);
1942     }
1943
1944     /* Current decoded picture */
1945     obj_surface = SURFACE(decode_state->current_render_target);
1946     assert(obj_surface);
1947     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
1948
1949     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1950     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1951     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1952     gen7_mfd_context->pre_deblocking_output.valid = 1;
1953
1954     gen7_mfd_context->post_deblocking_output.bo = NULL;
1955     gen7_mfd_context->post_deblocking_output.valid = 0;
1956
1957     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1958     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1959
1960     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1961     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1962
1963     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1964     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1965
1966     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1967     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1968
1969     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1970     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1971 }
1972
1973 static const int va_to_gen7_jpeg_rotation[4] = {
1974     GEN7_JPEG_ROTATION_0,
1975     GEN7_JPEG_ROTATION_90,
1976     GEN7_JPEG_ROTATION_180,
1977     GEN7_JPEG_ROTATION_270
1978 };
1979
1980 static void
1981 gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
1982                         struct decode_state *decode_state,
1983                         struct gen7_mfd_context *gen7_mfd_context)
1984 {
1985     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1986     VAPictureParameterBufferJPEGBaseline *pic_param;
1987     int chroma_type = GEN7_YUV420;
1988     int frame_width_in_blks;
1989     int frame_height_in_blks;
1990
1991     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1992     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1993
1994     if (pic_param->num_components == 1)
1995         chroma_type = GEN7_YUV400;
1996     else if (pic_param->num_components == 3) {
1997         int h1 = pic_param->components[0].h_sampling_factor;
1998         int h2 = pic_param->components[1].h_sampling_factor;
1999         int h3 = pic_param->components[2].h_sampling_factor;
2000         int v1 = pic_param->components[0].v_sampling_factor;
2001         int v2 = pic_param->components[1].v_sampling_factor;
2002         int v3 = pic_param->components[2].v_sampling_factor;
2003
2004         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2005             v1 == 2 && v2 == 1 && v3 == 1)
2006             chroma_type = GEN7_YUV420;
2007         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2008                  v1 == 1 && v2 == 1 && v3 == 1)
2009             chroma_type = GEN7_YUV422H_2Y;
2010         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2011                  v1 == 1 && v2 == 1 && v3 == 1)
2012             chroma_type = GEN7_YUV444;
2013         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2014                  v1 == 1 && v2 == 1 && v3 == 1)
2015             chroma_type = GEN7_YUV411;
2016         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2017                  v1 == 2 && v2 == 1 && v3 == 1)
2018             chroma_type = GEN7_YUV422V_2Y;
2019         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2020                  v1 == 2 && v2 == 2 && v3 == 2)
2021             chroma_type = GEN7_YUV422H_4Y;
2022         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2023                  v1 == 2 && v2 == 1 && v3 == 1)
2024             chroma_type = GEN7_YUV422V_4Y;
2025         else
2026             assert(0);
2027     }
2028
2029     if (chroma_type == GEN7_YUV400 ||
2030         chroma_type == GEN7_YUV444 ||
2031         chroma_type == GEN7_YUV422V_2Y) {
2032         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2033         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2034     } else if (chroma_type == GEN7_YUV411) {
2035         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2036         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2037     } else {
2038         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2039         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2040     }
2041
2042     BEGIN_BCS_BATCH(batch, 3);
2043     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2044     OUT_BCS_BATCH(batch,
2045                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2046                   (chroma_type << 0));
2047     OUT_BCS_BATCH(batch,
2048                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2049                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2050     ADVANCE_BCS_BATCH(batch);
2051 }
2052
2053 static const int va_to_gen7_jpeg_hufftable[2] = {
2054     MFX_HUFFTABLE_ID_Y,
2055     MFX_HUFFTABLE_ID_UV
2056 };
2057
2058 static void
2059 gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2060                                struct decode_state *decode_state,
2061                                struct gen7_mfd_context *gen7_mfd_context,
2062                                int num_tables)
2063 {
2064     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2065     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2066     int index;
2067
2068     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2069         return;
2070
2071     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2072
2073     for (index = 0; index < num_tables; index++) {
2074         int id = va_to_gen7_jpeg_hufftable[index];
2075         BEGIN_BCS_BATCH(batch, 53);
2076         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2077         OUT_BCS_BATCH(batch, id);
2078         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2079         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2080         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2081         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2082         ADVANCE_BCS_BATCH(batch);
2083     }
2084 }
2085
2086 static const int va_to_gen7_jpeg_qm[5] = {
2087     -1,
2088     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2089     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2090     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2091     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2092 };
2093
2094 static void
2095 gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
2096                        struct decode_state *decode_state,
2097                        struct gen7_mfd_context *gen7_mfd_context)
2098 {
2099     VAPictureParameterBufferJPEGBaseline *pic_param;
2100     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2101     int index;
2102
2103     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2104         return;
2105
2106     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2107     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2108
2109     assert(pic_param->num_components <= 3);
2110
2111     for (index = 0; index < pic_param->num_components; index++) {
2112         int qm_type = va_to_gen7_jpeg_qm[pic_param->components[index].component_id - pic_param->components[0].component_id + 1];
2113         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2114         unsigned char raster_qm[64];
2115         int j;
2116
2117         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2118             continue;
2119
2120         for (j = 0; j < 64; j++)
2121             raster_qm[zigzag_direct[j]] = qm[j];
2122
2123         gen7_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2124     }
2125 }
2126
2127 static void
2128 gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
2129                          VAPictureParameterBufferJPEGBaseline *pic_param,
2130                          VASliceParameterBufferJPEGBaseline *slice_param,
2131                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2132                          dri_bo *slice_data_bo,
2133                          struct gen7_mfd_context *gen7_mfd_context)
2134 {
2135     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2136     int scan_component_mask = 0;
2137     int i;
2138
2139     assert(slice_param->num_components > 0);
2140     assert(slice_param->num_components < 4);
2141     assert(slice_param->num_components <= pic_param->num_components);
2142
2143     for (i = 0; i < slice_param->num_components; i++) {
2144         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2145         case 1:
2146             scan_component_mask |= (1 << 0);
2147             break;
2148         case 2:
2149             scan_component_mask |= (1 << 1);
2150             break;
2151         case 3:
2152             scan_component_mask |= (1 << 2);
2153             break;
2154         default:
2155             assert(0);
2156             break;
2157         }
2158     }
2159
2160     BEGIN_BCS_BATCH(batch, 6);
2161     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2162     OUT_BCS_BATCH(batch, 
2163                   slice_param->slice_data_size);
2164     OUT_BCS_BATCH(batch, 
2165                   slice_param->slice_data_offset);
2166     OUT_BCS_BATCH(batch,
2167                   slice_param->slice_horizontal_position << 16 |
2168                   slice_param->slice_vertical_position << 0);
2169     OUT_BCS_BATCH(batch,
2170                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2171                   (scan_component_mask << 27) |                 /* scan components */
2172                   (0 << 26) |   /* disable interrupt allowed */
2173                   (slice_param->num_mcus << 0));                /* MCU count */
2174     OUT_BCS_BATCH(batch,
2175                   (slice_param->restart_interval << 0));    /* RestartInterval */
2176     ADVANCE_BCS_BATCH(batch);
2177 }
2178
2179 /* Workaround for JPEG decoding on Ivybridge */
2180
2181 VAStatus 
2182 i965_DestroySurfaces(VADriverContextP ctx,
2183                      VASurfaceID *surface_list,
2184                      int num_surfaces);
2185 VAStatus 
2186 i965_CreateSurfaces(VADriverContextP ctx,
2187                     int width,
2188                     int height,
2189                     int format,
2190                     int num_surfaces,
2191                     VASurfaceID *surfaces);
2192
/*
 * Tiny pre-encoded clip decoded by the JPEG workaround before each JPEG
 * picture: surface dimensions, raw slice bytes, number of valid bytes,
 * bit offset of the slice data and the slice QP.
 */
static struct {
    int width;
    int height;
    unsigned char data[32];
    int data_size;
    int data_bit_offset;
    int qp;
} gen7_jpeg_wa_clip = {
    .width = 16,
    .height = 16,
    .data = {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    .data_size = 14,
    .data_bit_offset = 40,
    .qp = 28,
};
2211
2212 static void
2213 gen7_jpeg_wa_init(VADriverContextP ctx,
2214                   struct gen7_mfd_context *gen7_mfd_context)
2215 {
2216     struct i965_driver_data *i965 = i965_driver_data(ctx);
2217     VAStatus status;
2218     struct object_surface *obj_surface;
2219
2220     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2221         i965_DestroySurfaces(ctx,
2222                              &gen7_mfd_context->jpeg_wa_surface_id,
2223                              1);
2224
2225     status = i965_CreateSurfaces(ctx,
2226                                  gen7_jpeg_wa_clip.width,
2227                                  gen7_jpeg_wa_clip.height,
2228                                  VA_RT_FORMAT_YUV420,
2229                                  1,
2230                                  &gen7_mfd_context->jpeg_wa_surface_id);
2231     assert(status == VA_STATUS_SUCCESS);
2232
2233     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2234     assert(obj_surface);
2235     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2236
2237     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2238         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2239                                                                "JPEG WA data",
2240                                                                0x1000,
2241                                                                0x1000);
2242         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2243                        0,
2244                        gen7_jpeg_wa_clip.data_size,
2245                        gen7_jpeg_wa_clip.data);
2246     }
2247 }
2248
2249 static void
2250 gen7_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2251                               struct gen7_mfd_context *gen7_mfd_context)
2252 {
2253     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2254
2255     BEGIN_BCS_BATCH(batch, 5);
2256     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2257     OUT_BCS_BATCH(batch,
2258                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2259                   (MFD_MODE_VLD << 15) | /* VLD mode */
2260                   (0 << 10) | /* disable Stream-Out */
2261                   (0 << 9)  | /* Post Deblocking Output */
2262                   (1 << 8)  | /* Pre Deblocking Output */
2263                   (0 << 5)  | /* not in stitch mode */
2264                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2265                   (MFX_FORMAT_AVC << 0));
2266     OUT_BCS_BATCH(batch,
2267                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2268                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2269                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2270                   (0 << 1)  |
2271                   (0 << 0));
2272     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2273     OUT_BCS_BATCH(batch, 0); /* reserved */
2274     ADVANCE_BCS_BATCH(batch);
2275 }
2276
2277 static void
2278 gen7_jpeg_wa_surface_state(VADriverContextP ctx,
2279                            struct gen7_mfd_context *gen7_mfd_context)
2280 {
2281     struct i965_driver_data *i965 = i965_driver_data(ctx);
2282     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2283     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2284
2285     BEGIN_BCS_BATCH(batch, 6);
2286     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2287     OUT_BCS_BATCH(batch, 0);
2288     OUT_BCS_BATCH(batch,
2289                   ((obj_surface->orig_width - 1) << 18) |
2290                   ((obj_surface->orig_height - 1) << 4));
2291     OUT_BCS_BATCH(batch,
2292                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2293                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2294                   (0 << 22) | /* surface object control state, ignored */
2295                   ((obj_surface->width - 1) << 3) | /* pitch */
2296                   (0 << 2)  | /* must be 0 */
2297                   (1 << 1)  | /* must be tiled */
2298                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2299     OUT_BCS_BATCH(batch,
2300                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2301                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2302     OUT_BCS_BATCH(batch,
2303                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2304                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2305     ADVANCE_BCS_BATCH(batch);
2306 }
2307
2308 static void
2309 gen7_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2310                                  struct gen7_mfd_context *gen7_mfd_context)
2311 {
2312     struct i965_driver_data *i965 = i965_driver_data(ctx);
2313     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2314     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2315     dri_bo *intra_bo;
2316     int i;
2317
2318     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2319                             "intra row store",
2320                             128 * 64,
2321                             0x1000);
2322
2323     BEGIN_BCS_BATCH(batch, 24);
2324     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
2325     OUT_BCS_RELOC(batch,
2326                   obj_surface->bo,
2327                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2328                   0);
2329     
2330     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2331
2332     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2333     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2334
2335     OUT_BCS_RELOC(batch,
2336                   intra_bo,
2337                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2338                   0);
2339
2340     OUT_BCS_BATCH(batch, 0);
2341
2342     /* DW 7..22 */
2343     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2344         OUT_BCS_BATCH(batch, 0);
2345     }
2346
2347     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2348     ADVANCE_BCS_BATCH(batch);
2349
2350     dri_bo_unreference(intra_bo);
2351 }
2352
2353 static void
2354 gen7_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2355                                      struct gen7_mfd_context *gen7_mfd_context)
2356 {
2357     struct i965_driver_data *i965 = i965_driver_data(ctx);
2358     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2359     dri_bo *bsd_mpc_bo, *mpr_bo;
2360
2361     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2362                               "bsd mpc row store",
2363                               11520, /* 1.5 * 120 * 64 */
2364                               0x1000);
2365
2366     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2367                           "mpr row store",
2368                           7680, /* 1. 0 * 120 * 64 */
2369                           0x1000);
2370
2371     BEGIN_BCS_BATCH(batch, 4);
2372     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2373
2374     OUT_BCS_RELOC(batch,
2375                   bsd_mpc_bo,
2376                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2377                   0);
2378
2379     OUT_BCS_RELOC(batch,
2380                   mpr_bo,
2381                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2382                   0);
2383     OUT_BCS_BATCH(batch, 0);
2384
2385     ADVANCE_BCS_BATCH(batch);
2386
2387     dri_bo_unreference(bsd_mpc_bo);
2388     dri_bo_unreference(mpr_bo);
2389 }
2390
2391 static void
2392 gen7_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2393                           struct gen7_mfd_context *gen7_mfd_context)
2394 {
2395
2396 }
2397
2398 static void
2399 gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
2400                            struct gen7_mfd_context *gen7_mfd_context)
2401 {
2402     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2403     int img_struct = 0;
2404     int mbaff_frame_flag = 0;
2405     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2406
2407     BEGIN_BCS_BATCH(batch, 16);
2408     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2409     OUT_BCS_BATCH(batch, 
2410                   width_in_mbs * height_in_mbs);
2411     OUT_BCS_BATCH(batch, 
2412                   ((height_in_mbs - 1) << 16) | 
2413                   ((width_in_mbs - 1) << 0));
2414     OUT_BCS_BATCH(batch, 
2415                   (0 << 24) |
2416                   (0 << 16) |
2417                   (0 << 14) |
2418                   (0 << 13) |
2419                   (0 << 12) | /* differ from GEN6 */
2420                   (0 << 10) |
2421                   (img_struct << 8));
2422     OUT_BCS_BATCH(batch,
2423                   (1 << 10) | /* 4:2:0 */
2424                   (1 << 7) |  /* CABAC */
2425                   (0 << 6) |
2426                   (0 << 5) |
2427                   (0 << 4) |
2428                   (0 << 3) |
2429                   (1 << 2) |
2430                   (mbaff_frame_flag << 1) |
2431                   (0 << 0));
2432     OUT_BCS_BATCH(batch, 0);
2433     OUT_BCS_BATCH(batch, 0);
2434     OUT_BCS_BATCH(batch, 0);
2435     OUT_BCS_BATCH(batch, 0);
2436     OUT_BCS_BATCH(batch, 0);
2437     OUT_BCS_BATCH(batch, 0);
2438     OUT_BCS_BATCH(batch, 0);
2439     OUT_BCS_BATCH(batch, 0);
2440     OUT_BCS_BATCH(batch, 0);
2441     OUT_BCS_BATCH(batch, 0);
2442     OUT_BCS_BATCH(batch, 0);
2443     ADVANCE_BCS_BATCH(batch);
2444 }
2445
2446 static void
2447 gen7_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2448                                   struct gen7_mfd_context *gen7_mfd_context)
2449 {
2450     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2451     int i;
2452
2453     BEGIN_BCS_BATCH(batch, 69);
2454     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2455
2456     /* reference surfaces 0..15 */
2457     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2458         OUT_BCS_BATCH(batch, 0); /* top */
2459         OUT_BCS_BATCH(batch, 0); /* bottom */
2460     }
2461
2462     /* the current decoding frame/field */
2463     OUT_BCS_BATCH(batch, 0); /* top */
2464     OUT_BCS_BATCH(batch, 0); /* bottom */
2465
2466     /* POC List */
2467     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2468         OUT_BCS_BATCH(batch, 0);
2469         OUT_BCS_BATCH(batch, 0);
2470     }
2471
2472     OUT_BCS_BATCH(batch, 0);
2473     OUT_BCS_BATCH(batch, 0);
2474
2475     ADVANCE_BCS_BATCH(batch);
2476 }
2477
2478 static void
2479 gen7_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2480                                      struct gen7_mfd_context *gen7_mfd_context)
2481 {
2482     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2483
2484     BEGIN_BCS_BATCH(batch, 11);
2485     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2486     OUT_BCS_RELOC(batch,
2487                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2488                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2489                   0);
2490     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2491     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2492     OUT_BCS_BATCH(batch, 0);
2493     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2494     OUT_BCS_BATCH(batch, 0);
2495     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2496     OUT_BCS_BATCH(batch, 0);
2497     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2498     OUT_BCS_BATCH(batch, 0);
2499     ADVANCE_BCS_BATCH(batch);
2500 }
2501
2502 static void
2503 gen7_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2504                             struct gen7_mfd_context *gen7_mfd_context)
2505 {
2506     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2507
2508     /* the input bitsteam format on GEN7 differs from GEN6 */
2509     BEGIN_BCS_BATCH(batch, 6);
2510     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2511     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2512     OUT_BCS_BATCH(batch, 0);
2513     OUT_BCS_BATCH(batch,
2514                   (0 << 31) |
2515                   (0 << 14) |
2516                   (0 << 12) |
2517                   (0 << 10) |
2518                   (0 << 8));
2519     OUT_BCS_BATCH(batch,
2520                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2521                   (0 << 5)  |
2522                   (0 << 4)  |
2523                   (1 << 3) | /* LastSlice Flag */
2524                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2525     OUT_BCS_BATCH(batch, 0);
2526     ADVANCE_BCS_BATCH(batch);
2527 }
2528
2529 static void
2530 gen7_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2531                              struct gen7_mfd_context *gen7_mfd_context)
2532 {
2533     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2534     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2535     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2536     int first_mb_in_slice = 0;
2537     int slice_type = SLICE_TYPE_I;
2538
2539     BEGIN_BCS_BATCH(batch, 11);
2540     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2541     OUT_BCS_BATCH(batch, slice_type);
2542     OUT_BCS_BATCH(batch, 
2543                   (num_ref_idx_l1 << 24) |
2544                   (num_ref_idx_l0 << 16) |
2545                   (0 << 8) |
2546                   (0 << 0));
2547     OUT_BCS_BATCH(batch, 
2548                   (0 << 29) |
2549                   (1 << 27) |   /* disable Deblocking */
2550                   (0 << 24) |
2551                   (gen7_jpeg_wa_clip.qp << 16) |
2552                   (0 << 8) |
2553                   (0 << 0));
2554     OUT_BCS_BATCH(batch, 
2555                   (slice_ver_pos << 24) |
2556                   (slice_hor_pos << 16) | 
2557                   (first_mb_in_slice << 0));
2558     OUT_BCS_BATCH(batch,
2559                   (next_slice_ver_pos << 16) |
2560                   (next_slice_hor_pos << 0));
2561     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2562     OUT_BCS_BATCH(batch, 0);
2563     OUT_BCS_BATCH(batch, 0);
2564     OUT_BCS_BATCH(batch, 0);
2565     OUT_BCS_BATCH(batch, 0);
2566     ADVANCE_BCS_BATCH(batch);
2567 }
2568
2569 static void
2570 gen7_mfd_jpeg_wa(VADriverContextP ctx,
2571                  struct gen7_mfd_context *gen7_mfd_context)
2572 {
2573     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2574     gen7_jpeg_wa_init(ctx, gen7_mfd_context);
2575     intel_batchbuffer_emit_mi_flush(batch);
2576     gen7_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2577     gen7_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2578     gen7_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2579     gen7_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2580     gen7_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2581     gen7_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2582     gen7_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2583
2584     gen7_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2585     gen7_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2586     gen7_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2587 }
2588
2589 void
2590 gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
2591                              struct decode_state *decode_state,
2592                              struct gen7_mfd_context *gen7_mfd_context)
2593 {
2594     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2595     VAPictureParameterBufferJPEGBaseline *pic_param;
2596     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2597     dri_bo *slice_data_bo;
2598     int i, j, max_selector = 0;
2599
2600     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2601     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2602
2603     /* Currently only support Baseline DCT */
2604     gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2605     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2606     gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
2607     intel_batchbuffer_emit_mi_flush(batch);
2608     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2609     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2610     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2611     gen7_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2612     gen7_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2613
2614     for (j = 0; j < decode_state->num_slice_params; j++) {
2615         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2616         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2617         slice_data_bo = decode_state->slice_datas[j]->bo;
2618         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2619
2620         if (j == decode_state->num_slice_params - 1)
2621             next_slice_group_param = NULL;
2622         else
2623             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2624
2625         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2626             int component;
2627
2628             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2629
2630             if (i < decode_state->slice_params[j]->num_elements - 1)
2631                 next_slice_param = slice_param + 1;
2632             else
2633                 next_slice_param = next_slice_group_param;
2634
2635             for (component = 0; component < slice_param->num_components; component++) {
2636                 if (max_selector < slice_param->components[component].dc_table_selector)
2637                     max_selector = slice_param->components[component].dc_table_selector;
2638
2639                 if (max_selector < slice_param->components[component].ac_table_selector)
2640                     max_selector = slice_param->components[component].ac_table_selector;
2641             }
2642
2643             slice_param++;
2644         }
2645     }
2646
2647     assert(max_selector < 2);
2648     gen7_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2649
2650     for (j = 0; j < decode_state->num_slice_params; j++) {
2651         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2652         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2653         slice_data_bo = decode_state->slice_datas[j]->bo;
2654         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2655
2656         if (j == decode_state->num_slice_params - 1)
2657             next_slice_group_param = NULL;
2658         else
2659             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2660
2661         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2662             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2663
2664             if (i < decode_state->slice_params[j]->num_elements - 1)
2665                 next_slice_param = slice_param + 1;
2666             else
2667                 next_slice_param = next_slice_group_param;
2668
2669             gen7_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2670             slice_param++;
2671         }
2672     }
2673
2674     intel_batchbuffer_end_atomic(batch);
2675     intel_batchbuffer_flush(batch);
2676 }
2677 #endif
2678
2679 static void 
2680 gen7_mfd_decode_picture(VADriverContextP ctx, 
2681                         VAProfile profile, 
2682                         union codec_state *codec_state,
2683                         struct hw_context *hw_context)
2684
2685 {
2686     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2687     struct decode_state *decode_state = &codec_state->decode;
2688
2689     assert(gen7_mfd_context);
2690
2691     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2692
2693     switch (profile) {
2694     case VAProfileMPEG2Simple:
2695     case VAProfileMPEG2Main:
2696         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
2697         break;
2698         
2699     case VAProfileH264Baseline:
2700     case VAProfileH264Main:
2701     case VAProfileH264High:
2702         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
2703         break;
2704
2705     case VAProfileVC1Simple:
2706     case VAProfileVC1Main:
2707     case VAProfileVC1Advanced:
2708         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
2709         break;
2710
2711 #ifdef HAVE_VA_JPEG_DECODE
2712     case VAProfileJPEGBaseline:
2713         gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
2714         break;
2715 #endif
2716
2717     default:
2718         assert(0);
2719         break;
2720     }
2721 }
2722
2723 static void
2724 gen7_mfd_context_destroy(void *hw_context)
2725 {
2726     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2727
2728     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2729     gen7_mfd_context->post_deblocking_output.bo = NULL;
2730
2731     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2732     gen7_mfd_context->pre_deblocking_output.bo = NULL;
2733
2734     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2735     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2736
2737     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2738     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2739
2740     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2741     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2742
2743     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2744     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2745
2746     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
2747     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2748
2749     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
2750
2751     intel_batchbuffer_free(gen7_mfd_context->base.batch);
2752     free(gen7_mfd_context);
2753 }
2754
2755 static void gen7_mfd_mpeg2_context_init(VADriverContextP ctx,
2756                                     struct gen7_mfd_context *gen7_mfd_context)
2757 {
2758     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
2759     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
2760     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
2761     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
2762 }
2763
2764 struct hw_context *
2765 gen7_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
2766 {
2767     struct intel_driver_data *intel = intel_driver_data(ctx);
2768     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
2769     int i;
2770
2771     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
2772     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
2773     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
2774
2775     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
2776         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2777         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
2778     }
2779
2780     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
2781
2782     switch (profile) {
2783     case VAProfileMPEG2Simple:
2784     case VAProfileMPEG2Main:
2785         gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
2786         break;
2787
2788     case VAProfileH264Baseline:
2789     case VAProfileH264Main:
2790     case VAProfileH264High:
2791         gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
2792         break;
2793     default:
2794         break;
2795     }
2796     return (struct hw_context *)gen7_mfd_context;
2797 }