9afa7dd17fdc29589c7514c8c14435023e6ab5ae
[profile/ivi/vaapi-intel-driver.git] / src / gen6_mfd.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36
37 #include "i965_defines.h"
38 #include "i965_drv_video.h"
39 #include "i965_decoder_utils.h"
40
41 #include "gen6_mfd.h"
42 #include "intel_media.h"
43
/* Zig-zag scan order for an 8x8 coefficient block: entry k gives the
 * raster-order index of the k-th coefficient in scan order.  Declared
 * file-scope; consumers may be outside this chunk — TODO confirm use. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
54
55 static void
56 gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
57                                VAPictureParameterBufferH264 *pic_param,
58                                struct gen6_mfd_context *gen6_mfd_context)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     int i, j;
62
63     assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
64
65     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
66         int found = 0;
67
68         if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
69             continue;
70
71         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
72             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
73             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
74                 continue;
75
76             if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
77                 found = 1;
78                 break;
79             }
80         }
81
82         if (!found) {
83             struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
84             obj_surface->flags &= ~SURFACE_REFERENCED;
85
86             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
87                 dri_bo_unreference(obj_surface->bo);
88                 obj_surface->bo = NULL;
89                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
90             }
91
92             if (obj_surface->free_private_data)
93                 obj_surface->free_private_data(&obj_surface->private_data);
94
95             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
96             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
97         }
98     }
99
100     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
101         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
102         int found = 0;
103
104         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
105             continue;
106
107         for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
108             if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
109                 continue;
110             
111             if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
112                 found = 1;
113                 break;
114             }
115         }
116
117         if (!found) {
118             int frame_idx;
119             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
120             
121             assert(obj_surface);
122             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
123
124             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
125                 for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
126                     if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
127                         continue;
128
129                     if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx)
130                         break;
131                 }
132
133                 if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface))
134                     break;
135             }
136
137             assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
138
139             for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
140                 if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
141                     gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
142                     gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
143                     break;
144                 }
145             }
146         }
147     }
148
149     /* sort */
150     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) {
151         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
152             gen6_mfd_context->reference_surface[i].frame_store_id == i)
153             continue;
154
155         for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
156             if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
157                 gen6_mfd_context->reference_surface[j].frame_store_id == i) {
158                 VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id;
159                 int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id;
160
161                 gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id;
162                 gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id;
163                 gen6_mfd_context->reference_surface[j].surface_id = id;
164                 gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
165                 break;
166             }
167         }
168     }
169 }
170
171 static void
172 gen6_mfd_init_avc_surface(VADriverContextP ctx, 
173                           VAPictureParameterBufferH264 *pic_param,
174                           struct object_surface *obj_surface)
175 {
176     struct i965_driver_data *i965 = i965_driver_data(ctx);
177     GenAvcSurface *gen6_avc_surface = obj_surface->private_data;
178     int height_in_mbs;
179
180     obj_surface->free_private_data = gen_free_avc_surface;
181     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
182
183     if (!gen6_avc_surface) {
184         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
185         assert((obj_surface->size & 0x3f) == 0);
186         obj_surface->private_data = gen6_avc_surface;
187     }
188
189     gen6_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
190                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
191
192     if (gen6_avc_surface->dmv_top == NULL) {
193         gen6_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
194                                                  "direct mv w/r buffer",
195                                                  128 * height_in_mbs * 64,      /* scalable with frame height */
196                                                  0x1000);
197     }
198
199     if (gen6_avc_surface->dmv_bottom_flag &&
200         gen6_avc_surface->dmv_bottom == NULL) {
201         gen6_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
202                                                     "direct mv w/r buffer",
203                                                     128 * height_in_mbs * 64,   /* scalable with frame height */
204                                                     0x1000);
205     }
206 }
207
/*
 * Emit MFX_PIPE_MODE_SELECT (4 DWs): put the MFX engine in VLD decode
 * mode for the selected standard (MPEG-2, AVC or VC-1) and route the
 * output to the pre- and/or post-deblocking surface as configured in
 * gen6_mfd_context.  decode_state is unused here — the output routing
 * was decided when the {pre,post}_deblocking_output fields were set.
 */
static void
gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (MFD_MODE_VLD << 16) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 7)  | /* disable TLB prefectch */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 20) | /* round flag in PB slice */
                  (0 << 19) | /* round flag in Intra8x8 */
                  (0 << 7)  | /* expand NOA bus flag */
                  (1 << 6)  | /* must be 1 */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  | /* AVC long field motion vector */
                  (1 << 0));  /* always calculate AVC ILDB boundary strength */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
245
/*
 * Emit MFX_SURFACE_STATE (6 DWs) describing the destination render
 * target: a Y-tiled, interleaved-U/V planar 4:2:0 (NV12) surface.
 * Width/height come from the surface's original dimensions, the pitch
 * from obj_surface->width, and the chroma plane starts at Y offset
 * obj_surface->height.  standard_select/decode_state are unused here.
 */
static void
gen6_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 19) |
                  ((obj_surface->orig_width - 1) << 6));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be y-tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* must be 0 for interleave U/V */
                  (obj_surface->height)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
277
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 DWs): the pre/post-deblocking
 * destination surfaces, intra and deblocking-filter row-store scratch
 * buffers, and the 16 reference-picture base addresses (DW 7..22),
 * indexed by frame-store slot.  Invalid/absent entries are written as
 * zero.  standard_select/decode_state are unused in this routine.
 */
static void
gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    if (gen6_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: reference picture base addresses, read-only to the GPU */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface && obj_surface->bo);

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
340
341 static void
342 gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
343                                  dri_bo *slice_data_bo,
344                                  int standard_select,
345                                  struct gen6_mfd_context *gen6_mfd_context)
346 {
347     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
348
349     BEGIN_BCS_BATCH(batch, 11);
350     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
351     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
352     OUT_BCS_BATCH(batch, 0);
353     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
354     OUT_BCS_BATCH(batch, 0);
355     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
356     OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
358     OUT_BCS_BATCH(batch, 0);
359     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
360     OUT_BCS_BATCH(batch, 0);
361     ADVANCE_BCS_BATCH(batch);
362 }
363
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (4 DWs): the BSD/MPC row-store and
 * MPR row-store scratch buffers (read/write by the GPU) and the
 * bitplane read buffer (read-only).  Any buffer marked invalid is
 * programmed as zero.  ctx, decode_state and standard_select are
 * unused in this routine.
 */
static void
gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
398
/* NOTE(review): dead code — this whole region is compiled out.  Consider
 * deleting it rather than keeping it behind #if 0. */
#if 0
static void
gen6_mfd_aes_state(VADriverContextP ctx,
                   struct decode_state *decode_state,
                   int standard_select)
{
    /* FIXME */
}

static void
gen6_mfd_wait(VADriverContextP ctx,
              struct decode_state *decode_state,
              int standard_select,
              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 1);
    OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
    ADVANCE_BCS_BATCH(batch);
}
#endif
421
/*
 * Emit MFX_AVC_IMG_STATE (13 DWs) from the H.264 picture parameters:
 * picture size in MBs, image structure (frame/top field/bottom field),
 * MBAFF flag, QP index offsets and the per-sequence/per-picture coding
 * flags.  Only monochrome and 4:2:0 chroma are supported by the MFX
 * unit; the asserts document the hardware constraints.
 */
static void
gen6_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int qm_present_flag;     /* 1 when the app supplied scaling matrices */
    int img_struct;          /* 0 = frame, 1 = top field, 3 = bottom field */
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        qm_present_flag = 1;
    else
        qm_present_flag = 0; /* built-in QM matrices */

    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* field img_struct must agree with field_pic_flag */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF = adaptive frame/field coding in a frame picture */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
    OUT_BCS_BATCH(batch, 
                  ((width_in_mbs * height_in_mbs) & 0x7fff));
    OUT_BCS_BATCH(batch, 
                  (height_in_mbs << 16) | 
                  (width_in_mbs << 0));
    OUT_BCS_BATCH(batch, 
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (1 << 12) | /* always 1, hardware requirement */
                  (qm_present_flag << 10) |
                  (img_struct << 8) |
                  (16 << 0));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
511
/*
 * Emit MFX_AVC_QM_STATE: upload application-supplied scaling matrices.
 * Always loads the six 4x4 lists; additionally loads the two 8x8 lists
 * when transform_8x8_mode_flag is set.  Does nothing when no IQ matrix
 * buffer was provided (the hardware then uses built-in defaults, see
 * qm_present_flag in gen6_mfd_avc_img_state).
 */
static void
gen6_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int cmd_len;
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        cmd_len += 2 * 16; /* load two 8x8 scaling matrices */

    BEGIN_BCS_BATCH(batch, cmd_len);
    OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | (cmd_len - 2));

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        OUT_BCS_BATCH(batch, 
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
    else
        OUT_BCS_BATCH(batch, 
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0x3f << 0)); /* six 4x4 scaling matrices */

    intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);

    ADVANCE_BCS_BATCH(batch);
}
554
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 DWs):
 *   - DW 1..32:  direct-MV buffer pair (top/bottom) for each of the 16
 *                frame-store slots; empty slots get zero addresses;
 *   - DW 33..34: direct-MV buffer pair for the current picture (r/w);
 *   - DW 35..66: Top/Bottom POC for each frame-store slot;
 *   - DW 67..68: Top/Bottom POC of the current picture.
 * When dmv_bottom_flag is clear, the top buffer is programmed for both
 * fields.  slice_param is unused here.
 */
static void
gen6_mfd_avc_directmode_state(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen6_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            /* NOTE(review): SURFACE() can return NULL if the app destroyed
             * the surface; only an assert guards the deref below — confirm */
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen6_avc_surface = obj_surface->private_data;

            if (gen6_avc_surface == NULL) {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);

                if (gen6_avc_surface->dmv_bottom_flag == 1)
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
                else
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
            }
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen6_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen6_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List: look each frame-store entry up in ReferenceFrames[] to
     * find its field order counts */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];
                
                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
            
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
654
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice:
 *   - collapse SI->I and SP->P so the hardware sees only I/P/B;
 *   - derive active reference counts per list (zero for I slices);
 *   - weighted_pred_idc: explicit weighting for P, bipred idc == 1
 *     (explicit) for B;
 *   - convert first_mb_in_slice to MB x/y, doubling the MB address for
 *     MBAFF pictures;
 *   - the next slice's start position (or end-of-picture when this is
 *     the last slice) tells the hardware where this slice stops.
 */
static void
gen6_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int weighted_pred_idc = 0;
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* map SI -> I and SP -> P; only I/P/B are programmed */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else { 
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1);
    }

    /* MBAFF addresses count MB pairs, hence the shift */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs; 
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* last slice: runs to the bottom of the picture */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs;
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch, 
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch, 
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch, 
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) | 
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, 
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
744
/*
 * Emit the MFX_AVC_SLICE_STATE for the phantom (terminating) slice.
 * All fields are zero except DW4, which carries the frame height in MBs
 * and the total macroblock count (halved for field pictures), so the
 * hardware knows the whole picture has been covered.
 */
static void
gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  height_in_mbs << 24 |
                  width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
770
771 static inline void
772 gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
773                            VAPictureParameterBufferH264 *pic_param,
774                            VASliceParameterBufferH264 *slice_param,
775                            struct gen6_mfd_context *gen6_mfd_context)
776 {
777     gen6_send_avc_ref_idx_state(
778         gen6_mfd_context->base.batch,
779         slice_param,
780         gen6_mfd_context->reference_surface
781     );
782 }
783
784 static void
785 gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
786                                 VAPictureParameterBufferH264 *pic_param,
787                                 VASliceParameterBufferH264 *slice_param,
788                                 struct gen6_mfd_context *gen6_mfd_context)
789 {
790     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
791     int i, j, num_weight_offset_table = 0;
792     short weightoffsets[32 * 6];
793
794     if ((slice_param->slice_type == SLICE_TYPE_P ||
795          slice_param->slice_type == SLICE_TYPE_SP) &&
796         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
797         num_weight_offset_table = 1;
798     }
799     
800     if ((slice_param->slice_type == SLICE_TYPE_B) &&
801         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
802         num_weight_offset_table = 2;
803     }
804
805     for (i = 0; i < num_weight_offset_table; i++) {
806         BEGIN_BCS_BATCH(batch, 98);
807         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
808         OUT_BCS_BATCH(batch, i);
809
810         if (i == 0) {
811             for (j = 0; j < 32; j++) {
812                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
813                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
814                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
815                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
816                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
817                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
818             }
819         } else {
820             for (j = 0; j < 32; j++) {
821                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
822                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
823                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
824                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
825                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
826                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
827             }
828         }
829
830         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
831         ADVANCE_BCS_BATCH(batch);
832     }
833 }
834
835 static int
836 gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
837 {
838     int out_slice_data_bit_offset;
839     int slice_header_size = in_slice_data_bit_offset / 8;
840     int i, j;
841
842     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
843         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
844             i++, j += 2;
845         }
846     }
847
848     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
849
850     if (mode_flag == ENTROPY_CABAC)
851         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
852
853     return out_slice_data_bit_offset;
854 }
855
/*
 * Emit the MFD_AVC_BSD_OBJECT command that makes the hardware parse one
 * slice's bitstream data.  The slice data buffer is briefly mapped so the
 * reported bit offset (which is relative to the RBSP) can be corrected
 * for the 00 00 03 emulation-prevention bytes still present in the raw
 * stream handed to the hardware.
 */
static void
gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int slice_data_bit_offset;
    uint8_t *slice_data = NULL;

    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
                                                              pic_param->pic_fields.bits.entropy_coding_mode_flag,
                                                              slice_param->slice_data_bit_offset);
    dri_bo_unmap(slice_data_bo);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, 
                  /* bytes remaining from the first macroblock onwards */
                  ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
    /* byte offset of the first macroblock within the indirect object buffer */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  (0 << 16) |
                  (0 << 6)  |
                  /* remaining bit position within the starting byte */
                  ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
892
893 static void
894 gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
895                                       VAPictureParameterBufferH264 *pic_param,
896                                       struct gen6_mfd_context *gen6_mfd_context)
897 {
898     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
899
900     BEGIN_BCS_BATCH(batch, 6);
901     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
902     OUT_BCS_BATCH(batch, 0);
903     OUT_BCS_BATCH(batch, 0);
904     OUT_BCS_BATCH(batch, 0);
905     OUT_BCS_BATCH(batch, 0);
906     OUT_BCS_BATCH(batch, 0);
907     ADVANCE_BCS_BATCH(batch);
908 }
909
/*
 * Emit the phantom (terminating) slice: slice state followed by an
 * all-zero BSD object.  Called once after all real slices of the frame
 * have been submitted (see gen6_mfd_avc_decode_picture).
 */
static void
gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context);
    gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context);
}
918
/*
 * Per-frame setup for AVC decoding: decide whether in-loop deblocking is
 * needed, prepare the render-target surface, bind the pre-/post-deblocking
 * outputs, and (re)allocate the row-store scratch buffers sized to the
 * picture width in macroblocks.
 */
static void
gen6_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    VAPictureH264 *va_pic;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    int width_in_mbs;

    /* In-loop deblocking is required as soon as any slice does not fully
     * disable it (disable_deblocking_filter_idc != 1); stop scanning once
     * that is established. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_frame_store_index(ctx, pic_param, gen6_mfd_context);
    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);

    /* Current decoded picture: mark its reference status and make sure a
     * backing NV12 buffer object exists. */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface);
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* initial uv component for YUV400 case: monochrome streams carry no
     * chroma, so fill the UV plane with the neutral value 0x80. */
    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
         unsigned int uv_offset = obj_surface->width * obj_surface->height; 
         unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 

         drm_intel_gem_bo_map_gtt(obj_surface->bo);
         memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
         drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
    }

    gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* The decoder writes either the post- or pre-deblocking output
     * depending on whether in-loop deblocking is enabled; both share the
     * render target's buffer object, only one is marked valid. */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers, sized per macroblock column. */
    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* No bitplane buffer is used for AVC. */
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
1028
/*
 * Decode one AVC frame: emit the per-frame MFX pipeline state, then for
 * every slice its direct-mode/slice/ref-idx/weight state plus a BSD
 * object, and finally the phantom slice that terminates the frame.  The
 * whole sequence is built atomically in the BCS batch and flushed once.
 */
static void
gen6_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_decode_init(ctx, decode_state, gen6_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_avc_img_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_avc_qm_state(ctx, decode_state, gen6_mfd_context);

    /* Outer loop: one iteration per slice-parameter buffer (slice group);
     * inner loop: per slice within that buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen6_mfd_context);

        /* First slice of the following buffer, needed so the slice state
         * can encode where the next slice begins. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }

    gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1090
/*
 * Per-frame setup for MPEG-2 decoding: bind reference surfaces, prepare
 * the render target, and allocate the single row-store scratch buffer the
 * MPEG-2 path needs.  All other scratch buffers are marked unused.
 */
static void
gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen6_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture: ensure a backing NV12 buffer object. */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* MPEG-2 has no in-loop deblocking; output goes to the pre-deblocking
     * target only. */
    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* Remaining buffers are not used by the MPEG-2 pipeline. */
    gen6_mfd_context->post_deblocking_output.valid = 0;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
1138
/*
 * Emit the MFX_MPEG2_PIC_STATE command: f_codes, picture coding extension
 * flags, picture coding type, and the frame size in macroblocks.
 */
static void
gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int tff, pic_structure;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* For frame pictures take top_field_first directly; for field pictures
     * derive it from whether the first field of the pair is the top one.
     * NOTE(review): relies on MPEG_TOP_FIELD occupying bit 0 of
     * picture_structure -- confirm against i965_defines.h. */
    pic_structure = pic_param->picture_coding_extension.bits.picture_structure;
    if (pic_structure == MPEG_FRAME)
        tff = pic_param->picture_coding_extension.bits.top_field_first;
    else
        tff = !(pic_param->picture_coding_extension.bits.is_first_field ^
                (pic_structure & MPEG_TOP_FIELD));

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  tff << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    OUT_BCS_BATCH(batch,
                  /* frame size, rounded up to whole macroblocks */
                  (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
                  (ALIGN(pic_param->horizontal_size, 16) / 16));
    ADVANCE_BCS_BATCH(batch);
}
1180
1181 static void
1182 gen6_mfd_mpeg2_qm_state(VADriverContextP ctx,
1183                         struct decode_state *decode_state,
1184                         struct gen6_mfd_context *gen6_mfd_context)
1185 {
1186     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1187     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen6_mfd_context->iq_matrix.mpeg2;
1188     int i, j;
1189
1190     /* Update internal QM state */
1191     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1192         VAIQMatrixBufferMPEG2 * const iq_matrix =
1193             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1194
1195         gen_iq_matrix->load_intra_quantiser_matrix =
1196             iq_matrix->load_intra_quantiser_matrix;
1197         if (iq_matrix->load_intra_quantiser_matrix) {
1198             for (j = 0; j < 64; j++)
1199                 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1200                     iq_matrix->intra_quantiser_matrix[j];
1201         }
1202
1203         gen_iq_matrix->load_non_intra_quantiser_matrix =
1204             iq_matrix->load_non_intra_quantiser_matrix;
1205         if (iq_matrix->load_non_intra_quantiser_matrix) {
1206             for (j = 0; j < 64; j++)
1207                 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1208                     iq_matrix->non_intra_quantiser_matrix[j];
1209         }
1210     }
1211
1212     /* Commit QM state to HW */
1213     for (i = 0; i < 2; i++) {
1214         unsigned char *qm = NULL;
1215
1216         if (i == 0) {
1217             if (gen_iq_matrix->load_intra_quantiser_matrix)
1218                 qm = gen_iq_matrix->intra_quantiser_matrix;
1219         } else {
1220             if (gen_iq_matrix->load_non_intra_quantiser_matrix)
1221                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1222         }
1223
1224         if (!qm)
1225             continue;
1226
1227         BEGIN_BCS_BATCH(batch, 18);
1228         OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
1229         OUT_BCS_BATCH(batch, i);
1230         intel_batchbuffer_data(batch, qm, 64);
1231         ADVANCE_BCS_BATCH(batch);
1232     }
1233 }
1234
/*
 * Emit the MFD_MPEG2_BSD_OBJECT for one slice.  The macroblock count is
 * derived from the distance between this slice's start position and the
 * next slice's (or the end of the picture for the last slice).
 */
static void
gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* Workaround: some apps report slice_vertical_position in frame units
     * even for field pictures; when detected (wa flag > 0), halve the
     * reported positions (see mpeg2_wa_slice_vertical_position). */
    is_field_pic_wa = is_field_pic &&
        gen6_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: extend to the bottom of the (field) picture. */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch, 
                  /* bytes of slice data after the macroblock offset */
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch, 
                  /* byte offset of the first macroblock */
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  slice_param->quantiser_scale_code << 24);
    ADVANCE_BCS_BATCH(batch);
}
1282
/*
 * Decode one MPEG-2 picture: emit the per-frame MFX state, then one BSD
 * object per slice.  Built atomically in the BCS batch and flushed once.
 */
static void
gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen6_mfd_mpeg2_decode_init(ctx, decode_state, gen6_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_mpeg2_pic_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_mpeg2_qm_state(ctx, decode_state, gen6_mfd_context);

    /* Lazily detect (once per context) whether the app reports slice
     * vertical positions in frame units for field pictures. */
    if (gen6_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen6_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);

        /* First slice of the following buffer, used to compute the current
         * slice's macroblock count. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1338
/* Map the VA-API VC-1 picture_type code to the GEN6 hardware encoding.
 * NOTE(review): the last entry presumably covers "skipped" pictures,
 * decoded as P -- confirm against the VA-API VC-1 picture_type values. */
static const int va_to_gen6_vc1_pic_type[5] = {
    GEN6_VC1_I_PICTURE,
    GEN6_VC1_P_PICTURE,
    GEN6_VC1_B_PICTURE,
    GEN6_VC1_BI_PICTURE,
    GEN6_VC1_P_PICTURE,
};
1346
/* Map the VA-API VC-1 motion-vector mode to the GEN6 hardware encoding. */
static const int va_to_gen6_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};
1353
/* Scale factors used for B-picture motion vector derivation.
 * NOTE(review): presumably indexed by the VC-1 BFRACTION code -- the
 * consumer of this table is outside this view; confirm at the call site. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160, 
    224,
};
1361
/* Map the VA-API VC-1 conditional-overlap (CONDOVER) value to the GEN6
 * hardware encoding. */
static const int va_to_gen6_vc1_condover[3] = {
    0,
    2,
    3
};
1367
/* Map the VA-API VC-1 profile value to the GEN6 hardware encoding. */
static const int va_to_gen6_vc1_profile[4] = {
    GEN6_VC1_SIMPLE_PROFILE,
    GEN6_VC1_MAIN_PROFILE,
    GEN6_VC1_RESERVED_PROFILE,
    GEN6_VC1_ADVANCED_PROFILE
};
1374
1375 static void 
1376 gen6_mfd_free_vc1_surface(void **data)
1377 {
1378     struct gen6_vc1_surface *gen6_vc1_surface = *data;
1379
1380     if (!gen6_vc1_surface)
1381         return;
1382
1383     dri_bo_unreference(gen6_vc1_surface->dmv);
1384     free(gen6_vc1_surface);
1385     *data = NULL;
1386 }
1387
1388 static void
1389 gen6_mfd_init_vc1_surface(VADriverContextP ctx, 
1390                           VAPictureParameterBufferVC1 *pic_param,
1391                           struct object_surface *obj_surface)
1392 {
1393     struct i965_driver_data *i965 = i965_driver_data(ctx);
1394     struct gen6_vc1_surface *gen6_vc1_surface = obj_surface->private_data;
1395     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1396
1397     obj_surface->free_private_data = gen6_mfd_free_vc1_surface;
1398
1399     if (!gen6_vc1_surface) {
1400         gen6_vc1_surface = calloc(sizeof(struct gen6_vc1_surface), 1);
1401         assert((obj_surface->size & 0x3f) == 0);
1402         obj_surface->private_data = gen6_vc1_surface;
1403     }
1404
1405     gen6_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1406
1407     if (gen6_vc1_surface->dmv == NULL) {
1408         gen6_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1409                                              "direct mv w/r buffer",
1410                                              128 * height_in_mbs * 64,  /* scalable with frame height */
1411                                              0x1000);
1412     }
1413 }
1414
/*
 * Per-picture setup for VC-1 decoding: fills the reference surface table,
 * binds the render target as pre-/post-deblocking output, (re)allocates the
 * row-store scratch buffers, and re-packs the VA bitplane buffer into the
 * nibble-per-macroblock layout read by the hardware.
 */
static void
gen6_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int i;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    /* reference picture: slot 0 = forward reference; slot 1 = backward
     * reference, falling back to the forward one when it is absent */
    obj_surface = SURFACE(pic_param->forward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
    else
        gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;

    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
    else
        gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;

    /* must do so !!! -- fill every remaining slot by replicating slots 0/1;
     * presumably the hardware requires all table entries to be populated */
    for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
        gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* the same bo serves as pre- and post-deblocking output; exactly one of
     * the two is marked valid depending on whether the loop filter is on */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* row-store scratch buffers, sized per macroblock column */
    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 6 * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* MPR row store is not used for VC-1 */
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen6_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);

    if (gen6_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16; /* NOTE(review): shadows the outer width_in_mbs (same value) */
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2; /* dest row pitch: one byte per pair of MBs */
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen6_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /* Re-pack the VA bitplane (4 bits per MB, even MB in the high
         * nibble of each source byte) into the hardware layout: each new
         * nibble is inserted at the top of the dest byte, pushing the
         * previous one down, so after a pair the even MB ends up in the
         * low nibble.  NOTE(review): the first insert into each dest byte
         * reads its uninitialized contents, but that garbage is shifted
         * out by the second insert or the trailing >>= 4 fix-up below. */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* force bit 1 on skipped pictures -- presumably the skip-MB
                 * flag; confirm against the MFX bitplane format docs */
                if (picture_type == GEN6_VC1_SKIPPED_PICTURE){
                    src_value |= 0x2;
                }

                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* odd row width: the last byte got only one nibble; drop it
             * into the low half */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen6_mfd_context->bitplane_read_buffer.bo = NULL;
}
1549
/*
 * Emit MFX_VC1_PIC_STATE: derives the per-picture control fields
 * (alternative quantizer configuration, unified MV mode, B-fraction scale
 * factor, frame coding mode, backward reference distance) from the VA
 * picture parameters and packs them into the 6-dword command.
 */
static void
gen6_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Derive the ALTPQUANT configuration and edge mask from the DQUANT /
     * DQUANTFRM / DQPROFILE syntax elements. */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* all four edges use the alternative quantizer */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* per-MB dquant */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;
                
            case 0: /* all edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation the effective MV mode is in mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* B-fraction scale factor lookup; table is indexed by BFRACTION code */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen6_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* NOTE(review): advanced-profile I pictures are reported to the
     * hardware as BI here -- confirm against the MFX_VC1_PIC_STATE docs */
    if (profile == GEN6_VC1_ADVANCED_PROFILE && 
        picture_type == GEN6_VC1_I_PICTURE)
        picture_type = GEN6_VC1_BI_PICTURE;

    if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /* The direct-MV buffer of the backward reference is only usable when
     * that reference is a P picture (its surface stores the MV data). */
    if (picture_type == GEN6_VC1_B_PICTURE) {
        struct gen6_vc1_surface *gen6_vc1_surface = NULL;

        obj_surface = SURFACE(pic_param->backward_reference_picture);
        assert(obj_surface);
        gen6_vc1_surface = obj_surface->private_data;

        if (!gen6_vc1_surface || 
            (va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_I_PICTURE ||
             va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* frame coding mode: 0/1 pass through; field pictures encode the
     * field order in values 2 (TFF) and 3 (BFF) */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* backward reference distance for B pictures, derived from the
     * reference distance and the B-fraction scale factor */
    if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* overlap smoothing is suppressed at low PQUANT outside advanced profile */
    overlap = pic_param->sequence_fields.bits.overlap;
    if (profile != GEN6_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
        overlap = 0;

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
                  (ALIGN(pic_param->coded_width, 16) / 16));
    OUT_BCS_BATCH(batch,
                  pic_param->sequence_fields.bits.syncmarker << 31 |
                  1 << 29 | /* concealment */
                  alt_pq << 24 |
                  pic_param->entrypoint_fields.bits.loopfilter << 23 |
                  overlap << 22 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 21 | /* implicit quantizer */
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 16 |
                  alt_pquant_edge_mask << 12 |
                  alt_pquant_config << 10 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 9 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 8 |
                  va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] << 6 |
                  !pic_param->picture_fields.bits.is_first_field << 5 |
                  picture_type << 2 |
                  fcm << 0);
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 23 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 20 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 19 |
                  !pic_param->bitplane_present.flags.bp_overflags << 18 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 17 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 16 |
                  pic_param->mv_fields.bits.extended_dmv_range << 14 |
                  pic_param->mv_fields.bits.extended_mv_range << 12 |
                  pic_param->mv_fields.bits.four_mv_switch << 11 |
                  pic_param->fast_uvmc_flag << 10 |
                  unified_mv_mode << 8 |
                  ref_field_pic_polarity << 6 |
                  pic_param->reference_fields.bits.num_reference_pictures << 5 |
                  pic_param->reference_fields.bits.reference_distance << 0);
    OUT_BCS_BATCH(batch,
                  scale_factor << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    OUT_BCS_BATCH(batch,
                  dmv_surface_valid << 13 |
                  brfd << 8 |
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
    ADVANCE_BCS_BATCH(batch);
}
1772
1773 static void
1774 gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1775                              struct decode_state *decode_state,
1776                              struct gen6_mfd_context *gen6_mfd_context)
1777 {
1778     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1779     VAPictureParameterBufferVC1 *pic_param;
1780     int interpolation_mode = 0;
1781     int intensitycomp_single;
1782
1783     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1784     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1785
1786     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1787         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1788          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1789         interpolation_mode = 2; /* Half-pel bilinear */
1790     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1791              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1792               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1793         interpolation_mode = 0; /* Half-pel bicubic */
1794     else
1795         interpolation_mode = 1; /* Quarter-pel bicubic */
1796
1797     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1798     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1799     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1800
1801     BEGIN_BCS_BATCH(batch, 7);
1802     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
1803     OUT_BCS_BATCH(batch,
1804                   0 << 8 | /* FIXME: interlace mode */
1805                   pic_param->rounding_control << 4 |
1806                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
1807     OUT_BCS_BATCH(batch,
1808                   pic_param->luma_shift << 16 |
1809                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1810     OUT_BCS_BATCH(batch, 0);
1811     OUT_BCS_BATCH(batch, 0);
1812     OUT_BCS_BATCH(batch, 0);
1813     OUT_BCS_BATCH(batch,
1814                   interpolation_mode << 19 |
1815                   pic_param->fast_uvmc_flag << 18 |
1816                   0 << 17 | /* FIXME: scale up or down ??? */
1817                   pic_param->range_reduction_frame << 16 |
1818                   0 << 6 | /* FIXME: double ??? */
1819                   0 << 4 |
1820                   intensitycomp_single << 2 |
1821                   intensitycomp_single << 0);
1822     ADVANCE_BCS_BATCH(batch);
1823 }
1824
1825
/*
 * Emit MFX_VC1_DIRECTMODE_STATE: programs the direct-mode motion-vector
 * write buffer of the current picture and the read buffer of the backward
 * reference.  Either buffer may be absent, in which case 0 is emitted in
 * its slot.
 */
static void
gen6_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* the current render target's private data holds the MV write buffer */
    obj_surface = SURFACE(decode_state->current_render_target);

    if (obj_surface && obj_surface->private_data) {
        dmv_write_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    /* the backward reference's private data holds the MV read buffer */
    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->private_data) {
        dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));

    if (dmv_write_buffer)
        OUT_BCS_RELOC(batch, dmv_write_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (dmv_read_buffer)
        OUT_BCS_RELOC(batch, dmv_read_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
1871
/*
 * Translate the bit offset of the first macroblock from the unescaped
 * header length into an offset within the raw (escaped) slice buffer.
 * For the advanced profile (3) the bitstream carries 0x03
 * emulation-prevention bytes after each 0x00 0x00 pair, which occupy a
 * raw byte but no header byte; other profiles have no escaping, so the
 * offset passes through unchanged.
 */
static int
gen6_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int unescaped;  /* header bytes accounted for so far */
    int raw;        /* current position in the escaped buffer */

    if (profile != 3)
        return in_slice_data_bit_offset;

    for (unescaped = 0, raw = 0; unescaped < header_bytes; ) {
        if (buf[raw] == 0 && buf[raw + 1] == 0 && buf[raw + 2] == 3 && buf[raw + 3] < 4) {
            /* 00 00 03 0x: the 0x03 is an emulation-prevention byte that
             * contributes to the raw stream but not to the header */
            unescaped += 2;
            raw += 3;
        } else {
            unescaped += 1;
            raw += 1;
        }
    }

    return 8 * raw + in_slice_data_bit_offset % 8;
}
1893
1894 static void
1895 gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
1896                         VAPictureParameterBufferVC1 *pic_param,
1897                         VASliceParameterBufferVC1 *slice_param,
1898                         VASliceParameterBufferVC1 *next_slice_param,
1899                         dri_bo *slice_data_bo,
1900                         struct gen6_mfd_context *gen6_mfd_context)
1901 {
1902     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1903     int next_slice_start_vert_pos;
1904     int macroblock_offset;
1905     uint8_t *slice_data = NULL;
1906
1907     dri_bo_map(slice_data_bo, 0);
1908     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1909     macroblock_offset = gen6_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1910                                                                slice_param->macroblock_offset,
1911                                                                pic_param->sequence_fields.bits.profile);
1912     dri_bo_unmap(slice_data_bo);
1913
1914     if (next_slice_param)
1915         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1916     else
1917         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1918
1919     BEGIN_BCS_BATCH(batch, 4);
1920     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (4 - 2));
1921     OUT_BCS_BATCH(batch, 
1922                   slice_param->slice_data_size - (macroblock_offset >> 3));
1923     OUT_BCS_BATCH(batch, 
1924                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1925     OUT_BCS_BATCH(batch,
1926                   slice_param->slice_vertical_position << 24 |
1927                   next_slice_start_vert_pos << 16 |
1928                   (macroblock_offset & 0x7));
1929     ADVANCE_BCS_BATCH(batch);
1930 }
1931
/*
 * Top-level VC-1 picture decode: runs per-picture init, then emits the
 * common MFX state, the VC-1 specific state, and one BSD object per slice.
 * The whole command sequence is built atomically on the BCS ring and
 * flushed at the end.
 */
static void
gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen6_mfd_vc1_decode_init(ctx, decode_state, gen6_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_vc1_pic_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_vc1_pred_pipe_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_vc1_directmode_state(ctx, decode_state, gen6_mfd_context);

    /* outer loop: slice parameter groups; inner loop: slices within a
     * group.  Each BSD object needs the following slice's start row, so
     * look ahead across group boundaries. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen6_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            /* only whole-slice data buffers are supported */
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1984
1985 static void 
1986 gen6_mfd_decode_picture(VADriverContextP ctx, 
1987                         VAProfile profile, 
1988                         union codec_state *codec_state,
1989                         struct hw_context *hw_context)
1990
1991 {
1992     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
1993     struct decode_state *decode_state = &codec_state->decode;
1994
1995     assert(gen6_mfd_context);
1996
1997     switch (profile) {
1998     case VAProfileMPEG2Simple:
1999     case VAProfileMPEG2Main:
2000         gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context);
2001         break;
2002         
2003     case VAProfileH264Baseline:
2004     case VAProfileH264Main:
2005     case VAProfileH264High:
2006         gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context);
2007         break;
2008
2009     case VAProfileVC1Simple:
2010     case VAProfileVC1Main:
2011     case VAProfileVC1Advanced:
2012         gen6_mfd_vc1_decode_picture(ctx, decode_state, gen6_mfd_context);
2013         break;
2014
2015     default:
2016         assert(0);
2017         break;
2018     }
2019 }
2020
2021 static void
2022 gen6_mfd_context_destroy(void *hw_context)
2023 {
2024     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
2025
2026     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
2027     gen6_mfd_context->post_deblocking_output.bo = NULL;
2028
2029     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
2030     gen6_mfd_context->pre_deblocking_output.bo = NULL;
2031
2032     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
2033     gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2034
2035     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2036     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2037
2038     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2039     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2040
2041     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
2042     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2043
2044     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
2045     gen6_mfd_context->bitplane_read_buffer.bo = NULL;
2046
2047     intel_batchbuffer_free(gen6_mfd_context->base.batch);
2048     free(gen6_mfd_context);
2049 }
2050
2051 struct hw_context *
2052 gen6_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
2053 {
2054     struct intel_driver_data *intel = intel_driver_data(ctx);
2055     struct gen6_mfd_context *gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
2056     int i;
2057
2058     gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
2059     gen6_mfd_context->base.run = gen6_mfd_decode_picture;
2060     gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
2061
2062     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
2063         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2064         gen6_mfd_context->reference_surface[i].frame_store_id = -1;
2065     }
2066
2067     gen6_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2068     
2069     return (struct hw_context *)gen6_mfd_context;
2070 }