i965_drv_video/VC1: disable overlap if PQUANT is less than 9 in Main/Simple profile
[platform/upstream/libva.git] / i965_drv_video / gen6_mfd.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38
39 #include "i965_defines.h"
40 #include "i965_drv_video.h"
41
42 #include "gen6_mfd.h"
43
44 #define DMV_SIZE        0x88000 /* 557056 bytes for a frame */
45
46 static const uint32_t zigzag_direct[64] = {
47     0,   1,  8, 16,  9,  2,  3, 10,
48     17, 24, 32, 25, 18, 11,  4,  5,
49     12, 19, 26, 33, 40, 48, 41, 34,
50     27, 20, 13,  6,  7, 14, 21, 28,
51     35, 42, 49, 56, 57, 50, 43, 36,
52     29, 22, 15, 23, 30, 37, 44, 51,
53     58, 59, 52, 45, 38, 31, 39, 46,
54     53, 60, 61, 54, 47, 55, 62, 63
55 };
56
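/*
 * Keep the driver-side frame store in sync with the ReferenceFrames list:
 * entries whose surface is no longer referenced are dropped (and their
 * backing bo released once the surface has also been displayed), new
 * reference pictures get a free frame store slot, and a final pass sorts
 * the table so that frame_store_id matches the array index used by the
 * MFX commands below.
 */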
57 static void
58 gen6_mfd_avc_frame_store_index(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     struct i965_media_state *media_state = &i965->media_state;
62     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)media_state->private_context;
63     int i, j;
64
65     assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
66
67     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
68         int found = 0;
69
70         if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
71             continue;
72
73         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
74             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
75             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
76                 continue;
77
78             if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
79                 found = 1;
80                 break;
81             }
82         }
83
84         if (!found) {
85             struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
86             obj_surface->flags &= ~SURFACE_REFERENCED;
87
88             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
89                 dri_bo_unreference(obj_surface->bo);
90                 obj_surface->bo = NULL;
91                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
92             }
93
94             if (obj_surface->free_private_data)
95                 obj_surface->free_private_data(&obj_surface->private_data);
96
97             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
98             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
99         }
100     }
101
102     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
103         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
104         int found = 0;
105
106         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
107             continue;
108
109         for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
110             if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
111                 continue;
112             
113             if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
114                 found = 1;
115                 break;
116             }
117         }
118
119         if (!found) {
120             int frame_idx;
121             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
122             
123             if (obj_surface->bo == NULL) {
124                 uint32_t tiling_mode = I915_TILING_Y;
125                 unsigned long pitch;
126         
127                 obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, 
128                                                            "vaapi surface",
129                                                            obj_surface->width, 
130                                                            obj_surface->height + obj_surface->height / 2,
131                                                            1,
132                                                            &tiling_mode,
133                                                            &pitch,
134                                                            0);
135                 assert(obj_surface->bo);
136                 assert(tiling_mode == I915_TILING_Y);
137                 assert(pitch == obj_surface->width);
138             }
139
140             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
141                 for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
142                     if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
143                         continue;
144
145                     if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx)
146                         break;
147                 }
148
149                 if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface))
150                     break;
151             }
152
153             assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
154
155             for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
156                 if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
157                     gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
158                     gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
159                     break;
160                 }
161             }
162         }
163     }
164
165     /* sort the frame store so that frame_store_id matches the array index */
166     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) {
167         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
168             gen6_mfd_context->reference_surface[i].frame_store_id == i)
169             continue;
170
171         for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
172             if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
173                 gen6_mfd_context->reference_surface[j].frame_store_id == i) {
174                 VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id;
175                 int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id;
176
177                 gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id;
178                 gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id;
179                 gen6_mfd_context->reference_surface[j].surface_id = id;
180                 gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
181                 break;
182             }
183         }
184     }
185 }
186
187 static void 
188 gen6_mfd_free_avc_surface(void **data)
189 {
190     struct gen6_avc_surface *gen6_avc_surface = *data;
191
192     if (!gen6_avc_surface)
193         return;
194
195     dri_bo_unreference(gen6_avc_surface->dmv_top);
196     gen6_avc_surface->dmv_top = NULL;
197     dri_bo_unreference(gen6_avc_surface->dmv_bottom);
198     gen6_avc_surface->dmv_bottom = NULL;
199
200     free(gen6_avc_surface);
201     *data = NULL;
202 }
203
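/*
 * Per-surface private data holds the direct-MV write/read buffers handed
 * to MFX_AVC_DIRECTMODE_STATE.  A separate bottom-field buffer is only
 * needed for field pictures without direct_8x8_inference, which is what
 * dmv_bottom_flag below encodes.
 */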
204 static void
205 gen6_mfd_init_avc_surface(VADriverContextP ctx, 
206                           VAPictureParameterBufferH264 *pic_param,
207                           struct object_surface *obj_surface)
208 {
209     struct i965_driver_data *i965 = i965_driver_data(ctx);
210     struct gen6_avc_surface *gen6_avc_surface = obj_surface->private_data;
211
212     obj_surface->free_private_data = gen6_mfd_free_avc_surface;
213
214     if (!gen6_avc_surface) {
215         gen6_avc_surface = calloc(1, sizeof(struct gen6_avc_surface));
216         assert((obj_surface->size & 0x3f) == 0);
217         obj_surface->private_data = gen6_avc_surface;
218     }
219
220     gen6_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
221                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
222
223     if (gen6_avc_surface->dmv_top == NULL) {
224         gen6_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
225                                                  "direct mv w/r buffer",
226                                                  DMV_SIZE,
227                                                  0x1000);
228     }
229
230     if (gen6_avc_surface->dmv_bottom_flag &&
231         gen6_avc_surface->dmv_bottom == NULL) {
232         gen6_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
233                                                     "direct mv w/r buffer",
234                                                     DMV_SIZE,
235                                                     0x1000);
236     }
237 }
238
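/*
 * MFX_PIPE_MODE_SELECT: picks the codec (standard_select), VLD decoding
 * mode, and whether the decoded picture goes through the pre- or
 * post-deblocking output; exactly one of the two .valid flags is set up by
 * the per-codec decode_init code.
 */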
239 static void
240 gen6_mfd_pipe_mode_select(VADriverContextP ctx,
241                           struct decode_state *decode_state,
242                           int standard_select)
243 {
244     struct i965_driver_data *i965 = i965_driver_data(ctx);
245     struct i965_media_state *media_state = &i965->media_state;
246     struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
247
248     assert(standard_select == MFX_FORMAT_MPEG2 ||
249            standard_select == MFX_FORMAT_AVC ||
250            standard_select == MFX_FORMAT_VC1);
251
252     BEGIN_BCS_BATCH(ctx, 4);
253     OUT_BCS_BATCH(ctx, MFX_PIPE_MODE_SELECT | (4 - 2));
254     OUT_BCS_BATCH(ctx,
255                   (MFD_MODE_VLD << 16) | /* VLD mode */
256                   (0 << 10) | /* disable Stream-Out */
257                   (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
258                   (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
259                   (0 << 7)  | /* disable TLB prefetch */
260                   (0 << 5)  | /* not in stitch mode */
261                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
262                   (standard_select << 0));
263     OUT_BCS_BATCH(ctx,
264                   (0 << 20) | /* round flag in PB slice */
265                   (0 << 19) | /* round flag in Intra8x8 */
266                   (0 << 7)  | /* expand NOA bus flag */
267                   (1 << 6)  | /* must be 1 */
268                   (0 << 5)  | /* disable clock gating for NOA */
269                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
270                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
271                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
272                   (0 << 1)  | /* AVC long field motion vector */
273                   (1 << 0));  /* always calculate AVC ILDB boundary strength */
274     OUT_BCS_BATCH(ctx, 0);
275     ADVANCE_BCS_BATCH(ctx);
276 }
277
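/*
 * MFX_SURFACE_STATE: describes the destination as a Y-tiled planar 4:2:0
 * surface with interleaved U/V (NV12); the pitch is obj_surface->width and
 * the chroma plane starts at row obj_surface->height.
 */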
278 static void
279 gen6_mfd_surface_state(VADriverContextP ctx,
280                        struct decode_state *decode_state,
281                        int standard_select)
282 {
283     struct i965_driver_data *i965 = i965_driver_data(ctx);
284     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
285     assert(obj_surface);
286     
287     BEGIN_BCS_BATCH(ctx, 6);
288     OUT_BCS_BATCH(ctx, MFX_SURFACE_STATE | (6 - 2));
289     OUT_BCS_BATCH(ctx, 0);
290     OUT_BCS_BATCH(ctx,
291                   ((obj_surface->orig_height - 1) << 19) |
292                   ((obj_surface->orig_width - 1) << 6));
293     OUT_BCS_BATCH(ctx,
294                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
295                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
296                   (0 << 22) | /* surface object control state, FIXME??? */
297                   ((obj_surface->width - 1) << 3) | /* pitch */
298                   (0 << 2)  | /* must be 0 for interleave U/V */
299                   (1 << 1)  | /* must be y-tiled */
300                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
301     OUT_BCS_BATCH(ctx,
302                   (0 << 16) | /* must be 0 for interleave U/V */
303                   (obj_surface->height)); /* y offset for U(cb) */
304     OUT_BCS_BATCH(ctx, 0);
305     ADVANCE_BCS_BATCH(ctx);
306 }
307
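/*
 * MFX_PIPE_BUF_ADDR_STATE: pre/post-deblocking destinations, the intra and
 * deblocking-filter row store scratch buffers, and one address per frame
 * store entry (DW 7..22) for up to 16 reference pictures.
 */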
308 static void
309 gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
310                              struct decode_state *decode_state,
311                              int standard_select)
312 {
313     struct i965_driver_data *i965 = i965_driver_data(ctx);
314     struct i965_media_state *media_state = &i965->media_state;
315     struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
316     int i;
317
318     BEGIN_BCS_BATCH(ctx, 24);
319     OUT_BCS_BATCH(ctx, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
320     if (gen6_mfd_context->pre_deblocking_output.valid)
321         OUT_BCS_RELOC(ctx, gen6_mfd_context->pre_deblocking_output.bo,
322                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
323                       0);
324     else
325         OUT_BCS_BATCH(ctx, 0);
326
327     if (gen6_mfd_context->post_deblocking_output.valid)
328         OUT_BCS_RELOC(ctx, gen6_mfd_context->post_deblocking_output.bo,
329                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
330                       0);
331     else
332         OUT_BCS_BATCH(ctx, 0);
333
334     OUT_BCS_BATCH(ctx, 0); /* ignore for decoding */
335     OUT_BCS_BATCH(ctx, 0); /* ignore for decoding */
336
337     if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
338         OUT_BCS_RELOC(ctx, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
339                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
340                       0);
341     else
342         OUT_BCS_BATCH(ctx, 0);
343
344     if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
345         OUT_BCS_RELOC(ctx, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
346                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
347                       0);
348     else
349         OUT_BCS_BATCH(ctx, 0);
350
351     /* DW 7..22 */
352     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
353         struct object_surface *obj_surface;
354
355         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
356             obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
357             assert(obj_surface && obj_surface->bo);
358
359             OUT_BCS_RELOC(ctx, obj_surface->bo,
360                           I915_GEM_DOMAIN_INSTRUCTION, 0,
361                           0);
362         } else {
363             OUT_BCS_BATCH(ctx, 0);
364         }
365     }
366
367     OUT_BCS_BATCH(ctx, 0);   /* ignore DW23 for decoding */
368     ADVANCE_BCS_BATCH(ctx);
369 }
370
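/*
 * MFX_IND_OBJ_BASE_ADDR_STATE: in VLD mode only the indirect bitstream
 * object base address matters, so everything else is left at zero.
 */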
371 static void
372 gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
373                                  dri_bo *slice_data_bo,
374                                  int standard_select)
375 {
376     BEGIN_BCS_BATCH(ctx, 11);
377     OUT_BCS_BATCH(ctx, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
378     OUT_BCS_RELOC(ctx, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
379     OUT_BCS_BATCH(ctx, 0);
380     OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
381     OUT_BCS_BATCH(ctx, 0);
382     OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
383     OUT_BCS_BATCH(ctx, 0);
384     OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
385     OUT_BCS_BATCH(ctx, 0);
386     OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
387     OUT_BCS_BATCH(ctx, 0);
388     ADVANCE_BCS_BATCH(ctx);
389 }
390
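/*
 * MFX_BSP_BUF_BASE_ADDR_STATE: BSD/MPC and MPR row store scratch buffers
 * plus the bitplane read buffer (only programmed when a bitplane buffer is
 * marked valid).
 */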
391 static void
392 gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
393                                  struct decode_state *decode_state,
394                                  int standard_select)
395 {
396     struct i965_driver_data *i965 = i965_driver_data(ctx);
397     struct i965_media_state *media_state = &i965->media_state;
398     struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
399
400     BEGIN_BCS_BATCH(ctx, 4);
401     OUT_BCS_BATCH(ctx, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
402
403     if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
404         OUT_BCS_RELOC(ctx, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
405                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
406                       0);
407     else
408         OUT_BCS_BATCH(ctx, 0);
409
410     if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
411         OUT_BCS_RELOC(ctx, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
412                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
413                       0);
414     else
415         OUT_BCS_BATCH(ctx, 0);
416
417     if (gen6_mfd_context->bitplane_read_buffer.valid)
418         OUT_BCS_RELOC(ctx, gen6_mfd_context->bitplane_read_buffer.bo,
419                       I915_GEM_DOMAIN_INSTRUCTION, 0,
420                       0);
421     else
422         OUT_BCS_BATCH(ctx, 0);
423
424     ADVANCE_BCS_BATCH(ctx);
425 }
426
427 static void
428 gen6_mfd_aes_state(VADriverContextP ctx,
429                    struct decode_state *decode_state,
430                    int standard_select)
431 {
432     /* FIXME */
433 }
434
435 static void
436 gen6_mfd_wait(VADriverContextP ctx,
437               struct decode_state *decode_state,
438               int standard_select)
439 {
440     BEGIN_BCS_BATCH(ctx, 1);
441     OUT_BCS_BATCH(ctx, MFX_WAIT | (1 << 8));
442     ADVANCE_BCS_BATCH(ctx);
443 }
444
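/*
 * MFX_AVC_IMG_STATE: derives the hardware picture description from the VA
 * picture parameters: picture structure (img_struct), MBAFF flag, frame
 * size in macroblocks, the chroma QP index offsets, and the sequence and
 * picture flags packed into DW4.
 */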
445 static void
446 gen6_mfd_avc_img_state(VADriverContextP ctx, struct decode_state *decode_state)
447 {
448     int qm_present_flag;
449     int img_struct;
450     int mbaff_frame_flag;
451     unsigned int width_in_mbs, height_in_mbs;
452     VAPictureParameterBufferH264 *pic_param;
453
454     assert(decode_state->pic_param && decode_state->pic_param->buffer);
455     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
456     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
457
458     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
459         qm_present_flag = 1;
460     else
461         qm_present_flag = 0; /* built-in QM matrices */
462
463     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
464         img_struct = 1;
465     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
466         img_struct = 3;
467     else
468         img_struct = 0;
469
470     if ((img_struct & 0x1) == 0x1) {
471         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
472     } else {
473         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
474     }
475
476     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
477         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
478         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
479     } else {
480         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
481     }
482
483     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
484                         !pic_param->pic_fields.bits.field_pic_flag);
485
486     width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
487     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
488     assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */
489
490     /* MFX unit doesn't support 4:2:2 or 4:4:4 pictures */
491     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
492            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
493     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
494
495     BEGIN_BCS_BATCH(ctx, 13);
496     OUT_BCS_BATCH(ctx, MFX_AVC_IMG_STATE | (13 - 2));
497     OUT_BCS_BATCH(ctx, 
498                   ((width_in_mbs * height_in_mbs) & 0x7fff));
499     OUT_BCS_BATCH(ctx, 
500                   (height_in_mbs << 16) | 
501                   (width_in_mbs << 0));
502     OUT_BCS_BATCH(ctx, 
503                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
504                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
505                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
506                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
507                   (1 << 12) | /* always 1, hardware requirement */
508                   (qm_present_flag << 10) |
509                   (img_struct << 8) |
510                   (16 << 0));
511     OUT_BCS_BATCH(ctx,
512                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
513                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
514                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
515                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
516                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
517                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
518                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
519                   (mbaff_frame_flag << 1) |
520                   (pic_param->pic_fields.bits.field_pic_flag << 0));
521     OUT_BCS_BATCH(ctx, 0);
522     OUT_BCS_BATCH(ctx, 0);
523     OUT_BCS_BATCH(ctx, 0);
524     OUT_BCS_BATCH(ctx, 0);
525     OUT_BCS_BATCH(ctx, 0);
526     OUT_BCS_BATCH(ctx, 0);
527     OUT_BCS_BATCH(ctx, 0);
528     OUT_BCS_BATCH(ctx, 0);
529     ADVANCE_BCS_BATCH(ctx);
530 }
531
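/*
 * MFX_AVC_QM_STATE: the six 4x4 scaling lists are always loaded; the two
 * 8x8 lists are appended only when transform_8x8_mode_flag is set, which
 * is why cmd_len is computed before BEGIN_BCS_BATCH().
 */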
532 static void
533 gen6_mfd_avc_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
534 {
535     int cmd_len;
536     VAIQMatrixBufferH264 *iq_matrix;
537     VAPictureParameterBufferH264 *pic_param;
538
539     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
540         return;
541
542     iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
543
544     assert(decode_state->pic_param && decode_state->pic_param->buffer);
545     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
546
547     cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */
548
549     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
550         cmd_len += 2 * 16; /* load two 8x8 scaling matrices */
551
552     BEGIN_BCS_BATCH(ctx, cmd_len);
553     OUT_BCS_BATCH(ctx, MFX_AVC_QM_STATE | (cmd_len - 2));
554
555     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
556         OUT_BCS_BATCH(ctx, 
557                       (0x0  << 8) | /* don't use default built-in matrices */
558                       (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
559     else
560         OUT_BCS_BATCH(ctx, 
561                       (0x0  << 8) | /* don't use default built-in matrices */
562                       (0x3f << 0)); /* six 4x4 scaling matrices */
563
564     intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
565
566     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
567         intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
568
569     ADVANCE_BCS_BATCH(ctx);
570 }
571
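/*
 * MFX_AVC_DIRECTMODE_STATE: direct-MV buffer addresses for every frame
 * store entry and for the current picture, followed by the top/bottom
 * field order counts (POCs) for the same entries.
 */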
572 static void
573 gen6_mfd_avc_directmode_state(VADriverContextP ctx,
574                               VAPictureParameterBufferH264 *pic_param,
575                               VASliceParameterBufferH264 *slice_param)
576 {
577     struct i965_driver_data *i965 = i965_driver_data(ctx);
578     struct i965_media_state *media_state = &i965->media_state;
579     struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
580     struct object_surface *obj_surface;
581     struct gen6_avc_surface *gen6_avc_surface;
582     VAPictureH264 *va_pic;
583     int i, j;
584
585     BEGIN_BCS_BATCH(ctx, 69);
586     OUT_BCS_BATCH(ctx, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
587
588     /* reference surfaces 0..15 */
589     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
590         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
591             obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
592             assert(obj_surface);
593             gen6_avc_surface = obj_surface->private_data;
594
595             if (gen6_avc_surface == NULL) {
596                 OUT_BCS_BATCH(ctx, 0);
597                 OUT_BCS_BATCH(ctx, 0);
598             } else {
599                 OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_top,
600                               I915_GEM_DOMAIN_INSTRUCTION, 0,
601                               0);
602
603                 if (gen6_avc_surface->dmv_bottom_flag == 1)
604                     OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_bottom,
605                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
606                                   0);
607                 else
608                     OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_top,
609                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
610                                   0);
611             }
612         } else {
613             OUT_BCS_BATCH(ctx, 0);
614             OUT_BCS_BATCH(ctx, 0);
615         }
616     }
617
618     /* the current decoding frame/field */
619     va_pic = &pic_param->CurrPic;
620     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
621     obj_surface = SURFACE(va_pic->picture_id);
622     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
623     gen6_avc_surface = obj_surface->private_data;
624
625     OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_top,
626                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
627                   0);
628
629     if (gen6_avc_surface->dmv_bottom_flag == 1)
630         OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_bottom,
631                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
632                       0);
633     else
634         OUT_BCS_RELOC(ctx, gen6_avc_surface->dmv_top,
635                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
636                       0);
637
638     /* POC List */
639     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
640         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
641             int found = 0;
642             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
643                 va_pic = &pic_param->ReferenceFrames[j];
644                 
645                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
646                     continue;
647
648                 if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
649                     found = 1;
650                     break;
651                 }
652             }
653
654             assert(found == 1);
655             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
656             
657             OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt);
658             OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt);
659         } else {
660             OUT_BCS_BATCH(ctx, 0);
661             OUT_BCS_BATCH(ctx, 0);
662         }
663     }
664
665     va_pic = &pic_param->CurrPic;
666     OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt);
667     OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt);
668
669     ADVANCE_BCS_BATCH(ctx);
670 }
671
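/*
 * MFX_AVC_SLICE_STATE: slice type, active reference counts, weighted
 * prediction mode, QP and deblocking offsets, plus the macroblock position
 * of this slice and of the next one (or the frame end when this is the
 * last slice).
 */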
672 static void
673 gen6_mfd_avc_slice_state(VADriverContextP ctx,
674                          VAPictureParameterBufferH264 *pic_param,
675                          VASliceParameterBufferH264 *slice_param,
676                          VASliceParameterBufferH264 *next_slice_param)
677 {
678     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
679     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
680     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
681     int num_ref_idx_l0, num_ref_idx_l1;
682     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
683                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
684     int weighted_pred_idc = 0;
685     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
686     int slice_type;
687
688     if (slice_param->slice_type == SLICE_TYPE_I ||
689         slice_param->slice_type == SLICE_TYPE_SI) {
690         slice_type = SLICE_TYPE_I;
691     } else if (slice_param->slice_type == SLICE_TYPE_P ||
692                slice_param->slice_type == SLICE_TYPE_SP) {
693         slice_type = SLICE_TYPE_P;
694     } else { 
695         assert(slice_param->slice_type == SLICE_TYPE_B);
696         slice_type = SLICE_TYPE_B;
697     }
698
699     if (slice_type == SLICE_TYPE_I) {
700         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
701         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
702         num_ref_idx_l0 = 0;
703         num_ref_idx_l1 = 0;
704     } else if (slice_type == SLICE_TYPE_P) {
705         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
706         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
707         num_ref_idx_l1 = 0;
708         weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
709     } else {
710         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
711         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
712         weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1);
713     }
714
715     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
716     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
717     slice_ver_pos = first_mb_in_slice / width_in_mbs;
718
719     if (next_slice_param) {
720         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
721         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
722         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
723     } else {
724         next_slice_hor_pos = 0;
725         next_slice_ver_pos = height_in_mbs;
726     }
727
728     BEGIN_BCS_BATCH(ctx, 11); /* FIXME: is it 10??? */
729     OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2));
730     OUT_BCS_BATCH(ctx, slice_type);
731     OUT_BCS_BATCH(ctx, 
732                   (num_ref_idx_l1 << 24) |
733                   (num_ref_idx_l0 << 16) |
734                   (slice_param->chroma_log2_weight_denom << 8) |
735                   (slice_param->luma_log2_weight_denom << 0));
736     OUT_BCS_BATCH(ctx, 
737                   (weighted_pred_idc << 30) |
738                   (slice_param->direct_spatial_mv_pred_flag << 29) |
739                   (slice_param->disable_deblocking_filter_idc << 27) |
740                   (slice_param->cabac_init_idc << 24) |
741                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
742                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
743                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
744     OUT_BCS_BATCH(ctx, 
745                   (slice_ver_pos << 24) |
746                   (slice_hor_pos << 16) | 
747                   (first_mb_in_slice << 0));
748     OUT_BCS_BATCH(ctx,
749                   (next_slice_ver_pos << 16) |
750                   (next_slice_hor_pos << 0));
751     OUT_BCS_BATCH(ctx, 
752                   (next_slice_param == NULL) << 19); /* last slice flag */
753     OUT_BCS_BATCH(ctx, 0);
754     OUT_BCS_BATCH(ctx, 0);
755     OUT_BCS_BATCH(ctx, 0);
756     OUT_BCS_BATCH(ctx, 0);
757     ADVANCE_BCS_BATCH(ctx);
758 }
759
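/*
 * A trailing "phantom" slice (zero-sized BSD object, see below) is
 * programmed after the real slices; its DW4 carries the total macroblock
 * count, presumably so the hardware can terminate the picture cleanly even
 * if the real slices do not cover every macroblock.
 */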
760 static void
761 gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
762 {
763     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
764     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
765
766     BEGIN_BCS_BATCH(ctx, 11); /* FIXME: is it 10??? */
767     OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2));
768     OUT_BCS_BATCH(ctx, 0);
769     OUT_BCS_BATCH(ctx, 0);
770     OUT_BCS_BATCH(ctx, 0);
771     OUT_BCS_BATCH(ctx,
772                   height_in_mbs << 24 |
773                   width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
774     OUT_BCS_BATCH(ctx, 0);
775     OUT_BCS_BATCH(ctx, 0);
776     OUT_BCS_BATCH(ctx, 0);
777     OUT_BCS_BATCH(ctx, 0);
778     OUT_BCS_BATCH(ctx, 0);
779     OUT_BCS_BATCH(ctx, 0);
780     ADVANCE_BCS_BATCH(ctx);
781 }
782
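/*
 * MFX_AVC_REF_IDX_STATE: one command per active reference list, packing
 * for each of the 32 entries the frame store index resolved earlier plus
 * the bottom-field, field-picture, long-term and non-existing flags.
 */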
783 static void
784 gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
785                            VAPictureParameterBufferH264 *pic_param,
786                            VASliceParameterBufferH264 *slice_param)
787 {
788     struct i965_driver_data *i965 = i965_driver_data(ctx);
789     struct i965_media_state *media_state = &i965->media_state;
790     struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
791     int i, j, num_ref_list;
792     struct {
793         unsigned char bottom_idc:1;
794         unsigned char frame_store_index:4;
795         unsigned char field_picture:1;
796         unsigned char long_term:1;
797         unsigned char non_exist:1;
798     } refs[32];
799
800     if (slice_param->slice_type == SLICE_TYPE_I ||
801         slice_param->slice_type == SLICE_TYPE_SI)
802         return;
803
804     if (slice_param->slice_type == SLICE_TYPE_P ||
805         slice_param->slice_type == SLICE_TYPE_SP) {
806         num_ref_list = 1;
807     } else {
808         num_ref_list = 2;
809     }
810
811     for (i = 0; i < num_ref_list; i++) {
812         VAPictureH264 *va_pic;
813
814         if (i == 0) {
815             va_pic = slice_param->RefPicList0;
816         } else {
817             va_pic = slice_param->RefPicList1;
818         }
819
820         BEGIN_BCS_BATCH(ctx, 10);
821         OUT_BCS_BATCH(ctx, MFX_AVC_REF_IDX_STATE | (10 - 2));
822         OUT_BCS_BATCH(ctx, i);
823
824         for (j = 0; j < 32; j++) {
825             if (va_pic->flags & VA_PICTURE_H264_INVALID) {
826                 refs[j].non_exist = 1;
827                 refs[j].long_term = 1;
828                 refs[j].field_picture = 1;
829                 refs[j].frame_store_index = 0xf;
830                 refs[j].bottom_idc = 1;
831             } else {
832                 int frame_idx;
833                 
834                 for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
835                     if (gen6_mfd_context->reference_surface[frame_idx].surface_id != VA_INVALID_ID &&
836                         va_pic->picture_id == gen6_mfd_context->reference_surface[frame_idx].surface_id) {
837                         assert(frame_idx == gen6_mfd_context->reference_surface[frame_idx].frame_store_id);
838                         break;
839                     }
840                 }
841
842                 assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
843                 
844                 refs[j].non_exist = 0;
845                 refs[j].long_term = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
846                 refs[j].field_picture = !!(va_pic->flags & 
847                                            (VA_PICTURE_H264_TOP_FIELD | 
848                                             VA_PICTURE_H264_BOTTOM_FIELD));
849                 refs[j].frame_store_index = frame_idx;
850                 refs[j].bottom_idc = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
851             }
852
853             va_pic++;
854         }
855         
856         intel_batchbuffer_data_bcs(ctx, refs, sizeof(refs));
857         ADVANCE_BCS_BATCH(ctx);
858     }
859 }
860
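/*
 * MFX_AVC_WEIGHTOFFSET_STATE: one table per reference list that uses
 * explicit weighting, with 32 entries of luma and chroma weight/offset
 * pairs copied straight from the slice parameters.
 */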
861 static void
862 gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
863                                 VAPictureParameterBufferH264 *pic_param,
864                                 VASliceParameterBufferH264 *slice_param)
865 {
866     int i, j, num_weight_offset_table = 0;
867     short weightoffsets[32 * 6];
868
869     if ((slice_param->slice_type == SLICE_TYPE_P ||
870          slice_param->slice_type == SLICE_TYPE_SP) &&
871         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
872         num_weight_offset_table = 1;
873     }
874     
875     if ((slice_param->slice_type == SLICE_TYPE_B) &&
876         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
877         num_weight_offset_table = 2;
878     }
879
880     for (i = 0; i < num_weight_offset_table; i++) {
881         BEGIN_BCS_BATCH(ctx, 98);
882         OUT_BCS_BATCH(ctx, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
883         OUT_BCS_BATCH(ctx, i);
884
885         if (i == 0) {
886             for (j = 0; j < 32; j++) {
887                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
888                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
889                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
890                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
891                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
892                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
893             }
894         } else {
895             for (j = 0; j < 32; j++) {
896                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
897                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
898                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
899                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
900                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
901                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
902             }
903         }
904
905         intel_batchbuffer_data_bcs(ctx, weightoffsets, sizeof(weightoffsets));
906         ADVANCE_BCS_BATCH(ctx);
907     }
908 }
909
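/*
 * Adjust the slice data bit offset for the 00 00 03 emulation prevention
 * bytes present in the raw buffer: for every such sequence found in the
 * slice header the offset grows by one byte.  For CABAC slices the result
 * is additionally rounded up to a byte boundary.
 */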
910 static int
911 gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
912 {
913     int out_slice_data_bit_offset;
914     int slice_header_size = in_slice_data_bit_offset / 8;
915     int i, j;
916
917     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
918         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
919             i++, j += 2;
920         }
921     }
922
923     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
924
925     if (mode_flag == ENTROPY_CABAC)
926         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
927
928     return out_slice_data_bit_offset;
929 }
930
931 static void
932 gen6_mfd_avc_bsd_object(VADriverContextP ctx,
933                         VAPictureParameterBufferH264 *pic_param,
934                         VASliceParameterBufferH264 *slice_param,
935                         dri_bo *slice_data_bo)
936 {
937     int slice_data_bit_offset;
938     uint8_t *slice_data = NULL;
939
940     dri_bo_map(slice_data_bo, 0);
941     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
942     slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
943                                                               pic_param->pic_fields.bits.entropy_coding_mode_flag,
944                                                               slice_param->slice_data_bit_offset);
945     dri_bo_unmap(slice_data_bo);
946
947     BEGIN_BCS_BATCH(ctx, 6);
948     OUT_BCS_BATCH(ctx, MFD_AVC_BSD_OBJECT | (6 - 2));
949     OUT_BCS_BATCH(ctx, 
950                   ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
951     OUT_BCS_BATCH(ctx, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
952     OUT_BCS_BATCH(ctx,
953                   (0 << 31) |
954                   (0 << 14) |
955                   (0 << 12) |
956                   (0 << 10) |
957                   (0 << 8));
958     OUT_BCS_BATCH(ctx,
959                   (0 << 16) |
960                   (0 << 6)  |
961                   ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
962     OUT_BCS_BATCH(ctx, 0);
963     ADVANCE_BCS_BATCH(ctx);
964 }
965
966 static void
967 gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
968 {
969     BEGIN_BCS_BATCH(ctx, 6);
970     OUT_BCS_BATCH(ctx, MFD_AVC_BSD_OBJECT | (6 - 2));
971     OUT_BCS_BATCH(ctx, 0);
972     OUT_BCS_BATCH(ctx, 0);
973     OUT_BCS_BATCH(ctx, 0);
974     OUT_BCS_BATCH(ctx, 0);
975     OUT_BCS_BATCH(ctx, 0);
976     ADVANCE_BCS_BATCH(ctx);
977 }
978
979 static void
980 gen6_mfd_avc_phantom_slice(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
981 {
982     gen6_mfd_avc_phantom_slice_state(ctx, pic_param);
983     gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param);
984 }
985
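/*
 * Per-picture AVC setup: scan the slices to see whether in-loop deblocking
 * is ever enabled (enable_avc_ildb selects the post- vs. pre-deblocking
 * output), refresh the frame store, make sure the render target bo and its
 * DMV buffers exist, and (re)allocate the row store scratch buffers.
 */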
986 static void
987 gen6_mfd_avc_decode_init(VADriverContextP ctx, struct decode_state *decode_state)
988 {
989     VAPictureParameterBufferH264 *pic_param;
990     VASliceParameterBufferH264 *slice_param;
991     VAPictureH264 *va_pic;
992     struct i965_driver_data *i965 = i965_driver_data(ctx);
993     struct i965_media_state *media_state = &i965->media_state;
994     struct gen6_mfd_context *gen6_mfd_context;
995     struct object_surface *obj_surface;
996     dri_bo *bo;
997     int i, j, enable_avc_ildb = 0;
998     
999     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
1000         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1001         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1002
1003         assert(decode_state->slice_params[j]->num_elements == 1);
1004         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1005             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1006             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1007                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1008                    (slice_param->slice_type == SLICE_TYPE_P) ||
1009                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1010                    (slice_param->slice_type == SLICE_TYPE_B));
1011
1012             if (slice_param->disable_deblocking_filter_idc != 1) {
1013                 enable_avc_ildb = 1;
1014                 break;
1015             }
1016
1017             slice_param++;
1018         }
1019     }
1020
1021     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1022     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1023     gen6_mfd_context = media_state->private_context;
1024
1025     if (gen6_mfd_context == NULL) {
1026         gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
1027         media_state->private_context = gen6_mfd_context;
1028
1029         for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
1030             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
1031             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
1032         }
1033     }
1034
1035     gen6_mfd_avc_frame_store_index(ctx, pic_param);
1036
1037     /* Current decoded picture */
1038     va_pic = &pic_param->CurrPic;
1039     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
1040     obj_surface = SURFACE(va_pic->picture_id);
1041     assert(obj_surface);
1042     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
1043     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
1044     gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
1045
1046     if (obj_surface->bo == NULL) {
1047         uint32_t tiling_mode = I915_TILING_Y;
1048         unsigned long pitch;
1049         
1050         obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, 
1051                                                    "vaapi surface",
1052                                                    obj_surface->width, 
1053                                                    obj_surface->height + obj_surface->height / 2,
1054                                                    1,
1055                                                    &tiling_mode,
1056                                                    &pitch,
1057                                                    0);
1058         assert(obj_surface->bo);
1059         assert(tiling_mode == I915_TILING_Y);
1060         assert(pitch == obj_surface->width);
1061     }
1062     
1063     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
1064     gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1065     dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
1066     gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
1067
1068     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1069     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1070     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
1071     gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1072
1073     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
1074     bo = dri_bo_alloc(i965->intel.bufmgr,
1075                       "intra row store",
1076                       128 * 64,
1077                       0x1000);
1078     assert(bo);
1079     gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1080     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1081
1082     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1083     bo = dri_bo_alloc(i965->intel.bufmgr,
1084                       "deblocking filter row store",
1085                       30720, /* 4 * 120 * 64 */
1086                       0x1000);
1087     assert(bo);
1088     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1089     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1090
1091     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1092     bo = dri_bo_alloc(i965->intel.bufmgr,
1093                       "bsd mpc row store",
1094                       11520, /* 1.5 * 120 * 64 */
1095                       0x1000);
1096     assert(bo);
1097     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1098     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1099
1100     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
1101     bo = dri_bo_alloc(i965->intel.bufmgr,
1102                       "mpr row store",
1103                       7680, /* 1.0 * 120 * 64 */
1104                       0x1000);
1105     assert(bo);
1106     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1107     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1108
1109     gen6_mfd_context->bitplane_read_buffer.valid = 0;
1110 }
1111
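/*
 * Top-level AVC decode: per-picture state first, then per-slice
 * direct-mode, slice, ref-idx, weight-offset and BSD object commands,
 * all emitted in one atomic BCS batch and flushed at the end.
 */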
1112 static void
1113 gen6_mfd_avc_decode_picture(VADriverContextP ctx, struct decode_state *decode_state)
1114 {
1115     VAPictureParameterBufferH264 *pic_param;
1116     VASliceParameterBufferH264 *slice_param, *next_slice_param;
1117     dri_bo *slice_data_bo;
1118     int i, j;
1119
1120     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1121     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1122
1123     gen6_mfd_avc_decode_init(ctx, decode_state);
1124     intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
1125     intel_batchbuffer_emit_mi_flush_bcs(ctx);
1126     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC);
1127     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC);
1128     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC);
1129     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC);
1130     gen6_mfd_avc_img_state(ctx, decode_state);
1131     gen6_mfd_avc_qm_state(ctx, decode_state);
1132
1133     for (j = 0; j < decode_state->num_slice_params; j++) {
1134         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1135         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1136         slice_data_bo = decode_state->slice_datas[j]->bo;
1137
1138         if (j == decode_state->num_slice_params - 1)
1139             next_slice_param = NULL;
1140         else
1141             next_slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1142
1143         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC);
1144         assert(decode_state->slice_params[j]->num_elements == 1);
1145
1146         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1147             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1148             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1149                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1150                    (slice_param->slice_type == SLICE_TYPE_P) ||
1151                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1152                    (slice_param->slice_type == SLICE_TYPE_B));
1153
1154             if (i < decode_state->slice_params[j]->num_elements - 1)
1155                 next_slice_param = slice_param + 1;
1156
1157             gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param);
1158             gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param);
1159             gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param);
1160             gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param);
1161             gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo);
1162             slice_param++;
1163         }
1164     }
1165     
1166     gen6_mfd_avc_phantom_slice(ctx, pic_param);
1167     intel_batchbuffer_end_atomic_bcs(ctx);
1168     intel_batchbuffer_flush_bcs(ctx);
1169 }
1170
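/*
 * MPEG-2 setup: the forward and backward reference pictures occupy frame
 * store entries 0 and 1 and are duplicated into the remaining entries;
 * only the pre-deblocking output is used since MPEG-2 has no in-loop
 * deblocking filter.
 */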
1171 static void
1172 gen6_mfd_mpeg2_decode_init(VADriverContextP ctx, struct decode_state *decode_state)
1173 {
1174     VAPictureParameterBufferMPEG2 *pic_param;
1175     struct i965_driver_data *i965 = i965_driver_data(ctx);
1176     struct i965_media_state *media_state = &i965->media_state;
1177     struct gen6_mfd_context *gen6_mfd_context;
1178     struct object_surface *obj_surface;
1179     int i;
1180     dri_bo *bo;
1181
1182     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1183     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1184     gen6_mfd_context = media_state->private_context;
1185
1186     if (gen6_mfd_context == NULL) {
1187         gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
1188         media_state->private_context = gen6_mfd_context;
1189
1190         for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
1191             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
1192             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
1193         }
1194     }
1195
1196     /* reference picture */
1197     obj_surface = SURFACE(pic_param->forward_reference_picture);
1198
1199     if (obj_surface && obj_surface->bo)
1200         gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1201     else
1202         gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1203
1204     obj_surface = SURFACE(pic_param->backward_reference_picture);
1205
1206     if (obj_surface && obj_surface->bo)
1207         gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1208     else
1209         gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1210
1211     /* the remaining frame store entries must also point to valid surfaces, so reuse the two references above */
1212     for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
1213         gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;
1214
1215     /* Current decoded picture */
1216     obj_surface = SURFACE(decode_state->current_render_target);
1217     assert(obj_surface);
1218     if (obj_surface->bo == NULL) {
1219         uint32_t tiling_mode = I915_TILING_Y;
1220         unsigned long pitch;
1221
1222         obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, 
1223                                                    "vaapi surface",
1224                                                    obj_surface->width, 
1225                                                    obj_surface->height + obj_surface->height / 2,
1226                                                    1,
1227                                                    &tiling_mode,
1228                                                    &pitch,
1229                                                    0);
1230         assert(obj_surface->bo);
1231         assert(tiling_mode == I915_TILING_Y);
1232         assert(pitch == obj_surface->width);
1233     }
1234
1235     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1236     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1237     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
1238     gen6_mfd_context->pre_deblocking_output.valid = 1;
1239
1240     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1241     bo = dri_bo_alloc(i965->intel.bufmgr,
1242                       "bsd mpc row store",
1243                       11520, /* 1.5 * 120 * 64 */
1244                       0x1000);
1245     assert(bo);
1246     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1247     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1248
1249     gen6_mfd_context->post_deblocking_output.valid = 0;
1250     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1251     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1252     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1253     gen6_mfd_context->bitplane_read_buffer.valid = 0;
1254 }
1255
1256 static void
1257 gen6_mfd_mpeg2_pic_state(VADriverContextP ctx, struct decode_state *decode_state)
1258 {
1259     VAPictureParameterBufferMPEG2 *pic_param;
1260
1261     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1262     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1263
1264     BEGIN_BCS_BATCH(ctx, 4);
1265     OUT_BCS_BATCH(ctx, MFX_MPEG2_PIC_STATE | (4 - 2));
1266     OUT_BCS_BATCH(ctx,
1267                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1268                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1269                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1270                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1271                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1272                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1273                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1274                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1275                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1276                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1277                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1278                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1279     OUT_BCS_BATCH(ctx,
1280                   pic_param->picture_coding_type << 9);
1281     OUT_BCS_BATCH(ctx,
1282                   (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
1283                   (ALIGN(pic_param->horizontal_size, 16) / 16));
1284     ADVANCE_BCS_BATCH(ctx);
1285 }
1286
1287 static void
1288 gen6_mfd_mpeg2_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
1289 {
1290     VAIQMatrixBufferMPEG2 *iq_matrix;
1291     int i;
1292
1293     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
1294         return;
1295
1296     iq_matrix = (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1297
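         /* Pass 0 uploads the intra quantiser matrix, pass 1 the non-intra quantiser matrix. */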
1298     for (i = 0; i < 2; i++) {
1299         int k, m;
1300         unsigned char *qm = NULL;
1301         unsigned char qmx[64];
1302
1303         if (i == 0) {
1304             if (iq_matrix->load_intra_quantiser_matrix)
1305                 qm = iq_matrix->intra_quantiser_matrix;
1306         } else {
1307             if (iq_matrix->load_non_intra_quantiser_matrix)
1308                 qm = iq_matrix->non_intra_quantiser_matrix;
1309         }
1310
1311         if (!qm)
1312             continue;
1313
1314         /* Upload the quantisation matrix in raster order. The mplayer vaapi
1315          * patch passes the quantisation matrix in zig-zag order to the VA library.
1316          */
1317         for (k = 0; k < 64; k++) {
1318             m = zigzag_direct[k];
1319             qmx[m] = qm[k];
1320         }
1321
1322         BEGIN_BCS_BATCH(ctx, 18);
1323         OUT_BCS_BATCH(ctx, MFX_MPEG2_QM_STATE | (18 - 2));
1324         OUT_BCS_BATCH(ctx, i);
1325         intel_batchbuffer_data_bcs(ctx, qmx, 64);
1326         ADVANCE_BCS_BATCH(ctx);
1327     }
1328 }
1329
1330 static void
1331 gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1332                           VAPictureParameterBufferMPEG2 *pic_param,
1333                           VASliceParameterBufferMPEG2 *slice_param,
1334                           VASliceParameterBufferMPEG2 *next_slice_param)
1335 {
1336     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1337     unsigned int height_in_mbs = ALIGN(pic_param->vertical_size, 16) / 16;
1338     int mb_count;
1339
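         /* mb_count spans from the start of this slice to the start of the next slice,
          * or to the end of the picture for the last slice. */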
1340     if (next_slice_param == NULL)
1341         mb_count = width_in_mbs * height_in_mbs - 
1342             (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position);
1343     else
1344         mb_count = (next_slice_param->slice_vertical_position * width_in_mbs + next_slice_param->slice_horizontal_position) - 
1345             (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position);
1346
1347     BEGIN_BCS_BATCH(ctx, 5);
1348     OUT_BCS_BATCH(ctx, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1349     OUT_BCS_BATCH(ctx, 
1350                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1351     OUT_BCS_BATCH(ctx, 
1352                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1353     OUT_BCS_BATCH(ctx,
1354                   slice_param->slice_horizontal_position << 24 |
1355                   slice_param->slice_vertical_position << 16 |
1356                   mb_count << 8 |
1357                   (next_slice_param == NULL) << 5 |
1358                   (next_slice_param == NULL) << 3 |
1359                   (slice_param->macroblock_offset & 0x7));
1360     OUT_BCS_BATCH(ctx,
1361                   slice_param->quantiser_scale_code << 24);
1362     ADVANCE_BCS_BATCH(ctx);
1363 }
1364
1365 static void
1366 gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx, struct decode_state *decode_state)
1367 {
1368     VAPictureParameterBufferMPEG2 *pic_param;
1369     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param;
1370     dri_bo *slice_data_bo;
1371     int i, j;
1372
1373     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1374     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1375
1376     gen6_mfd_mpeg2_decode_init(ctx, decode_state);
1377     intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
1378     intel_batchbuffer_emit_mi_flush_bcs(ctx);
1379     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2);
1380     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2);
1381     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2);
1382     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2);
1383     gen6_mfd_mpeg2_pic_state(ctx, decode_state);
1384     gen6_mfd_mpeg2_qm_state(ctx, decode_state);
1385
1386     assert(decode_state->num_slice_params == 1);
1387     for (j = 0; j < decode_state->num_slice_params; j++) {
1388         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1389         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1390         slice_data_bo = decode_state->slice_datas[j]->bo;
1391         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2);
1392
1393         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1394             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1395
1396             if (i < decode_state->slice_params[j]->num_elements - 1)
1397                 next_slice_param = slice_param + 1;
1398             else
1399                 next_slice_param = NULL;
1400
1401             gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param);
1402             slice_param++;
1403         }
1404     }
1405
1406     intel_batchbuffer_end_atomic_bcs(ctx);
1407     intel_batchbuffer_flush_bcs(ctx);
1408 }
1409
1410 static const int va_to_gen6_vc1_pic_type[5] = {
1411     GEN6_VC1_I_PICTURE,
1412     GEN6_VC1_P_PICTURE,
1413     GEN6_VC1_B_PICTURE,
1414     GEN6_VC1_BI_PICTURE,
1415     GEN6_VC1_P_PICTURE,
1416 };
1417
1418 static const int va_to_gen6_vc1_mv[4] = {
1419     1, /* 1-MV */
1420     2, /* 1-MV half-pel */
1421     3, /* 1-MV half-pel bilinear */
1422     0, /* Mixed MV */
1423 };
1424
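     /* Scale factors indexed by b_picture_fraction (BFRACTION), used below for B-picture scaling and the backward reference frame distance. */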
1425 static const int b_picture_scale_factor[21] = {
1426     128, 85,  170, 64,  192,
1427     51,  102, 153, 204, 43,
1428     215, 37,  74,  111, 148,
1429     185, 222, 32,  96,  160, 
1430     224,
1431 };
1432
1433 static const int va_to_gen6_vc1_condover[3] = {
1434     0,
1435     2,
1436     3
1437 };
1438
1439 static const int va_to_gen6_vc1_profile[4] = {
1440     GEN6_VC1_SIMPLE_PROFILE,
1441     GEN6_VC1_MAIN_PROFILE,
1442     GEN6_VC1_RESERVED_PROFILE,
1443     GEN6_VC1_ADVANCED_PROFILE
1444 };
1445
1446 static const int va_to_gen6_vc1_ttfrm[8] = {
1447     0,  /* 8x8 */
1448     1,  /* 8x4 bottom */
1449     1,  /* 8x4 top */
1450     1,  /* 8x4 */
1451     2,  /* 4x8 bottom */
1452     2,  /* 4x8 top */
1453     2,  /* 4x8 */
1454     3,  /* 4x4 */
1455 };
1456
1457 static void 
1458 gen6_mfd_free_vc1_surface(void **data)
1459 {
1460     struct gen6_vc1_surface *gen6_vc1_surface = *data;
1461
1462     if (!gen6_vc1_surface)
1463         return;
1464
1465     dri_bo_unreference(gen6_vc1_surface->dmv);
1466     free(gen6_vc1_surface);
1467     *data = NULL;
1468 }
1469
1470 static void
1471 gen6_mfd_init_vc1_surface(VADriverContextP ctx, 
1472                           VAPictureParameterBufferVC1 *pic_param,
1473                           struct object_surface *obj_surface)
1474 {
1475     struct i965_driver_data *i965 = i965_driver_data(ctx);
1476     struct gen6_vc1_surface *gen6_vc1_surface = obj_surface->private_data;
1477
1478     obj_surface->free_private_data = gen6_mfd_free_vc1_surface;
1479
1480     if (!gen6_vc1_surface) {
1481         gen6_vc1_surface = calloc(1, sizeof(struct gen6_vc1_surface));
1482         assert((obj_surface->size & 0x3f) == 0);
1483         obj_surface->private_data = gen6_vc1_surface;
1484     }
1485
1486     gen6_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1487
1488     if (gen6_vc1_surface->dmv == NULL) {
1489         gen6_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1490                                              "direct mv w/r buffer",
1491                                              557056,    /* DMV_SIZE: 557056 bytes for a frame */
1492                                              0x1000);
1493     }
1494 }
1495
1496 static void
1497 gen6_mfd_vc1_decode_init(VADriverContextP ctx, struct decode_state *decode_state)
1498 {
1499     VAPictureParameterBufferVC1 *pic_param;
1500     struct i965_driver_data *i965 = i965_driver_data(ctx);
1501     struct i965_media_state *media_state = &i965->media_state;
1502     struct gen6_mfd_context *gen6_mfd_context;
1503     struct object_surface *obj_surface;
1504     int i;
1505     dri_bo *bo;
1506
1507     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1508     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1509     gen6_mfd_context = media_state->private_context;
1510
1511     if (gen6_mfd_context == NULL) {
1512         gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
1513         media_state->private_context = gen6_mfd_context;
1514
1515         for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
1516             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
1517             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
1518         }
1519     }
1520
1521     /* reference picture */
1522     obj_surface = SURFACE(pic_param->forward_reference_picture);
1523
1524     if (obj_surface && obj_surface->bo)
1525         gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1526     else
1527         gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1528
1529     obj_surface = SURFACE(pic_param->backward_reference_picture);
1530
1531     if (obj_surface && obj_surface->bo)
1532         gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1533     else
1534         gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1535
1536     /* Every frame store entry needs to be programmed, so mirror the two reference entries into the remaining slots. */
1537     for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
1538         gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;
1539
1540     /* Current decoded picture */
1541     obj_surface = SURFACE(decode_state->current_render_target);
1542     assert(obj_surface);
1543     gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1544
1545     if (obj_surface->bo == NULL) {
1546         uint32_t tiling_mode = I915_TILING_Y;
1547         unsigned long pitch;
1548
1549         obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, 
1550                                                    "vaapi surface",
1551                                                    obj_surface->width, 
1552                                                    obj_surface->height + obj_surface->height / 2,
1553                                                    1,
1554                                                    &tiling_mode,
1555                                                    &pitch,
1556                                                    0);
1557         assert(obj_surface->bo);
1558         assert(tiling_mode == I915_TILING_Y);
1559         assert(pitch == obj_surface->width);
1560     }
1561
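         /* Route the decoded picture to the post-deblocking output when the in-loop filter is enabled, otherwise to the pre-deblocking output. */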
1562     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
1563     gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1564     dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
1565     gen6_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1566
1567     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1568     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1569     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
1570     gen6_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1571
1572     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
1573     bo = dri_bo_alloc(i965->intel.bufmgr,
1574                       "intra row store",
1575                       128 * 64,
1576                       0x1000);
1577     assert(bo);
1578     gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1579     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1580
1581     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1582     bo = dri_bo_alloc(i965->intel.bufmgr,
1583                       "deblocking filter row store",
1584                       46080, /* 6 * 120 * 64 */
1585                       0x1000);
1586     assert(bo);
1587     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1588     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1589
1590     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1591     bo = dri_bo_alloc(i965->intel.bufmgr,
1592                       "bsd mpc row store",
1593                       11520, /* 1.5 * 120 * 64 */
1594                       0x1000);
1595     assert(bo);
1596     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1597     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1598
1599     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1600
1601     gen6_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1602     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
1603     
1604     if (gen6_mfd_context->bitplane_read_buffer.valid) {
1605         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1606         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1607         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1608         int bitplane_height = height_in_mbs;
1609         int src_w, src_h, dst_w, dst_h;
1610         uint8_t *src = NULL, *dst = NULL;
1611
1612         assert(decode_state->bit_plane->buffer);
1613         src = decode_state->bit_plane->buffer;
1614
1615         bo = dri_bo_alloc(i965->intel.bufmgr,
1616                           "VC-1 Bitplane",
1617                           bitplane_width * bitplane_height,
1618                           0x1000);
1619         assert(bo);
1620         gen6_mfd_context->bitplane_read_buffer.bo = bo;
1621
1622         dri_bo_map(bo, True);
1623         assert(bo->virtual);
1624         dst = bo->virtual;
1625
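             /* Repack the VA bitplane (two 4-bit macroblock entries per byte, rows packed back to back)
              * so that each macroblock row starts on a byte boundary and occupies bitplane_width bytes. */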
1626         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1627             for (src_w = 0; src_w < width_in_mbs; src_w++) {
1628                 int src_index, dst_index;
1629                 int src_shift;
1630                 uint8_t src_value;
1631
1632                 src_index = (src_h * width_in_mbs + src_w) / 2;
1633                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1634                 src_value = ((src[src_index] >> src_shift) & 0xf);
1635
1636                 dst_index = src_w / 2;
1637                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1638             }
1639
1640             if (src_w & 1)
1641                 dst[src_w / 2] >>= 4;
1642
1643             dst += bitplane_width;
1644         }
1645
1646         dri_bo_unmap(bo);
1647     } else
1648         gen6_mfd_context->bitplane_read_buffer.bo = NULL;
1649 }
1650
1651 static void
1652 gen6_mfd_vc1_pic_state(VADriverContextP ctx, struct decode_state *decode_state)
1653 {
1654     VAPictureParameterBufferVC1 *pic_param;
1655     struct i965_driver_data *i965 = i965_driver_data(ctx);
1656     struct object_surface *obj_surface;
1657     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1658     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1659     int unified_mv_mode;
1660     int ref_field_pic_polarity = 0;
1661     int scale_factor = 0;
1662     int trans_ac_y = 0;
1663     int dmv_surface_valid = 0;
1664     int brfd = 0;
1665     int fcm = 0;
1666     int picture_type;
1667     int profile;
1668     int overlap;
1669
1670     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1671     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1672
1673     profile = va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile];
1674     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1675     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1676     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1677     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1678     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1679     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1680     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1681
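         /* Derive the alternate quantizer configuration and edge mask from the DQUANT-related picture quantizer fields. */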
1682     if (dquant == 0) {
1683         alt_pquant_config = 0;
1684         alt_pquant_edge_mask = 0;
1685     } else if (dquant == 2) {
1686         alt_pquant_config = 1;
1687         alt_pquant_edge_mask = 0xf;
1688     } else {
1689         assert(dquant == 1);
1690         if (dquantfrm == 0) {
1691             alt_pquant_config = 0;
1692             alt_pquant_edge_mask = 0;
1693             alt_pq = 0;
1694         } else {
1695             assert(dquantfrm == 1);
1696             alt_pquant_config = 1;
1697
1698             switch (dqprofile) {
1699             case 3:
1700                 if (dqbilevel == 0) {
1701                     alt_pquant_config = 2;
1702                     alt_pquant_edge_mask = 0;
1703                 } else {
1704                     assert(dqbilevel == 1);
1705                     alt_pquant_config = 3;
1706                     alt_pquant_edge_mask = 0;
1707                 }
1708                 break;
1709                 
1710             case 0:
1711                 alt_pquant_edge_mask = 0xf;
1712                 break;
1713
1714             case 1:
1715                 if (dqdbedge == 3)
1716                     alt_pquant_edge_mask = 0x9;
1717                 else
1718                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1719
1720                 break;
1721
1722             case 2:
1723                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1724                 break;
1725
1726             default:
1727                 assert(0);
1728             }
1729         }
1730     }
1731
1732     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1733         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1734         unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1735     } else {
1736         assert(pic_param->mv_fields.bits.mv_mode < 4);
1737         unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1738     }
1739
1740     if (pic_param->sequence_fields.bits.interlace == 1 &&
1741         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1742         /* FIXME: calculate reference field picture polarity */
1743         assert(0);
1744         ref_field_pic_polarity = 0;
1745     }
1746
1747     if (pic_param->b_picture_fraction < 21)
1748         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1749
1750     picture_type = va_to_gen6_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1751     
1752     if (profile == GEN6_VC1_ADVANCED_PROFILE && 
1753         picture_type == GEN6_VC1_I_PICTURE)
1754         picture_type = GEN6_VC1_BI_PICTURE;
1755
1756     if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I or BI picture */
1757         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1758     else
1759         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1760
1761
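         /* For B pictures, direct-mode motion vectors come from the backward reference's DMV buffer,
          * which is only valid if that reference was not an I/BI picture. */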
1762     if (picture_type == GEN6_VC1_B_PICTURE) {
1763         struct gen6_vc1_surface *gen6_vc1_surface = NULL;
1764
1765         obj_surface = SURFACE(pic_param->backward_reference_picture);
1766         assert(obj_surface);
1767         gen6_vc1_surface = obj_surface->private_data;
1768
1769         if (!gen6_vc1_surface || 
1770             (va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_I_PICTURE ||
1771              va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_BI_PICTURE))
1772             dmv_surface_valid = 0;
1773         else
1774             dmv_surface_valid = 1;
1775     }
1776
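         /* Frame coding mode: progressive and frame-interlace pass through; for field pictures the top-field-first flag selects the field order code. */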
1777     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1778
1779     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1780         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1781     else {
1782         if (pic_param->picture_fields.bits.top_field_first)
1783             fcm = 2;
1784         else
1785             fcm = 3;
1786     }
1787
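         /* Backward reference frame distance for B pictures: the reference distance minus its BFRACTION-scaled share, clamped at zero. */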
1788     if (picture_type == GEN6_VC1_B_PICTURE) { /* B picture */
1789         brfd = pic_param->reference_fields.bits.reference_distance;
1790         brfd = (scale_factor * brfd) >> 8;
1791         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1792
1793         if (brfd < 0)
1794             brfd = 0;
1795     }
1796
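         /* In Simple/Main profile the overlap smoothing filter only applies when PQUANT >= 9, so force it off otherwise. */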
1797     overlap = pic_param->sequence_fields.bits.overlap;
1798     if (profile != GEN6_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
1799         overlap = 0;
1800
1801     assert(pic_param->conditional_overlap_flag < 3);
1802     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1803
1804     BEGIN_BCS_BATCH(ctx, 6);
1805     OUT_BCS_BATCH(ctx, MFX_VC1_PIC_STATE | (6 - 2));
1806     OUT_BCS_BATCH(ctx,
1807                   (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
1808                   (ALIGN(pic_param->coded_width, 16) / 16));
1809     OUT_BCS_BATCH(ctx,
1810                   pic_param->sequence_fields.bits.syncmarker << 31 |
1811                   1 << 29 | /* concealment */
1812                   alt_pq << 24 |
1813                   pic_param->entrypoint_fields.bits.loopfilter << 23 |
1814                   overlap << 22 |
1815                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 21 | /* implicit quantizer */
1816                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 16 |
1817                   alt_pquant_edge_mask << 12 |
1818                   alt_pquant_config << 10 |
1819                   pic_param->pic_quantizer_fields.bits.half_qp << 9 |
1820                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 8 |
1821                   va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] << 6 |
1822                   !pic_param->picture_fields.bits.is_first_field << 5 |
1823                   picture_type << 2 |
1824                   fcm << 0);
1825     OUT_BCS_BATCH(ctx,
1826                   !!pic_param->bitplane_present.value << 23 |
1827                   !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
1828                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
1829                   !pic_param->bitplane_present.flags.bp_skip_mb << 20 |
1830                   !pic_param->bitplane_present.flags.bp_direct_mb << 19 |
1831                   !pic_param->bitplane_present.flags.bp_overflags << 18 |
1832                   !pic_param->bitplane_present.flags.bp_ac_pred << 17 |
1833                   !pic_param->bitplane_present.flags.bp_field_tx << 16 |
1834                   pic_param->mv_fields.bits.extended_dmv_range << 14 |
1835                   pic_param->mv_fields.bits.extended_mv_range << 12 |
1836                   pic_param->mv_fields.bits.four_mv_switch << 11 |
1837                   pic_param->fast_uvmc_flag << 10 |
1838                   unified_mv_mode << 8 |
1839                   ref_field_pic_polarity << 6 |
1840                   pic_param->reference_fields.bits.num_reference_pictures << 5 |
1841                   pic_param->reference_fields.bits.reference_distance << 0);
1842     OUT_BCS_BATCH(ctx,
1843                   scale_factor << 24 |
1844                   pic_param->mv_fields.bits.mv_table << 20 |
1845                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1846                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1847                   va_to_gen6_vc1_ttfrm[pic_param->transform_fields.bits.frame_level_transform_type] << 12 |
1848                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1849                   pic_param->mb_mode_table << 8 |
1850                   trans_ac_y << 6 |
1851                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1852                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1853                   pic_param->cbp_table << 0);
1854     OUT_BCS_BATCH(ctx,
1855                   dmv_surface_valid << 13 |
1856                   brfd << 8 |
1857                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
1858     ADVANCE_BCS_BATCH(ctx);
1859 }
1860
1861 static void
1862 gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx, struct decode_state *decode_state)
1863 {
1864     VAPictureParameterBufferVC1 *pic_param;
1865     int interpolation_mode = 0;
1866
1867     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1868     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1869
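         /* Select the sub-pel interpolation filter from the effective MV mode (mv_mode2 applies when intensity compensation is signalled). */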
1870     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1871         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1872          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1873         interpolation_mode = 2; /* Half-pel bilinear */
1874     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1875              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1876               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1877         interpolation_mode = 0; /* Half-pel bicubic */
1878     else
1879         interpolation_mode = 1; /* Quarter-pel bicubic */
1880
1884     BEGIN_BCS_BATCH(ctx, 7);
1885     OUT_BCS_BATCH(ctx, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
1886     OUT_BCS_BATCH(ctx,
1887                   0 << 8 | /* FIXME: interlace mode */
1888                   pic_param->rounding_control << 4 |
1889                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
1890     OUT_BCS_BATCH(ctx,
1891                   pic_param->luma_shift << 16 |
1892                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1893     OUT_BCS_BATCH(ctx, 0);
1894     OUT_BCS_BATCH(ctx, 0);
1895     OUT_BCS_BATCH(ctx, 0);
1896     OUT_BCS_BATCH(ctx,
1897                   interpolation_mode << 19 |
1898                   pic_param->fast_uvmc_flag << 18 |
1899                   0 << 17 | /* FIXME: scale up or down ??? */
1900                   pic_param->range_reduction_frame << 16 |
1901                   0 << 6 |
1902                   0 << 4 |
1903                   0 << 2 | /* FIXME: Intensity Compensation */
1904                   0 << 0);
1905     ADVANCE_BCS_BATCH(ctx);
1906 }
1907
1908
1909 static void
1910 gen6_mfd_vc1_directmode_state(VADriverContextP ctx, struct decode_state *decode_state)
1911 {
1912     VAPictureParameterBufferVC1 *pic_param;
1913     struct i965_driver_data *i965 = i965_driver_data(ctx);
1914     struct object_surface *obj_surface;
1915     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1916
1917     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1918     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1919
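         /* The current picture's DMV buffer is the hardware write target; the backward reference's DMV buffer is read for direct-mode prediction. */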
1920     obj_surface = SURFACE(decode_state->current_render_target);
1921
1922     if (obj_surface && obj_surface->private_data) {
1923         dmv_write_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1924     }
1925
1926     obj_surface = SURFACE(pic_param->backward_reference_picture);
1927
1928     if (obj_surface && obj_surface->private_data) {
1929         dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1930     }
1931
1932     BEGIN_BCS_BATCH(ctx, 3);
1933     OUT_BCS_BATCH(ctx, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1934
1935     if (dmv_write_buffer)
1936         OUT_BCS_RELOC(ctx, dmv_write_buffer,
1937                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1938                       0);
1939     else
1940         OUT_BCS_BATCH(ctx, 0);
1941
1942     if (dmv_read_buffer)
1943         OUT_BCS_RELOC(ctx, dmv_read_buffer,
1944                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1945                       0);
1946     else
1947         OUT_BCS_BATCH(ctx, 0);
1948                   
1949     ADVANCE_BCS_BATCH(ctx);
1950 }
1951
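     /* Adjust the macroblock bit offset for 0x00 0x00 0x03 emulation-prevention bytes in the slice header,
      * so that it indexes the slice data buffer as actually uploaded. */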
1952 static int
1953 gen6_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset)
1954 {
1955     int out_slice_data_bit_offset;
1956     int slice_header_size = in_slice_data_bit_offset / 8;
1957     int i, j;
1958
1959     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1960         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1961             i++, j += 2;
1962         }
1963     }
1964
1965     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1966
1967     return out_slice_data_bit_offset;
1968 }
1969
1970 static void
1971 gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
1972                         VAPictureParameterBufferVC1 *pic_param,
1973                         VASliceParameterBufferVC1 *slice_param,
1974                         VASliceParameterBufferVC1 *next_slice_param,
1975                         dri_bo *slice_data_bo)
1976 {
1977     int next_slice_start_vert_pos;
1978     int macroblock_offset;
1979     uint8_t *slice_data = NULL;
1980
1981     dri_bo_map(slice_data_bo, 0);
1982     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1983     macroblock_offset = gen6_mfd_vc1_get_macroblock_bit_offset(slice_data, slice_param->macroblock_offset);
1984     dri_bo_unmap(slice_data_bo);
1985
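         /* The BSD object covers macroblock rows from this slice's vertical position up to the next slice's start row,
          * or to the bottom of the picture for the last slice. */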
1986     if (next_slice_param)
1987         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1988     else
1989         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1990
1991     BEGIN_BCS_BATCH(ctx, 4);
1992     OUT_BCS_BATCH(ctx, MFD_VC1_BSD_OBJECT | (4 - 2));
1993     OUT_BCS_BATCH(ctx, 
1994                   slice_param->slice_data_size - (macroblock_offset >> 3));
1995     OUT_BCS_BATCH(ctx, 
1996                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1997     OUT_BCS_BATCH(ctx,
1998                   slice_param->slice_vertical_position << 24 |
1999                   next_slice_start_vert_pos << 16 |
2000                   (macroblock_offset & 0x7));
2001     ADVANCE_BCS_BATCH(ctx);
2002 }
2003
2004 static void
2005 gen6_mfd_vc1_decode_picture(VADriverContextP ctx, struct decode_state *decode_state)
2006 {
2007     VAPictureParameterBufferVC1 *pic_param;
2008     VASliceParameterBufferVC1 *slice_param, *next_slice_param;
2009     dri_bo *slice_data_bo;
2010     int i, j;
2011
2012     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2013     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2014
2015     gen6_mfd_vc1_decode_init(ctx, decode_state);
2016     intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
2017     intel_batchbuffer_emit_mi_flush_bcs(ctx);
2018     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1);
2019     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1);
2020     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1);
2021     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1);
2022     gen6_mfd_vc1_pic_state(ctx, decode_state);
2023     gen6_mfd_vc1_pred_pipe_state(ctx, decode_state);
2024     gen6_mfd_vc1_directmode_state(ctx, decode_state);
2025
2026     assert(decode_state->num_slice_params == 1);
2027     for (j = 0; j < decode_state->num_slice_params; j++) {
2028         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2029         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2030         slice_data_bo = decode_state->slice_datas[j]->bo;
2031         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1);
2032
2033         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2034             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2035
2036             if (i < decode_state->slice_params[j]->num_elements - 1)
2037                 next_slice_param = slice_param + 1;
2038             else
2039                 next_slice_param = NULL;
2040
2041             gen6_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo);
2042             slice_param++;
2043         }
2044     }
2045
2046     intel_batchbuffer_end_atomic_bcs(ctx);
2047     intel_batchbuffer_flush_bcs(ctx);
2048 }
2049
2050 void 
2051 gen6_mfd_decode_picture(VADriverContextP ctx, 
2052                         VAProfile profile, 
2053                         struct decode_state *decode_state)
2054 {
2055     switch (profile) {
2056     case VAProfileMPEG2Simple:
2057     case VAProfileMPEG2Main:
2058         gen6_mfd_mpeg2_decode_picture(ctx, decode_state);
2059         break;
2060         
2061     case VAProfileH264Baseline:
2062     case VAProfileH264Main:
2063     case VAProfileH264High:
2064         gen6_mfd_avc_decode_picture(ctx, decode_state);
2065         break;
2066
2067     case VAProfileVC1Simple:
2068     case VAProfileVC1Main:
2069     case VAProfileVC1Advanced:
2070         gen6_mfd_vc1_decode_picture(ctx, decode_state);
2071         break;
2072
2073     default:
2074         assert(0);
2075         break;
2076     }
2077 }
2078
2079 Bool
2080 gen6_mfd_init(VADriverContextP ctx)
2081 {
2082     return True;
2083 }
2084
2085 Bool 
2086 gen6_mfd_terminate(VADriverContextP ctx)
2087 {
2088     struct i965_driver_data *i965 = i965_driver_data(ctx);
2089     struct i965_media_state *media_state = &i965->media_state;
2090     struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
2091
2092     if (gen6_mfd_context) {
2093         dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
2094         gen6_mfd_context->post_deblocking_output.bo = NULL;
2095
2096         dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
2097         gen6_mfd_context->pre_deblocking_output.bo = NULL;
2098
2099         dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
2100         gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2101
2102         dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2103         gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2104
2105         dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2106         gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2107
2108         dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
2109         gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2110
2111         dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
2112         gen6_mfd_context->bitplane_read_buffer.bo = NULL;
2113
2114         free(gen6_mfd_context);
2115     }
2116
2117     media_state->private_context = NULL;
2118     return True;
2119 }
2120