25f74ad82c4e8da27489457f01c2c88b184b695d
[platform/upstream/libva-intel-driver.git] / src / gen6_mfd.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36
37 #include "i965_defines.h"
38 #include "i965_drv_video.h"
39 #include "i965_decoder_utils.h"
40
41 #include "gen6_mfd.h"
42 #include "intel_media.h"
43
/* Standard zig-zag scan order for an 8x8 block of coefficients:
 * entry k gives the raster-order position of the k-th coefficient
 * in scan order. Used to reorder quantizer matrices supplied in
 * scan order into the raster layout the hardware expects.
 * NOTE(review): not referenced in this chunk — presumably used by
 * the MPEG-2/VC-1 paths later in the file; confirm before removing. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
54
55 static void
56 gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
57                                VAPictureParameterBufferH264 *pic_param,
58                                struct gen6_mfd_context *gen6_mfd_context)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     int i, j;
62
63     assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
64
65     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
66         int found = 0;
67
68         if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
69             continue;
70
71         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
72             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
73             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
74                 continue;
75
76             if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
77                 found = 1;
78                 break;
79             }
80         }
81
82         if (!found) {
83             struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
84             obj_surface->flags &= ~SURFACE_REFERENCED;
85
86             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
87                 dri_bo_unreference(obj_surface->bo);
88                 obj_surface->bo = NULL;
89                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
90             }
91
92             if (obj_surface->free_private_data)
93                 obj_surface->free_private_data(&obj_surface->private_data);
94
95             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
96             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
97         }
98     }
99
100     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
101         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
102         int found = 0;
103
104         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
105             continue;
106
107         for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
108             if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
109                 continue;
110             
111             if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
112                 found = 1;
113                 break;
114             }
115         }
116
117         if (!found) {
118             int frame_idx;
119             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
120             
121             assert(obj_surface);
122             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
123
124             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
125                 for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
126                     if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
127                         continue;
128
129                     if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx)
130                         break;
131                 }
132
133                 if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface))
134                     break;
135             }
136
137             assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
138
139             for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
140                 if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
141                     gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
142                     gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
143                     break;
144                 }
145             }
146         }
147     }
148
149     /* sort */
150     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) {
151         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
152             gen6_mfd_context->reference_surface[i].frame_store_id == i)
153             continue;
154
155         for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
156             if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
157                 gen6_mfd_context->reference_surface[j].frame_store_id == i) {
158                 VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id;
159                 int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id;
160
161                 gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id;
162                 gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id;
163                 gen6_mfd_context->reference_surface[j].surface_id = id;
164                 gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
165                 break;
166             }
167         }
168     }
169 }
170
171 static void
172 gen6_mfd_init_avc_surface(VADriverContextP ctx, 
173                           VAPictureParameterBufferH264 *pic_param,
174                           struct object_surface *obj_surface)
175 {
176     struct i965_driver_data *i965 = i965_driver_data(ctx);
177     GenAvcSurface *gen6_avc_surface = obj_surface->private_data;
178     int height_in_mbs;
179
180     obj_surface->free_private_data = gen_free_avc_surface;
181     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
182
183     if (!gen6_avc_surface) {
184         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
185         assert((obj_surface->size & 0x3f) == 0);
186         obj_surface->private_data = gen6_avc_surface;
187     }
188
189     gen6_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
190                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
191
192     if (gen6_avc_surface->dmv_top == NULL) {
193         gen6_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
194                                                  "direct mv w/r buffer",
195                                                  128 * height_in_mbs * 64,      /* scalable with frame height */
196                                                  0x1000);
197     }
198
199     if (gen6_avc_surface->dmv_bottom_flag &&
200         gen6_avc_surface->dmv_bottom == NULL) {
201         gen6_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
202                                                     "direct mv w/r buffer",
203                                                     128 * height_in_mbs * 64,   /* scalable with frame height */
204                                                     0x1000);
205     }
206 }
207
/*
 * Emit MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decode of
 * the selected codec and route output through pre- or post-deblocking,
 * depending on which of the context's output buffers is valid.
 * The dword layout is fixed by the hardware command format; do not
 * reorder the OUT_BCS_BATCH calls.
 */
static void
gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    /* Only MPEG-2, AVC and VC-1 decoding are supported on this path. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (MFD_MODE_VLD << 16) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 7)  | /* disable TLB prefectch */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 20) | /* round flag in PB slice */
                  (0 << 19) | /* round flag in Intra8x8 */
                  (0 << 7)  | /* expand NOA bus flag */
                  (1 << 6)  | /* must be 1 */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  | /* AVC long field motion vector */
                  (1 << 0));  /* always calculate AVC ILDB boundary strength */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
245
/*
 * Emit MFX_SURFACE_STATE describing the destination render target:
 * NV12 (planar 4:2:0, interleaved U/V), Y-tiled, with the chroma plane
 * starting at obj_surface->height rows below the luma plane.
 * NOTE(review): obj_surface is only checked via assert(), which is
 * compiled out under NDEBUG — a missing render target would crash here.
 */
static void
gen6_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    /* Width/height are the original (unaligned) dimensions, 0-based. */
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 19) |
                  ((obj_surface->orig_width - 1) << 6));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be y-tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* must be 0 for interleave U/V */
                  (obj_surface->height)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
277
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 dwords): the pre/post-deblocking
 * destinations, the intra and deblocking-filter row-store scratch
 * buffers, and one address per frame store slot (DW 7..22). Invalid
 * entries emit a literal 0 instead of a relocation. The dword order is
 * dictated by the command layout and must not change.
 */
static void
gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    /* DW1: pre-deblocking output (written by the engine). */
    if (gen6_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW2: post-deblocking output (written by the engine). */
    if (gen6_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    /* DW5: intra row-store scratch buffer. */
    if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW6: deblocking-filter row-store scratch buffer. */
    if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: one reference picture address per frame store slot. */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface && obj_surface->bo);

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
340
341 static void
342 gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
343                                  dri_bo *slice_data_bo,
344                                  int standard_select,
345                                  struct gen6_mfd_context *gen6_mfd_context)
346 {
347     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
348
349     BEGIN_BCS_BATCH(batch, 11);
350     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
351     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
352     OUT_BCS_BATCH(batch, 0);
353     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
354     OUT_BCS_BATCH(batch, 0);
355     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
356     OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
358     OUT_BCS_BATCH(batch, 0);
359     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
360     OUT_BCS_BATCH(batch, 0);
361     ADVANCE_BCS_BATCH(batch);
362 }
363
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (4 dwords): the BSD/MPC row-store
 * scratch buffer, the MPR row-store scratch buffer, and the bitplane
 * read buffer (VC-1 only). Invalid entries program a literal 0.
 */
static void
gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    /* DW1: BSD/MPC row-store scratch buffer (read/write by the engine). */
    if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW2: MPR row-store scratch buffer (read/write by the engine). */
    if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW3: bitplane read buffer (read-only; used by VC-1 decode). */
    if (gen6_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
398
#if 0
/* Disabled code: kept for reference, compiled out. */

/* Placeholder for AES (protected content) state programming — never
 * implemented on this path. */
static void
gen6_mfd_aes_state(VADriverContextP ctx,
                   struct decode_state *decode_state,
                   int standard_select)
{
    /* FIXME */
}

/* Emit an MFX_WAIT with the MFX-sync-enable bit set (bit 8),
 * stalling the command streamer until the MFX engine is idle. */
static void
gen6_mfd_wait(VADriverContextP ctx,
              struct decode_state *decode_state,
              int standard_select,
              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 1);
    OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
    ADVANCE_BCS_BATCH(batch);
}
#endif
421
/*
 * Emit MFX_AVC_IMG_STATE (13 dwords) from the VA-API picture parameters:
 * picture dimensions in macroblocks, QP offsets, picture structure
 * (frame/top field/bottom field), and the sequence/picture flags the
 * hardware needs (entropy mode, transform_8x8, MBAFF, etc.).
 * Validates via assert() the combinations the MFX unit cannot handle
 * (4:2:2/4:4:4 chroma, inconsistent field flags).
 */
static void
gen6_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int qm_present_flag;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* Application-supplied scaling matrices vs. hardware defaults. */
    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        qm_present_flag = 1;
    else
        qm_present_flag = 0; /* built-in QM matrices */

    /* img_struct encoding: 0 = frame, 1 = top field, 3 = bottom field. */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Field picture flag must agree with the picture structure. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF applies only to frame pictures of an MBAFF sequence. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
    OUT_BCS_BATCH(batch, 
                  ((width_in_mbs * height_in_mbs) & 0x7fff));
    OUT_BCS_BATCH(batch, 
                  (height_in_mbs << 16) | 
                  (width_in_mbs << 0));
    OUT_BCS_BATCH(batch, 
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (1 << 12) | /* always 1, hardware requirement */
                  (qm_present_flag << 10) |
                  (img_struct << 8) |
                  (16 << 0));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
511
512 static void
513 gen6_mfd_avc_qm_state(VADriverContextP ctx,
514                       struct decode_state *decode_state,
515                       struct gen6_mfd_context *gen6_mfd_context)
516 {
517     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
518     int cmd_len;
519     VAIQMatrixBufferH264 *iq_matrix;
520     VAPictureParameterBufferH264 *pic_param;
521
522     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
523         return;
524
525     iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
526
527     assert(decode_state->pic_param && decode_state->pic_param->buffer);
528     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
529
530     cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */
531
532     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
533         cmd_len += 2 * 16; /* load two 8x8 scaling matrices */
534
535     BEGIN_BCS_BATCH(batch, cmd_len);
536     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | (cmd_len - 2));
537
538     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
539         OUT_BCS_BATCH(batch, 
540                       (0x0  << 8) | /* don't use default built-in matrices */
541                       (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
542     else
543         OUT_BCS_BATCH(batch, 
544                       (0x0  << 8) | /* don't use default built-in matrices */
545                       (0x3f << 0)); /* six 4x4 scaling matrices */
546
547     intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
548
549     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
550         intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
551
552     ADVANCE_BCS_BATCH(batch);
553 }
554
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 dwords): for each of the 16 frame
 * store slots plus the current picture, the direct-mode MV buffer
 * addresses (top, and bottom — or top again when no separate bottom
 * buffer exists), followed by the top/bottom POC pairs for each slot
 * and for the current picture. Used for B-slice direct/co-located
 * motion prediction.
 */
static void
gen6_mfd_avc_directmode_state(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen6_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen6_avc_surface = obj_surface->private_data;

            /* A reference without AVC metadata gets zeroed addresses. */
            if (gen6_avc_surface == NULL) {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);

                /* Without a dedicated bottom-field buffer, reuse the
                 * top buffer for the second address. */
                if (gen6_avc_surface->dmv_bottom_flag == 1)
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
                else
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
            }
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen6_avc_surface = obj_surface->private_data;

    /* Current picture's MV buffers are also written by the engine. */
    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen6_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            /* Find this slot's picture in ReferenceFrames to read its POCs. */
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];
                
                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
            
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* Current picture's POC pair closes the list. */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
654
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: normalized slice type
 * (SI->I, SP->P), active reference counts per list, weighted
 * prediction mode, QP, deblocking offsets, and the start position of
 * this slice and the next one (in MB units; doubled for MBAFF frames).
 * next_slice_param == NULL marks the last slice of the picture, in
 * which case the "next" position is set to the end of the frame.
 */
static void
gen6_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int weighted_pred_idc = 0;
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Fold SI into I and SP into P: the hardware only knows I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else { 
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Reference counts and weighted prediction depend on slice type:
     * I uses neither list, P uses L0 only, B uses both. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1);
    }

    /* In MBAFF frames, addresses count MB pairs, so double them. */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs; 
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* Last slice: "next" position is the bottom of the picture. */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs;
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch, 
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch, 
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch, 
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) | 
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, 
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
744
745 static void
746 gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx,
747                                  VAPictureParameterBufferH264 *pic_param,
748                                  struct gen6_mfd_context *gen6_mfd_context)
749 {
750     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
751     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
752     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
753
754     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
755     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
756     OUT_BCS_BATCH(batch, 0);
757     OUT_BCS_BATCH(batch, 0);
758     OUT_BCS_BATCH(batch, 0);
759     OUT_BCS_BATCH(batch,
760                   height_in_mbs << 24 |
761                   width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
762     OUT_BCS_BATCH(batch, 0);
763     OUT_BCS_BATCH(batch, 0);
764     OUT_BCS_BATCH(batch, 0);
765     OUT_BCS_BATCH(batch, 0);
766     OUT_BCS_BATCH(batch, 0);
767     OUT_BCS_BATCH(batch, 0);
768     ADVANCE_BCS_BATCH(batch);
769 }
770
771 static inline void
772 gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
773                            VAPictureParameterBufferH264 *pic_param,
774                            VASliceParameterBufferH264 *slice_param,
775                            struct gen6_mfd_context *gen6_mfd_context)
776 {
777     gen6_send_avc_ref_idx_state(
778         gen6_mfd_context->base.batch,
779         slice_param,
780         gen6_mfd_context->reference_surface
781     );
782 }
783
784 static void
785 gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
786                                 VAPictureParameterBufferH264 *pic_param,
787                                 VASliceParameterBufferH264 *slice_param,
788                                 struct gen6_mfd_context *gen6_mfd_context)
789 {
790     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
791     int i, j, num_weight_offset_table = 0;
792     short weightoffsets[32 * 6];
793
794     if ((slice_param->slice_type == SLICE_TYPE_P ||
795          slice_param->slice_type == SLICE_TYPE_SP) &&
796         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
797         num_weight_offset_table = 1;
798     }
799     
800     if ((slice_param->slice_type == SLICE_TYPE_B) &&
801         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
802         num_weight_offset_table = 2;
803     }
804
805     for (i = 0; i < num_weight_offset_table; i++) {
806         BEGIN_BCS_BATCH(batch, 98);
807         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
808         OUT_BCS_BATCH(batch, i);
809
810         if (i == 0) {
811             for (j = 0; j < 32; j++) {
812                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
813                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
814                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
815                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
816                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
817                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
818             }
819         } else {
820             for (j = 0; j < 32; j++) {
821                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
822                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
823                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
824                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
825                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
826                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
827             }
828         }
829
830         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
831         ADVANCE_BCS_BATCH(batch);
832     }
833 }
834
835 static int
836 gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
837 {
838     int out_slice_data_bit_offset;
839     int slice_header_size = in_slice_data_bit_offset / 8;
840     int i, j;
841
842     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
843         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
844             i++, j += 2;
845         }
846     }
847
848     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
849
850     if (mode_flag == ENTROPY_CABAC)
851         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
852
853     return out_slice_data_bit_offset;
854 }
855
static void
gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int slice_data_bit_offset;
    uint8_t *slice_data = NULL;

    /* Map the slice data and translate the VA-API bit offset (relative to
     * the de-escaped RBSP) into an offset into the raw, escaped bitstream
     * that the hardware actually parses. */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
                                                              pic_param->pic_fields.bits.entropy_coding_mode_flag,
                                                              slice_param->slice_data_bit_offset);
    dri_bo_unmap(slice_data_bo);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: remaining slice data length in bytes (header bytes excluded) */
    OUT_BCS_BATCH(batch, 
                  ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
    /* DW2: byte offset of the slice data within the indirect object buffer */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
    /* DW3: concealment / error-handling controls, all left disabled */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4 low bits: 7 - (bit offset within the first data byte) —
     * per MFD_AVC_BSD_OBJECT the hardware wants the complemented
     * sub-byte bit position. */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |
                  (0 << 6)  |
                  ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
892
893 static void
894 gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
895                                       VAPictureParameterBufferH264 *pic_param,
896                                       struct gen6_mfd_context *gen6_mfd_context)
897 {
898     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
899
900     BEGIN_BCS_BATCH(batch, 6);
901     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
902     OUT_BCS_BATCH(batch, 0);
903     OUT_BCS_BATCH(batch, 0);
904     OUT_BCS_BATCH(batch, 0);
905     OUT_BCS_BATCH(batch, 0);
906     OUT_BCS_BATCH(batch, 0);
907     ADVANCE_BCS_BATCH(batch);
908 }
909
/* Emit the trailing "phantom" slice after all real slices of the frame:
 * a terminating MFX_AVC_SLICE_STATE plus an empty MFD_AVC_BSD_OBJECT. */
static void
gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context);
    gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context);
}
918
/* Per-frame AVC decode setup: decide the deblocking path, bind the target
 * surface as pre-/post-deblocking output, and (re)allocate the row-store
 * scratch buffers sized by the picture width. */
static void
gen6_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    VAPictureH264 *va_pic;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    int width_in_mbs;

    /* In-loop deblocking is needed as soon as any slice does not
     * explicitly disable it (disable_deblocking_filter_idc != 1). */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_frame_store_index(ctx, pic_param, gen6_mfd_context);
    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);

    /* Current decoded picture: mark its reference status and make sure an
     * NV12 bo plus the per-surface AVC private data are in place. */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface);
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* The target surface is bound as both pre- and post-deblocking output;
     * only one of the two is marked valid, depending on enable_avc_ildb. */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers, all sized by the picture width in MBs. */
    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC does not use the bitplane read buffer (VC-1 only). */
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
1017
/* Decode one AVC picture: set up per-frame state, then emit the full MFX
 * command sequence (pipe/surface/buffer state, IMG/QM state, and per-slice
 * DIRECTMODE/SLICE/REF_IDX/WEIGHTOFFSET/BSD commands) in a single atomic
 * BCS batch, terminated by the phantom slice. */
static void
gen6_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_decode_init(ctx, decode_state, gen6_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_avc_img_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_avc_qm_state(ctx, decode_state, gen6_mfd_context);

    /* Outer loop: slice parameter buffers ("groups"); inner loop: the
     * individual slices within each buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen6_mfd_context);

        /* First slice of the next group, used as next_slice_param when the
         * inner loop reaches the last slice of this group. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }
    
    gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1079
1080 static void
1081 gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
1082                            struct decode_state *decode_state,
1083                            struct gen6_mfd_context *gen6_mfd_context)
1084 {
1085     VAPictureParameterBufferMPEG2 *pic_param;
1086     struct i965_driver_data *i965 = i965_driver_data(ctx);
1087     struct object_surface *obj_surface;
1088     dri_bo *bo;
1089     unsigned int width_in_mbs;
1090
1091     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1092     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1093     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1094
1095     mpeg2_set_reference_surfaces(
1096         ctx,
1097         gen6_mfd_context->reference_surface,
1098         decode_state,
1099         pic_param
1100     );
1101
1102     /* Current decoded picture */
1103     obj_surface = SURFACE(decode_state->current_render_target);
1104     assert(obj_surface);
1105     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1106
1107     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1108     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1109     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
1110     gen6_mfd_context->pre_deblocking_output.valid = 1;
1111
1112     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1113     bo = dri_bo_alloc(i965->intel.bufmgr,
1114                       "bsd mpc row store",
1115                       width_in_mbs * 96,
1116                       0x1000);
1117     assert(bo);
1118     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1119     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1120
1121     gen6_mfd_context->post_deblocking_output.valid = 0;
1122     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1123     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1124     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1125     gen6_mfd_context->bitplane_read_buffer.valid = 0;
1126 }
1127
/* Emit MFX_MPEG2_PIC_STATE: f_codes, picture-coding-extension flags and
 * picture dimensions in macroblocks. */
static void
gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int tff, pic_structure;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* For frame pictures take top_field_first as-is; for field pictures
     * derive it from which field comes first (NOTE(review): assumes
     * MPEG_TOP_FIELD occupies bit 0 of picture_structure — confirm). */
    pic_structure = pic_param->picture_coding_extension.bits.picture_structure;
    if (pic_structure == MPEG_FRAME)
        tff = pic_param->picture_coding_extension.bits.top_field_first;
    else
        tff = !(pic_param->picture_coding_extension.bits.is_first_field ^
                (pic_structure & MPEG_TOP_FIELD));

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
    /* DW1: f_code is packed by VA-API as four nibbles, highest nibble =
     * f_code[0][0] (forward horizontal); unpack into the hardware layout. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  tff << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    /* DW2: picture coding type (I/P/B) */
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* DW3: picture height (bits 16+) and width in macroblocks */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
                  (ALIGN(pic_param->horizontal_size, 16) / 16));
    ADVANCE_BCS_BATCH(batch);
}
1169
1170 static void
1171 gen6_mfd_mpeg2_qm_state(VADriverContextP ctx,
1172                         struct decode_state *decode_state,
1173                         struct gen6_mfd_context *gen6_mfd_context)
1174 {
1175     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1176     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen6_mfd_context->iq_matrix.mpeg2;
1177     int i, j;
1178
1179     /* Update internal QM state */
1180     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1181         VAIQMatrixBufferMPEG2 * const iq_matrix =
1182             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1183
1184         gen_iq_matrix->load_intra_quantiser_matrix =
1185             iq_matrix->load_intra_quantiser_matrix;
1186         if (iq_matrix->load_intra_quantiser_matrix) {
1187             for (j = 0; j < 64; j++)
1188                 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1189                     iq_matrix->intra_quantiser_matrix[j];
1190         }
1191
1192         gen_iq_matrix->load_non_intra_quantiser_matrix =
1193             iq_matrix->load_non_intra_quantiser_matrix;
1194         if (iq_matrix->load_non_intra_quantiser_matrix) {
1195             for (j = 0; j < 64; j++)
1196                 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1197                     iq_matrix->non_intra_quantiser_matrix[j];
1198         }
1199     }
1200
1201     /* Commit QM state to HW */
1202     for (i = 0; i < 2; i++) {
1203         unsigned char *qm = NULL;
1204
1205         if (i == 0) {
1206             if (gen_iq_matrix->load_intra_quantiser_matrix)
1207                 qm = gen_iq_matrix->intra_quantiser_matrix;
1208         } else {
1209             if (gen_iq_matrix->load_non_intra_quantiser_matrix)
1210                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1211         }
1212
1213         if (!qm)
1214             continue;
1215
1216         BEGIN_BCS_BATCH(batch, 18);
1217         OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
1218         OUT_BCS_BATCH(batch, i);
1219         intel_batchbuffer_data(batch, qm, 64);
1220         ADVANCE_BCS_BATCH(batch);
1221     }
1222 }
1223
/* Emit one MFD_MPEG2_BSD_OBJECT: the slice's data location within the
 * indirect object buffer, its macroblock start position, and the number
 * of macroblocks up to the start of the next slice. */
static void
gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* Workaround: some streams presumably report slice_vertical_position
     * in frame units even for field pictures; the wa_* flag (probed from
     * the slice params elsewhere) selects whether to halve the reported
     * position — TODO confirm against mpeg2_wa_slice_vertical_position(). */
    is_field_pic_wa = is_field_pic &&
        gen6_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    /* End position: start of the next slice, or the bottom of the picture
     * (half height for field pictures) when this is the last slice. */
    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Macroblocks covered by this slice, in raster-scan order. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* DW1/DW2: data length and offset past the whole-byte part of the
     * macroblock_offset (the sub-byte remainder goes into DW3). */
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  slice_param->quantiser_scale_code << 24);
    ADVANCE_BCS_BATCH(batch);
}
1271
1272 static void
1273 gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1274                               struct decode_state *decode_state,
1275                               struct gen6_mfd_context *gen6_mfd_context)
1276 {
1277     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1278     VAPictureParameterBufferMPEG2 *pic_param;
1279     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1280     dri_bo *slice_data_bo;
1281     int i, j;
1282
1283     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1284     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1285
1286     gen6_mfd_mpeg2_decode_init(ctx, decode_state, gen6_mfd_context);
1287     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1288     intel_batchbuffer_emit_mi_flush(batch);
1289     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1290     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1291     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1292     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1293     gen6_mfd_mpeg2_pic_state(ctx, decode_state, gen6_mfd_context);
1294     gen6_mfd_mpeg2_qm_state(ctx, decode_state, gen6_mfd_context);
1295
1296     if (gen6_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1297         gen6_mfd_context->wa_mpeg2_slice_vertical_position =
1298             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1299
1300     for (j = 0; j < decode_state->num_slice_params; j++) {
1301         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1302         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1303         slice_data_bo = decode_state->slice_datas[j]->bo;
1304         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);
1305
1306         if (j == decode_state->num_slice_params - 1)
1307             next_slice_group_param = NULL;
1308         else
1309             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1310
1311         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1312             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1313
1314             if (i < decode_state->slice_params[j]->num_elements - 1)
1315                 next_slice_param = slice_param + 1;
1316             else
1317                 next_slice_param = next_slice_group_param;
1318
1319             gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
1320             slice_param++;
1321         }
1322     }
1323
1324     intel_batchbuffer_end_atomic(batch);
1325     intel_batchbuffer_flush(batch);
1326 }
1327
/* Map VA-API VC-1 picture_type (I, P, B, BI, skipped) to the GEN6
 * encoding; a skipped picture is decoded as a P picture. */
static const int va_to_gen6_vc1_pic_type[5] = {
    GEN6_VC1_I_PICTURE,
    GEN6_VC1_P_PICTURE,
    GEN6_VC1_B_PICTURE,
    GEN6_VC1_BI_PICTURE,
    GEN6_VC1_P_PICTURE,
};

/* Map VA-API mv_mode to the GEN6 motion-vector mode field. */
static const int va_to_gen6_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pef bilinear */
    0, /* Mixed MV */
};

/* B-picture MV scale factors — presumably indexed by the VC-1 BFRACTION
 * code; TODO confirm ordering against the caller (outside this chunk). */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160, 
    224,
};

/* Map VA-API conditional_overlap_flag to the GEN6 CONDOVER field. */
static const int va_to_gen6_vc1_condover[3] = {
    0,
    2,
    3
};

/* Map VA-API VC-1 profile to the GEN6 profile encoding. */
static const int va_to_gen6_vc1_profile[4] = {
    GEN6_VC1_SIMPLE_PROFILE,
    GEN6_VC1_MAIN_PROFILE,
    GEN6_VC1_RESERVED_PROFILE,
    GEN6_VC1_ADVANCED_PROFILE
};
1363
1364 static void 
1365 gen6_mfd_free_vc1_surface(void **data)
1366 {
1367     struct gen6_vc1_surface *gen6_vc1_surface = *data;
1368
1369     if (!gen6_vc1_surface)
1370         return;
1371
1372     dri_bo_unreference(gen6_vc1_surface->dmv);
1373     free(gen6_vc1_surface);
1374     *data = NULL;
1375 }
1376
1377 static void
1378 gen6_mfd_init_vc1_surface(VADriverContextP ctx, 
1379                           VAPictureParameterBufferVC1 *pic_param,
1380                           struct object_surface *obj_surface)
1381 {
1382     struct i965_driver_data *i965 = i965_driver_data(ctx);
1383     struct gen6_vc1_surface *gen6_vc1_surface = obj_surface->private_data;
1384     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1385
1386     obj_surface->free_private_data = gen6_mfd_free_vc1_surface;
1387
1388     if (!gen6_vc1_surface) {
1389         gen6_vc1_surface = calloc(sizeof(struct gen6_vc1_surface), 1);
1390         assert((obj_surface->size & 0x3f) == 0);
1391         obj_surface->private_data = gen6_vc1_surface;
1392     }
1393
1394     gen6_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1395
1396     if (gen6_vc1_surface->dmv == NULL) {
1397         gen6_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1398                                              "direct mv w/r buffer",
1399                                              128 * height_in_mbs * 64,  /* scalable with frame height */
1400                                              0x1000);
1401     }
1402 }
1403
/*
 * Per-picture setup for VC-1 decode: record forward/backward reference
 * surfaces, bind the current render target as the (pre/post) deblocking
 * output, (re)allocate the row-store scratch buffers sized from the coded
 * width, and repack the VA bitplane buffer into the layout the MFX
 * hardware consumes.
 */
static void
gen6_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int i;
    dri_bo *bo;
    int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;

    /* Slot 0: forward reference; invalid if the app passed no usable surface. */
    obj_surface = SURFACE(pic_param->forward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
    else
        gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;

    /* Slot 1: backward reference; falls back to the forward reference ID
     * when absent (P pictures have no backward anchor). */
    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
    else
        gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;

    /* The hardware expects every remaining reference slot populated, so
     * replicate slots 0/1 across the rest of the table. */
    for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
        gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;

    /* Current decoded picture: ensure an NV12 BO exists and per-surface
     * VC-1 private data (DMV buffer, picture type) is initialized. */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Both deblocking outputs point at the same BO; exactly one is marked
     * valid depending on whether the in-loop filter is enabled. */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers, sized per macroblock column. */
    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 6 * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* MPR row store is not used for VC-1 on this generation. */
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen6_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
    
    if (gen6_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2; /* 2 MBs (nibbles) per byte */
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen6_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /* Repack the VA bitplane (one 4-bit entry per MB, two per byte,
         * raster order over the whole frame) into per-row destination bytes.
         * Each destination byte is filled nibble-by-nibble: the shift/OR
         * pushes the previous high nibble down and installs the new value,
         * so after the second (odd) MB of a pair the byte holds both
         * entries.  NOTE(review): the initial low nibble comes from
         * uninitialized BO contents but is shifted out by the second pass
         * (or by the trailing >>= 4 fix-up for odd widths). */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd row width: the last byte got only one nibble; align it. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen6_mfd_context->bitplane_read_buffer.bo = NULL;
}
1532
/*
 * Emit MFX_VC1_PIC_STATE: translate the VA VC-1 picture parameters
 * (quantizer/VOPDQUANT configuration, MV mode, picture type, bitplane
 * flags, transform selection, B-fraction scale factor) into the six-dword
 * hardware command.  The bitfield packing below follows the Gen6 MFX
 * programming layout; statement order and exact values are significant.
 */
static void
gen6_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Derive the alternate-PQUANT configuration and edge mask from the
     * VOPDQUANT syntax elements (VC-1 spec, see DQUANT/DQPROFILE). */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: alternate quantizer applies to all four edges. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                /* "All macroblocks" profile: binary-level vs per-MB PQUANT. */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;
                
            case 0:
                /* All four edges. */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                /* Double edge: DQDBEDGE selects adjacent edge pair. */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                /* Single edge selected by DQSBEDGE. */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* Under intensity compensation the effective MV mode is mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity — interlaced
         * streams are not supported yet, hence the hard assert. */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* B-fraction scale factor table lookup (valid fractions only). */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen6_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
    
    /* Advanced-profile I pictures are programmed as BI on this hardware. */
    if (profile == GEN6_VC1_ADVANCED_PROFILE && 
        picture_type == GEN6_VC1_I_PICTURE)
        picture_type = GEN6_VC1_BI_PICTURE;

    if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /* Direct-mode MV data is only usable when the backward anchor was a
     * P picture (I/BI anchors never produced DMV data). */
    if (picture_type == GEN6_VC1_B_PICTURE) {
        struct gen6_vc1_surface *gen6_vc1_surface = NULL;

        obj_surface = SURFACE(pic_param->backward_reference_picture);
        assert(obj_surface);
        gen6_vc1_surface = obj_surface->private_data;

        if (!gen6_vc1_surface || 
            (va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_I_PICTURE ||
             va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* FCM: 0 = progressive, 1 = frame-interlace; field-interlace encodes
     * top-field-first as 2, bottom-field-first as 3. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* NOTE(review): this compares the raw VA picture_type against the GEN6
     * enum (unlike the mapped `picture_type` above) — confirm the values
     * coincide for B pictures. */
    if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Simple/main profile: overlap smoothing only applies for PQUANT >= 9. */
    overlap = pic_param->sequence_fields.bits.overlap;
    if (profile != GEN6_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
        overlap = 0;

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PIC_STATE | (6 - 2));
    /* DW1: frame size in macroblocks. */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
                  (ALIGN(pic_param->coded_width, 16) / 16));
    /* DW2: quantizer / overlap / picture type / FCM. */
    OUT_BCS_BATCH(batch,
                  pic_param->sequence_fields.bits.syncmarker << 31 |
                  1 << 29 | /* concealment */
                  alt_pq << 24 |
                  pic_param->entrypoint_fields.bits.loopfilter << 23 |
                  overlap << 22 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 21 | /* implicit quantizer */
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 16 |
                  alt_pquant_edge_mask << 12 |
                  alt_pquant_config << 10 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 9 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 8 |
                  va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] << 6 |
                  !pic_param->picture_fields.bits.is_first_field << 5 |
                  picture_type << 2 |
                  fcm << 0);
    /* DW3: bitplane presence (flags are inverted: 1 = raw mode) and MV setup. */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 23 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 20 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 19 |
                  !pic_param->bitplane_present.flags.bp_overflags << 18 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 17 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 16 |
                  pic_param->mv_fields.bits.extended_dmv_range << 14 |
                  pic_param->mv_fields.bits.extended_mv_range << 12 |
                  pic_param->mv_fields.bits.four_mv_switch << 11 |
                  pic_param->fast_uvmc_flag << 10 |
                  unified_mv_mode << 8 |
                  ref_field_pic_polarity << 6 |
                  pic_param->reference_fields.bits.num_reference_pictures << 5 |
                  pic_param->reference_fields.bits.reference_distance << 0);
    /* DW4: scale factor, VLC table selectors, transform coding setup. */
    OUT_BCS_BATCH(batch,
                  scale_factor << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    /* DW5: DMV validity, BRFD, and half-width-in-MBs derived field. */
    OUT_BCS_BATCH(batch,
                  dmv_surface_valid << 13 |
                  brfd << 8 |
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
    ADVANCE_BCS_BATCH(batch);
}
1755
1756 static void
1757 gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1758                              struct decode_state *decode_state,
1759                              struct gen6_mfd_context *gen6_mfd_context)
1760 {
1761     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1762     VAPictureParameterBufferVC1 *pic_param;
1763     int interpolation_mode = 0;
1764     int intensitycomp_single;
1765
1766     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1767     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1768
1769     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1770         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1771          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1772         interpolation_mode = 2; /* Half-pel bilinear */
1773     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1774              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1775               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1776         interpolation_mode = 0; /* Half-pel bicubic */
1777     else
1778         interpolation_mode = 1; /* Quarter-pel bicubic */
1779
1780     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1781     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1782     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1783
1784     BEGIN_BCS_BATCH(batch, 7);
1785     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
1786     OUT_BCS_BATCH(batch,
1787                   0 << 8 | /* FIXME: interlace mode */
1788                   pic_param->rounding_control << 4 |
1789                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
1790     OUT_BCS_BATCH(batch,
1791                   pic_param->luma_shift << 16 |
1792                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1793     OUT_BCS_BATCH(batch, 0);
1794     OUT_BCS_BATCH(batch, 0);
1795     OUT_BCS_BATCH(batch, 0);
1796     OUT_BCS_BATCH(batch,
1797                   interpolation_mode << 19 |
1798                   pic_param->fast_uvmc_flag << 18 |
1799                   0 << 17 | /* FIXME: scale up or down ??? */
1800                   pic_param->range_reduction_frame << 16 |
1801                   0 << 6 | /* FIXME: double ??? */
1802                   0 << 4 |
1803                   intensitycomp_single << 2 |
1804                   intensitycomp_single << 0);
1805     ADVANCE_BCS_BATCH(batch);
1806 }
1807
1808
1809 static void
1810 gen6_mfd_vc1_directmode_state(VADriverContextP ctx,
1811                               struct decode_state *decode_state,
1812                               struct gen6_mfd_context *gen6_mfd_context)
1813 {
1814     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1815     VAPictureParameterBufferVC1 *pic_param;
1816     struct i965_driver_data *i965 = i965_driver_data(ctx);
1817     struct object_surface *obj_surface;
1818     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1819
1820     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1821     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1822
1823     obj_surface = SURFACE(decode_state->current_render_target);
1824
1825     if (obj_surface && obj_surface->private_data) {
1826         dmv_write_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1827     }
1828
1829     obj_surface = SURFACE(pic_param->backward_reference_picture);
1830
1831     if (obj_surface && obj_surface->private_data) {
1832         dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1833     }
1834
1835     BEGIN_BCS_BATCH(batch, 3);
1836     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1837
1838     if (dmv_write_buffer)
1839         OUT_BCS_RELOC(batch, dmv_write_buffer,
1840                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1841                       0);
1842     else
1843         OUT_BCS_BATCH(batch, 0);
1844
1845     if (dmv_read_buffer)
1846         OUT_BCS_RELOC(batch, dmv_read_buffer,
1847                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1848                       0);
1849     else
1850         OUT_BCS_BATCH(batch, 0);
1851                   
1852     ADVANCE_BCS_BATCH(batch);
1853 }
1854
/*
 * Translate a bit offset within the slice header into the bit offset the
 * hardware expects, accounting for the 0x00 0x00 0x03 emulation-prevention
 * bytes that the advanced profile (profile == 3) embeds in the bitstream.
 * For simple/main profile the offset is returned unchanged.
 */
static int
gen6_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int consumed, scan;

    if (profile != 3)
        return in_slice_data_bit_offset;

    /* Walk the raw bytes; every 00 00 03 0x sequence hides one extra
     * emulation-prevention byte, so the raw (scan) position advances
     * faster than the logical (consumed) position. */
    scan = 0;
    for (consumed = 0; consumed < header_bytes; consumed++, scan++) {
        if (buf[scan] == 0 && buf[scan + 1] == 0 && buf[scan + 2] == 3 && buf[scan + 3] < 4) {
            consumed++;
            scan += 2;
        }
    }

    return scan * 8 + in_slice_data_bit_offset % 8;
}
1876
/*
 * Emit one MFD_VC1_BSD_OBJECT command for a slice: resolve the true
 * macroblock bit offset (skipping emulation-prevention bytes for the
 * advanced profile), then program the slice data size/offset and the
 * vertical extent of the slice.
 */
static void
gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* Map the slice data BO just long enough to scan the slice header for
     * emulation-prevention bytes; unmap before emitting batch commands. */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen6_mfd_vc1_get_macroblock_bit_offset(slice_data, 
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* The last slice extends to the bottom of the frame. */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (4 - 2));
    /* Size/offset exclude the slice header bytes; the residual sub-byte
     * bit position goes in the low bits of the last dword. */
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 24 |
                  next_slice_start_vert_pos << 16 |
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
}
1914
/*
 * Top-level VC-1 picture decode: perform per-picture init, then emit the
 * full MFX state sequence (pipe mode, surface, buffer addresses, picture,
 * prediction-pipe, and direct-mode state) followed by one BSD object per
 * slice, inside a single atomic BCS batch.
 */
static void
gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen6_mfd_vc1_decode_init(ctx, decode_state, gen6_mfd_context);
    /* All state + slice commands must land in one atomic batch. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_vc1_pic_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_vc1_pred_pipe_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_vc1_directmode_state(ctx, decode_state, gen6_mfd_context);

    /* Outer loop: slice parameter buffers (groups); inner loop: the slice
     * elements within each group.  Each BSD object needs the parameters of
     * the following slice to know where the current one ends. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen6_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1967
1968 static void 
1969 gen6_mfd_decode_picture(VADriverContextP ctx, 
1970                         VAProfile profile, 
1971                         union codec_state *codec_state,
1972                         struct hw_context *hw_context)
1973
1974 {
1975     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
1976     struct decode_state *decode_state = &codec_state->decode;
1977
1978     assert(gen6_mfd_context);
1979
1980     switch (profile) {
1981     case VAProfileMPEG2Simple:
1982     case VAProfileMPEG2Main:
1983         gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context);
1984         break;
1985         
1986     case VAProfileH264Baseline:
1987     case VAProfileH264Main:
1988     case VAProfileH264High:
1989         gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context);
1990         break;
1991
1992     case VAProfileVC1Simple:
1993     case VAProfileVC1Main:
1994     case VAProfileVC1Advanced:
1995         gen6_mfd_vc1_decode_picture(ctx, decode_state, gen6_mfd_context);
1996         break;
1997
1998     default:
1999         assert(0);
2000         break;
2001     }
2002 }
2003
2004 static void
2005 gen6_mfd_context_destroy(void *hw_context)
2006 {
2007     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
2008
2009     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
2010     gen6_mfd_context->post_deblocking_output.bo = NULL;
2011
2012     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
2013     gen6_mfd_context->pre_deblocking_output.bo = NULL;
2014
2015     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
2016     gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2017
2018     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2019     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2020
2021     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2022     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2023
2024     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
2025     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2026
2027     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
2028     gen6_mfd_context->bitplane_read_buffer.bo = NULL;
2029
2030     intel_batchbuffer_free(gen6_mfd_context->base.batch);
2031     free(gen6_mfd_context);
2032 }
2033
2034 struct hw_context *
2035 gen6_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
2036 {
2037     struct intel_driver_data *intel = intel_driver_data(ctx);
2038     struct gen6_mfd_context *gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
2039     int i;
2040
2041     gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
2042     gen6_mfd_context->base.run = gen6_mfd_decode_picture;
2043     gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
2044
2045     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
2046         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2047         gen6_mfd_context->reference_surface[i].frame_store_id = -1;
2048     }
2049
2050     gen6_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2051     
2052     return (struct hw_context *)gen6_mfd_context;
2053 }