Fix H264 YUV400 surface render issue
[platform/upstream/libva-intel-driver.git] / src / gen6_mfd.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36
37 #include "i965_defines.h"
38 #include "i965_drv_video.h"
39 #include "i965_decoder_utils.h"
40
41 #include "gen6_mfd.h"
42 #include "intel_media.h"
43
/* Zig-zag scan order for 8x8 transform blocks: entry k is the raster
 * (row-major) index of the k-th coefficient in zig-zag scan order.
 * Used to reorder coefficient/quantization data elsewhere in this
 * file before handing it to the MFX hardware. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
54
55 static void
56 gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
57                                VAPictureParameterBufferH264 *pic_param,
58                                struct gen6_mfd_context *gen6_mfd_context)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     int i, j;
62
63     assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
64
65     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
66         int found = 0;
67
68         if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
69             continue;
70
71         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
72             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
73             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
74                 continue;
75
76             if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
77                 found = 1;
78                 break;
79             }
80         }
81
82         if (!found) {
83             struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
84             obj_surface->flags &= ~SURFACE_REFERENCED;
85
86             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
87                 dri_bo_unreference(obj_surface->bo);
88                 obj_surface->bo = NULL;
89                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
90             }
91
92             if (obj_surface->free_private_data)
93                 obj_surface->free_private_data(&obj_surface->private_data);
94
95             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
96             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
97         }
98     }
99
100     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
101         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
102         int found = 0;
103
104         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
105             continue;
106
107         for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
108             if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
109                 continue;
110             
111             if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
112                 found = 1;
113                 break;
114             }
115         }
116
117         if (!found) {
118             int frame_idx;
119             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
120             
121             assert(obj_surface);
122             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
123
124             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
125                 for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
126                     if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
127                         continue;
128
129                     if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx)
130                         break;
131                 }
132
133                 if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface))
134                     break;
135             }
136
137             assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
138
139             for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
140                 if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
141                     gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
142                     gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
143                     break;
144                 }
145             }
146         }
147     }
148
149     /* sort */
150     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) {
151         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
152             gen6_mfd_context->reference_surface[i].frame_store_id == i)
153             continue;
154
155         for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
156             if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
157                 gen6_mfd_context->reference_surface[j].frame_store_id == i) {
158                 VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id;
159                 int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id;
160
161                 gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id;
162                 gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id;
163                 gen6_mfd_context->reference_surface[j].surface_id = id;
164                 gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
165                 break;
166             }
167         }
168     }
169 }
170
171 static void
172 gen6_mfd_init_avc_surface(VADriverContextP ctx, 
173                           VAPictureParameterBufferH264 *pic_param,
174                           struct object_surface *obj_surface)
175 {
176     struct i965_driver_data *i965 = i965_driver_data(ctx);
177     GenAvcSurface *gen6_avc_surface = obj_surface->private_data;
178     int height_in_mbs;
179
180     obj_surface->free_private_data = gen_free_avc_surface;
181     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
182
183     if (!gen6_avc_surface) {
184         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
185         assert((obj_surface->size & 0x3f) == 0);
186         obj_surface->private_data = gen6_avc_surface;
187     }
188
189     gen6_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
190                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
191
192     if (gen6_avc_surface->dmv_top == NULL) {
193         gen6_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
194                                                  "direct mv w/r buffer",
195                                                  128 * height_in_mbs * 64,      /* scalable with frame height */
196                                                  0x1000);
197     }
198
199     if (gen6_avc_surface->dmv_bottom_flag &&
200         gen6_avc_surface->dmv_bottom == NULL) {
201         gen6_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
202                                                     "direct mv w/r buffer",
203                                                     128 * height_in_mbs * 64,   /* scalable with frame height */
204                                                     0x1000);
205     }
206 }
207
/*
 * Emit MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decode of
 * the selected codec (MPEG-2, AVC or VC-1).  The DWord layout below is
 * fixed by the hardware command format, so the emission order must not
 * change.
 */
static void
gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (MFD_MODE_VLD << 16) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 7)  | /* disable TLB prefectch */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 20) | /* round flag in PB slice */
                  (0 << 19) | /* round flag in Intra8x8 */
                  (0 << 7)  | /* expand NOA bus flag */
                  (1 << 6)  | /* must be 1 */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  | /* AVC long field motion vector */
                  (1 << 0));  /* always calculate AVC ILDB boundary strength */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
245
246 static void
247 gen6_mfd_surface_state(VADriverContextP ctx,
248                        struct decode_state *decode_state,
249                        int standard_select,
250                        struct gen6_mfd_context *gen6_mfd_context)
251 {
252     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
253     struct i965_driver_data *i965 = i965_driver_data(ctx);
254     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
255     assert(obj_surface);
256     
257     BEGIN_BCS_BATCH(batch, 6);
258     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch,
261                   ((obj_surface->orig_height - 1) << 19) |
262                   ((obj_surface->orig_width - 1) << 6));
263     OUT_BCS_BATCH(batch,
264                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
265                   (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
266                   (0 << 22) | /* surface object control state, FIXME??? */
267                   ((obj_surface->width - 1) << 3) | /* pitch */
268                   (0 << 2)  | /* must be 0 for interleave U/V */
269                   (1 << 1)  | /* must be y-tiled */
270                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
271     OUT_BCS_BATCH(batch,
272                   (0 << 16) | /* must be 0 for interleave U/V */
273                   (obj_surface->height)); /* y offset for U(cb) */
274     OUT_BCS_BATCH(batch, 0);
275     ADVANCE_BCS_BATCH(batch);
276 }
277
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE: program the pre/post-deblocking output
 * buffers, the intra and deblocking-filter row-store scratch buffers,
 * and the 16 reference picture addresses (DW 7..22).  DWord order is
 * fixed by the command format; every slot must be written even when the
 * corresponding buffer is absent (a zero is emitted instead of a
 * relocation).
 */
static void
gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    if (gen6_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: one address per frame store slot; references are
     * read-only for the GPU (write domain 0). */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface && obj_surface->bo);

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
340
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE: point the MFX indirect bitstream
 * object base at the slice data buffer.  All other indirect object
 * slots are unused in VLD decode mode and are written as zero.
 */
static void
gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
363
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE: program the BSD/MPC row-store and
 * MPR row-store scratch buffers and the bitplane read buffer (VC-1).
 * Slots without a valid buffer are written as zero.
 */
static void
gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
398
/* NOTE(review): the two helpers below are compiled out (#if 0) and kept
 * only for reference; gen6_mfd_aes_state was never implemented. */
#if 0
static void
gen6_mfd_aes_state(VADriverContextP ctx,
                   struct decode_state *decode_state,
                   int standard_select)
{
    /* FIXME */
}

/* Emit an MFX_WAIT to stall the command streamer until the MFX engine
 * is idle. */
static void
gen6_mfd_wait(VADriverContextP ctx,
              struct decode_state *decode_state,
              int standard_select,
              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 1);
    OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
    ADVANCE_BCS_BATCH(batch);
}
#endif
421
/*
 * Emit MFX_AVC_IMG_STATE: per-picture AVC decode parameters derived
 * from the VA picture parameter buffer (frame dimensions in MBs,
 * picture structure, QP offsets, sequence/picture flags).  The bitfield
 * packing below follows the hardware command format and must not be
 * reordered.
 */
static void
gen6_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int qm_present_flag;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        qm_present_flag = 1;
    else
        qm_present_flag = 0; /* built-in QM matrices */

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Field pictures must carry field_pic_flag, frames must not. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: macroblock-adaptive frame/field coding of a frame picture. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
    OUT_BCS_BATCH(batch, 
                  ((width_in_mbs * height_in_mbs) & 0x7fff));
    OUT_BCS_BATCH(batch, 
                  (height_in_mbs << 16) | 
                  (width_in_mbs << 0));
    OUT_BCS_BATCH(batch, 
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (1 << 12) | /* always 1, hardware requirement */
                  (qm_present_flag << 10) |
                  (img_struct << 8) |
                  (16 << 0));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
511
512 static void
513 gen6_mfd_avc_qm_state(VADriverContextP ctx,
514                       struct decode_state *decode_state,
515                       struct gen6_mfd_context *gen6_mfd_context)
516 {
517     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
518     int cmd_len;
519     VAIQMatrixBufferH264 *iq_matrix;
520     VAPictureParameterBufferH264 *pic_param;
521
522     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
523         return;
524
525     iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
526
527     assert(decode_state->pic_param && decode_state->pic_param->buffer);
528     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
529
530     cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */
531
532     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
533         cmd_len += 2 * 16; /* load two 8x8 scaling matrices */
534
535     BEGIN_BCS_BATCH(batch, cmd_len);
536     OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | (cmd_len - 2));
537
538     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
539         OUT_BCS_BATCH(batch, 
540                       (0x0  << 8) | /* don't use default built-in matrices */
541                       (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
542     else
543         OUT_BCS_BATCH(batch, 
544                       (0x0  << 8) | /* don't use default built-in matrices */
545                       (0x3f << 0)); /* six 4x4 scaling matrices */
546
547     intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
548
549     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
550         intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
551
552     ADVANCE_BCS_BATCH(batch);
553 }
554
/*
 * Emit MFX_AVC_DIRECTMODE_STATE: program the direct-mode motion vector
 * buffers for the 16 reference frame store slots and the current
 * picture, followed by the POC (picture order count) list.  For each
 * surface the top-field DMV buffer is always written; the bottom-field
 * slot gets its own buffer only when dmv_bottom_flag is set, otherwise
 * the top buffer is written again.
 */
static void
gen6_mfd_avc_directmode_state(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen6_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15: two DWords (top/bottom DMV) per slot */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen6_avc_surface = obj_surface->private_data;

            if (gen6_avc_surface == NULL) {
                /* No per-surface AVC data allocated yet; emit null addresses. */
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);

                if (gen6_avc_surface->dmv_bottom_flag == 1)
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
                else
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
            }
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field: DMV buffers are written by the
     * GPU, hence the write domain on the relocations. */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen6_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen6_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List: top/bottom field order counts for each frame store slot,
     * looked up from the matching entry in pic_param->ReferenceFrames. */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];
                
                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
            
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POCs of the current picture close the list. */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
654
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: slice type, active reference
 * counts, weighted prediction mode, QP/deblocking parameters, and the
 * macroblock positions of this slice and the next (used by the hardware
 * to delimit the slice; the last slice extends to the bottom of the
 * picture).  next_slice_param == NULL marks the last slice.
 */
static void
gen6_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int weighted_pred_idc = 0;
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Collapse SI->I and SP->P; hardware only distinguishes I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else { 
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1);
    }

    /* In MBAFF pictures slice positions are in macroblock-pair units,
     * hence the shift. */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs; 
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* Last slice: runs to the end of the picture. */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs;
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch, 
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch, 
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch, 
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) | 
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, 
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
744
/*
 * Emit a terminating ("phantom") MFX_AVC_SLICE_STATE command.
 *
 * Sent once after all real slices of the frame (see
 * gen6_mfd_avc_phantom_slice()); most fields are zero since no actual
 * slice data follows.
 */
static void
gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* Start position: one MB row past the bottom of the picture, i.e.
     * total MB count (halved for field pictures, which cover half the
     * frame height each). */
    OUT_BCS_BATCH(batch,
                  height_in_mbs << 24 |
                  width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
770
771 static inline void
772 gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
773                            VAPictureParameterBufferH264 *pic_param,
774                            VASliceParameterBufferH264 *slice_param,
775                            struct gen6_mfd_context *gen6_mfd_context)
776 {
777     gen6_send_avc_ref_idx_state(
778         gen6_mfd_context->base.batch,
779         slice_param,
780         gen6_mfd_context->reference_surface
781     );
782 }
783
784 static void
785 gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
786                                 VAPictureParameterBufferH264 *pic_param,
787                                 VASliceParameterBufferH264 *slice_param,
788                                 struct gen6_mfd_context *gen6_mfd_context)
789 {
790     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
791     int i, j, num_weight_offset_table = 0;
792     short weightoffsets[32 * 6];
793
794     if ((slice_param->slice_type == SLICE_TYPE_P ||
795          slice_param->slice_type == SLICE_TYPE_SP) &&
796         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
797         num_weight_offset_table = 1;
798     }
799     
800     if ((slice_param->slice_type == SLICE_TYPE_B) &&
801         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
802         num_weight_offset_table = 2;
803     }
804
805     for (i = 0; i < num_weight_offset_table; i++) {
806         BEGIN_BCS_BATCH(batch, 98);
807         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
808         OUT_BCS_BATCH(batch, i);
809
810         if (i == 0) {
811             for (j = 0; j < 32; j++) {
812                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
813                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
814                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
815                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
816                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
817                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
818             }
819         } else {
820             for (j = 0; j < 32; j++) {
821                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
822                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
823                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
824                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
825                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
826                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
827             }
828         }
829
830         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
831         ADVANCE_BCS_BATCH(batch);
832     }
833 }
834
835 static int
836 gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
837 {
838     int out_slice_data_bit_offset;
839     int slice_header_size = in_slice_data_bit_offset / 8;
840     int i, j;
841
842     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
843         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
844             i++, j += 2;
845         }
846     }
847
848     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
849
850     if (mode_flag == ENTROPY_CABAC)
851         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
852
853     return out_slice_data_bit_offset;
854 }
855
/*
 * Emit an MFD_AVC_BSD_OBJECT command pointing the bitstream decoder at
 * one slice's data. The slice header is parsed on the CPU side only to
 * locate the real (escaped) bit offset of the slice data; the hardware
 * is then given the byte-aligned start plus the residual bit count.
 */
static void
gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int slice_data_bit_offset;
    uint8_t *slice_data = NULL;

    /* Map the slice buffer just long enough to rescan the header for
     * emulation-prevention bytes. */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
                                                              pic_param->pic_fields.bits.entropy_coding_mode_flag,
                                                              slice_param->slice_data_bit_offset);
    dri_bo_unmap(slice_data_bo);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* Remaining bytes of slice data after the byte-aligned start. */
    OUT_BCS_BATCH(batch, 
                  ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
    /* Byte offset of the slice data within the indirect object buffer. */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* Low bits: how many bits of the first byte belong to the header
     * (7 minus the bit position where slice data starts). */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |
                  (0 << 6)  |
                  ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
892
/*
 * Emit an all-zero MFD_AVC_BSD_OBJECT (no slice data). Paired with
 * gen6_mfd_avc_phantom_slice_state() to terminate the frame's slice
 * sequence; see gen6_mfd_avc_phantom_slice().
 */
static void
gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
                                      VAPictureParameterBufferH264 *pic_param,
                                      struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
909
/*
 * Emit the trailing phantom slice (slice state + empty BSD object) that
 * closes out the AVC slice sequence after all real slices of the frame
 * have been submitted.
 */
static void
gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context);
    gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context);
}
918
/*
 * Per-picture set-up for AVC decoding: scans the slices to decide whether
 * in-loop deblocking is needed, binds the render target surface, and
 * (re)allocates the row-store scratch buffers sized by picture width.
 */
static void
gen6_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    VAPictureH264 *va_pic;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    int width_in_mbs;

    /* Deblocking applies to the whole picture as soon as any slice does
     * not fully disable it (disable_deblocking_filter_idc != 1). */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_frame_store_index(ctx, pic_param, gen6_mfd_context);
    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);

    /* Current decoded picture */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface);
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* initial uv component for YUV400 case: the output surface is always
     * NV12, so for monochrome streams fill the chroma plane with the
     * neutral value 0x80 once, otherwise it would contain garbage. */
    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
         unsigned int uv_offset = obj_surface->width * obj_surface->height; 
         unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 

         drm_intel_gem_bo_map_gtt(obj_surface->bo);
         memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
         drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
    }

    gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* The decoder writes either the post- or the pre-deblocking output,
     * depending on whether in-loop deblocking is active; both aliases
     * reference the same render target bo. */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Scratch buffers below scale with picture width only, so they are
     * reallocated on every picture and the old ones dropped. */
    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC has no bitplane data (VC-1 only). */
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
1028
/*
 * Decode one AVC picture: initializes per-picture state, then emits the
 * full MFX command sequence (pipeline setup, per-picture state, and one
 * state/BSD-object group per slice) into the BCS batch and flushes it.
 */
static void
gen6_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_decode_init(ctx, decode_state, gen6_mfd_context);

    /* Frame-level pipeline and picture state. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_avc_img_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_avc_qm_state(ctx, decode_state, gen6_mfd_context);

    /* Outer loop: slice parameter buffers; inner loop: slices within a
     * buffer. next_slice_param looks one slice ahead (across buffer
     * boundaries) because the hardware slice state needs the next
     * slice's start position. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen6_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }

    /* Terminate the slice sequence, then submit the batch. */
    gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1090
/*
 * Per-picture set-up for MPEG-2 decoding: binds reference surfaces and
 * the render target, and allocates the single scratch buffer MPEG-2
 * needs (BSD/MPC row store). The unused AVC-only buffers are marked
 * invalid.
 */
static void
gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen6_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* MPEG-2 always writes the pre-deblocking output (no in-loop filter). */
    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = 1;

    /* Reallocated per picture: it scales with picture width. */
    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* Buffers not used by the MPEG-2 pipeline. */
    gen6_mfd_context->post_deblocking_output.valid = 0;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
1138
/*
 * Emit MFX_MPEG2_PIC_STATE: packs the f_codes, picture coding extension
 * flags, coding type and the picture dimensions (in macroblocks).
 */
static void
gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int tff, pic_structure;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* For field pictures top_field_first is not meaningful; derive the
     * TFF bit from which field this is and whether it is the first one. */
    pic_structure = pic_param->picture_coding_extension.bits.picture_structure;
    if (pic_structure == MPEG_FRAME)
        tff = pic_param->picture_coding_extension.bits.top_field_first;
    else
        tff = !(pic_param->picture_coding_extension.bits.is_first_field ^
                (pic_structure & MPEG_TOP_FIELD));

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
    /* VA packs the four 4-bit f_codes into one 16-bit field, [0][0] in
     * the high nibble; unpack them into the hardware layout. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  tff << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* Picture size in macroblocks: height in the high half, width low. */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
                  (ALIGN(pic_param->horizontal_size, 16) / 16));
    ADVANCE_BCS_BATCH(batch);
}
1180
1181 static void
1182 gen6_mfd_mpeg2_qm_state(VADriverContextP ctx,
1183                         struct decode_state *decode_state,
1184                         struct gen6_mfd_context *gen6_mfd_context)
1185 {
1186     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1187     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen6_mfd_context->iq_matrix.mpeg2;
1188     int i, j;
1189
1190     /* Update internal QM state */
1191     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1192         VAIQMatrixBufferMPEG2 * const iq_matrix =
1193             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1194
1195         gen_iq_matrix->load_intra_quantiser_matrix =
1196             iq_matrix->load_intra_quantiser_matrix;
1197         if (iq_matrix->load_intra_quantiser_matrix) {
1198             for (j = 0; j < 64; j++)
1199                 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1200                     iq_matrix->intra_quantiser_matrix[j];
1201         }
1202
1203         gen_iq_matrix->load_non_intra_quantiser_matrix =
1204             iq_matrix->load_non_intra_quantiser_matrix;
1205         if (iq_matrix->load_non_intra_quantiser_matrix) {
1206             for (j = 0; j < 64; j++)
1207                 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1208                     iq_matrix->non_intra_quantiser_matrix[j];
1209         }
1210     }
1211
1212     /* Commit QM state to HW */
1213     for (i = 0; i < 2; i++) {
1214         unsigned char *qm = NULL;
1215
1216         if (i == 0) {
1217             if (gen_iq_matrix->load_intra_quantiser_matrix)
1218                 qm = gen_iq_matrix->intra_quantiser_matrix;
1219         } else {
1220             if (gen_iq_matrix->load_non_intra_quantiser_matrix)
1221                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1222         }
1223
1224         if (!qm)
1225             continue;
1226
1227         BEGIN_BCS_BATCH(batch, 18);
1228         OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
1229         OUT_BCS_BATCH(batch, i);
1230         intel_batchbuffer_data(batch, qm, 64);
1231         ADVANCE_BCS_BATCH(batch);
1232     }
1233 }
1234
/*
 * Emit an MFD_MPEG2_BSD_OBJECT for one slice: bitstream location plus the
 * slice's start position and macroblock count, the latter derived from
 * the distance to the next slice (or to the end of the picture).
 */
static void
gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* NOTE(review): when the workaround flag is set, the app apparently
     * reports slice_vertical_position in frame units for field pictures,
     * so it is halved below — confirm against
     * mpeg2_wa_slice_vertical_position(). */
    is_field_pic_wa = is_field_pic &&
        gen6_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    /* End position: start of the next slice, or the bottom of the
     * (field-halved) picture for the last slice. */
    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Macroblocks covered by this slice, in raster-scan order. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  slice_param->quantiser_scale_code << 24);
    ADVANCE_BCS_BATCH(batch);
}
1282
/*
 * Decode one MPEG-2 picture: per-picture init, pipeline and picture
 * state, then one BSD object per slice, submitted as a single BCS batch.
 */
static void
gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen6_mfd_mpeg2_decode_init(ctx, decode_state, gen6_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_mpeg2_pic_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_mpeg2_qm_state(ctx, decode_state, gen6_mfd_context);

    /* Lazily determine (once, < 0 means "unknown") whether the app needs
     * the slice_vertical_position workaround for field pictures. */
    if (gen6_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen6_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    /* One BSD object per slice; next_slice_param looks one slice ahead
     * (across buffer boundaries) to compute the slice's MB count. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1338
/* Map the VA-API VC-1 picture_type (presumably 0=I, 1=P, 2=B, 3=BI,
 * 4=skipped, per va.h — verify) to the GEN6 MFX encoding; skipped
 * pictures are decoded as P. */
static const int va_to_gen6_vc1_pic_type[5] = {
    GEN6_VC1_I_PICTURE,
    GEN6_VC1_P_PICTURE,
    GEN6_VC1_B_PICTURE,
    GEN6_VC1_BI_PICTURE,
    GEN6_VC1_P_PICTURE,
};
1346
/* Map the VA-API VC-1 motion-vector mode to the GEN6 MFX MV-mode field. */
static const int va_to_gen6_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};
1353
/* B-picture scaling factors, indexed by the decoded BFRACTION code
 * (presumably used for direct-mode MV scaling per the VC-1 spec —
 * verify at the table's point of use). */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160, 
    224,
};
1361
/* Map the VA-API VC-1 conditional-overlap (CONDOVER) value to the GEN6
 * hardware encoding. */
static const int va_to_gen6_vc1_condover[3] = {
    0,
    2,
    3
};
1367
/* Map the VA-API VC-1 profile value to the GEN6 hardware profile code. */
static const int va_to_gen6_vc1_profile[4] = {
    GEN6_VC1_SIMPLE_PROFILE,
    GEN6_VC1_MAIN_PROFILE,
    GEN6_VC1_RESERVED_PROFILE,
    GEN6_VC1_ADVANCED_PROFILE
};
1374
1375 static void 
1376 gen6_mfd_free_vc1_surface(void **data)
1377 {
1378     struct gen6_vc1_surface *gen6_vc1_surface = *data;
1379
1380     if (!gen6_vc1_surface)
1381         return;
1382
1383     dri_bo_unreference(gen6_vc1_surface->dmv);
1384     free(gen6_vc1_surface);
1385     *data = NULL;
1386 }
1387
1388 static void
1389 gen6_mfd_init_vc1_surface(VADriverContextP ctx, 
1390                           VAPictureParameterBufferVC1 *pic_param,
1391                           struct object_surface *obj_surface)
1392 {
1393     struct i965_driver_data *i965 = i965_driver_data(ctx);
1394     struct gen6_vc1_surface *gen6_vc1_surface = obj_surface->private_data;
1395     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1396
1397     obj_surface->free_private_data = gen6_mfd_free_vc1_surface;
1398
1399     if (!gen6_vc1_surface) {
1400         gen6_vc1_surface = calloc(sizeof(struct gen6_vc1_surface), 1);
1401         assert((obj_surface->size & 0x3f) == 0);
1402         obj_surface->private_data = gen6_vc1_surface;
1403     }
1404
1405     gen6_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1406
1407     if (gen6_vc1_surface->dmv == NULL) {
1408         gen6_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1409                                              "direct mv w/r buffer",
1410                                              128 * height_in_mbs * 64,  /* scalable with frame height */
1411                                              0x1000);
1412     }
1413 }
1414
/*
 * Per-picture setup for VC-1 decoding: binds the forward/backward
 * reference surfaces and the current render target, routes the output
 * through the pre- or post-deblocking path depending on loopfilter,
 * (re)allocates the row-store scratch buffers, and repacks the VA
 * bitplane buffer (when present) for the hardware.
 */
static void
gen6_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int i;
    dri_bo *bo;
    int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;

    /* reference picture */
    obj_surface = SURFACE(pic_param->forward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
    else
        gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;

    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
    else
        /* no backward reference: fall back to the forward reference id */
        gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;

    /* must do so !!! — remaining slots mirror slots 0/1 alternately */
    for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
        gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Both deblocking outputs point at the render target; exactly one is
     * marked valid, chosen by the loopfilter entrypoint flag. */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers, sized by frame width in macroblocks. */
    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 6 * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* MPR row store is not used for VC-1 */
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen6_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);

    if (gen6_mfd_context->bitplane_read_buffer.valid) {
        /* NOTE: this width_in_mbs shadows the outer one (same value). */
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen6_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /*
         * Repack the VA bitplane: the source carries one 4-bit value per
         * macroblock in raster order, two macroblocks per byte with the
         * even-indexed MB in the high nibble.  Each destination row is
         * bitplane_width bytes; writing each value into the high nibble
         * while shifting the previous one down leaves the first MB of a
         * pair in the low nibble.  NOTE(review): layout inferred from the
         * shifts below — confirm against the MFX bitplane buffer format.
         */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd row width: the last byte received only one value, so
             * move it down into the low nibble. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen6_mfd_context->bitplane_read_buffer.bo = NULL;
}
1543
/*
 * Emit the MFX_VC1_PIC_STATE command.  Derives all the hardware fields
 * from the VA picture parameters: alternative picture quantizer
 * configuration (DQUANT syntax), unified MV mode, B-picture scale
 * factor, remapped picture type, AC transform table selection, direct-MV
 * surface validity, frame coding mode (FCM) and BRFD, then packs them
 * into the 6-dword command.
 */
static void
gen6_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Translate the VC-1 VOPDQUANT syntax into the hardware's
     * alt_pquant_config / alt_pquant_edge_mask pair.
     * NOTE(review): mapping inferred from the dquant/dqprofile cases
     * below — confirm against the VC-1 spec (SMPTE 421M) and MFX docs. */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;
                
            case 0:
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation the effective MV mode is mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* B fraction → scale factor via table (valid indices are 0..20). */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen6_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
    
    /* NOTE(review): I pictures are reported as BI for advanced profile
     * here — presumably a hardware requirement; confirm. */
    if (profile == GEN6_VC1_ADVANCED_PROFILE && 
        picture_type == GEN6_VC1_I_PICTURE)
        picture_type = GEN6_VC1_BI_PICTURE;

    if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /* Direct-MV data is only usable when the backward reference is a
     * P (or skipped) picture that produced it. */
    if (picture_type == GEN6_VC1_B_PICTURE) {
        struct gen6_vc1_surface *gen6_vc1_surface = NULL;

        obj_surface = SURFACE(pic_param->backward_reference_picture);
        assert(obj_surface);
        gen6_vc1_surface = obj_surface->private_data;

        if (!gen6_vc1_surface || 
            (va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_I_PICTURE ||
             va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* FCM: 0/1 pass through; field-interlace (2) encodes the field order. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Simple/main profile: overlap smoothing only applies with PQUANT >= 9. */
    overlap = pic_param->sequence_fields.bits.overlap;
    if (profile != GEN6_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
        overlap = 0;

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
                  (ALIGN(pic_param->coded_width, 16) / 16));
    OUT_BCS_BATCH(batch,
                  pic_param->sequence_fields.bits.syncmarker << 31 |
                  1 << 29 | /* concealment */
                  alt_pq << 24 |
                  pic_param->entrypoint_fields.bits.loopfilter << 23 |
                  overlap << 22 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 21 | /* implicit quantizer */
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 16 |
                  alt_pquant_edge_mask << 12 |
                  alt_pquant_config << 10 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 9 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 8 |
                  va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] << 6 |
                  !pic_param->picture_fields.bits.is_first_field << 5 |
                  picture_type << 2 |
                  fcm << 0);
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 23 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 20 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 19 |
                  !pic_param->bitplane_present.flags.bp_overflags << 18 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 17 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 16 |
                  pic_param->mv_fields.bits.extended_dmv_range << 14 |
                  pic_param->mv_fields.bits.extended_mv_range << 12 |
                  pic_param->mv_fields.bits.four_mv_switch << 11 |
                  pic_param->fast_uvmc_flag << 10 |
                  unified_mv_mode << 8 |
                  ref_field_pic_polarity << 6 |
                  pic_param->reference_fields.bits.num_reference_pictures << 5 |
                  pic_param->reference_fields.bits.reference_distance << 0);
    OUT_BCS_BATCH(batch,
                  scale_factor << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    OUT_BCS_BATCH(batch,
                  dmv_surface_valid << 13 |
                  brfd << 8 |
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
    ADVANCE_BCS_BATCH(batch);
}
1766
1767 static void
1768 gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1769                              struct decode_state *decode_state,
1770                              struct gen6_mfd_context *gen6_mfd_context)
1771 {
1772     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1773     VAPictureParameterBufferVC1 *pic_param;
1774     int interpolation_mode = 0;
1775     int intensitycomp_single;
1776
1777     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1778     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1779
1780     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1781         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1782          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1783         interpolation_mode = 2; /* Half-pel bilinear */
1784     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1785              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1786               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1787         interpolation_mode = 0; /* Half-pel bicubic */
1788     else
1789         interpolation_mode = 1; /* Quarter-pel bicubic */
1790
1791     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1792     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1793     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1794
1795     BEGIN_BCS_BATCH(batch, 7);
1796     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
1797     OUT_BCS_BATCH(batch,
1798                   0 << 8 | /* FIXME: interlace mode */
1799                   pic_param->rounding_control << 4 |
1800                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
1801     OUT_BCS_BATCH(batch,
1802                   pic_param->luma_shift << 16 |
1803                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1804     OUT_BCS_BATCH(batch, 0);
1805     OUT_BCS_BATCH(batch, 0);
1806     OUT_BCS_BATCH(batch, 0);
1807     OUT_BCS_BATCH(batch,
1808                   interpolation_mode << 19 |
1809                   pic_param->fast_uvmc_flag << 18 |
1810                   0 << 17 | /* FIXME: scale up or down ??? */
1811                   pic_param->range_reduction_frame << 16 |
1812                   0 << 6 | /* FIXME: double ??? */
1813                   0 << 4 |
1814                   intensitycomp_single << 2 |
1815                   intensitycomp_single << 0);
1816     ADVANCE_BCS_BATCH(batch);
1817 }
1818
1819
/*
 * Emit the MFX_VC1_DIRECTMODE_STATE command: the direct-MV write buffer
 * comes from the current render target's private data, the read buffer
 * from the backward reference's private data.  Either address may be 0
 * when the corresponding surface or buffer does not exist.
 */
static void
gen6_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* write: the current picture's dmv buffer */
    obj_surface = SURFACE(decode_state->current_render_target);

    if (obj_surface && obj_surface->private_data) {
        dmv_write_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    /* read: the backward reference's dmv buffer */
    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->private_data) {
        dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));

    if (dmv_write_buffer)
        OUT_BCS_RELOC(batch, dmv_write_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (dmv_read_buffer)
        OUT_BCS_RELOC(batch, dmv_read_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
                  
    ADVANCE_BCS_BATCH(batch);
}
1865
/*
 * Adjust a slice-data bit offset for start-code emulation prevention
 * bytes.  Only the advanced profile (3) inserts 0x00 0x00 0x03 escape
 * sequences; for other profiles the offset is returned unchanged.
 * The returned value is the bit offset into the raw buffer at which
 * macroblock data begins.
 */
static int
gen6_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_size = in_slice_data_bit_offset / 8;
    int i = 0, j = 0;

    if (profile != 3)
        return in_slice_data_bit_offset;

    /* Walk the slice header; every 00 00 03 0x escape consumes one
     * extra source byte relative to the payload position. */
    while (i < header_size) {
        if (buf[j] == 0 && buf[j + 1] == 0 && buf[j + 2] == 3 && buf[j + 3] < 4) {
            i += 2;
            j += 3;
        } else {
            i += 1;
            j += 1;
        }
    }

    return 8 * j + in_slice_data_bit_offset % 8;
}
1887
1888 static void
1889 gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
1890                         VAPictureParameterBufferVC1 *pic_param,
1891                         VASliceParameterBufferVC1 *slice_param,
1892                         VASliceParameterBufferVC1 *next_slice_param,
1893                         dri_bo *slice_data_bo,
1894                         struct gen6_mfd_context *gen6_mfd_context)
1895 {
1896     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1897     int next_slice_start_vert_pos;
1898     int macroblock_offset;
1899     uint8_t *slice_data = NULL;
1900
1901     dri_bo_map(slice_data_bo, 0);
1902     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1903     macroblock_offset = gen6_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1904                                                                slice_param->macroblock_offset,
1905                                                                pic_param->sequence_fields.bits.profile);
1906     dri_bo_unmap(slice_data_bo);
1907
1908     if (next_slice_param)
1909         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1910     else
1911         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1912
1913     BEGIN_BCS_BATCH(batch, 4);
1914     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (4 - 2));
1915     OUT_BCS_BATCH(batch, 
1916                   slice_param->slice_data_size - (macroblock_offset >> 3));
1917     OUT_BCS_BATCH(batch, 
1918                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1919     OUT_BCS_BATCH(batch,
1920                   slice_param->slice_vertical_position << 24 |
1921                   next_slice_start_vert_pos << 16 |
1922                   (macroblock_offset & 0x7));
1923     ADVANCE_BCS_BATCH(batch);
1924 }
1925
/*
 * Top-level VC-1 picture decode: performs per-picture init, then emits
 * the full MFX command sequence (pipe mode, surface, buffer addresses,
 * picture/prediction/direct-mode state) followed by one BSD object per
 * slice, and flushes the batch.  The emission order is fixed by the
 * hardware programming sequence.
 */
static void
gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen6_mfd_vc1_decode_init(ctx, decode_state, gen6_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_vc1_pic_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_vc1_pred_pipe_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_vc1_directmode_state(ctx, decode_state, gen6_mfd_context);

    /* Outer loop: slice parameter buffers; inner loop: slices within a
     * buffer.  next_slice_param is needed to compute each slice's
     * vertical extent. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen6_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1978
1979 static void 
1980 gen6_mfd_decode_picture(VADriverContextP ctx, 
1981                         VAProfile profile, 
1982                         union codec_state *codec_state,
1983                         struct hw_context *hw_context)
1984
1985 {
1986     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
1987     struct decode_state *decode_state = &codec_state->decode;
1988
1989     assert(gen6_mfd_context);
1990
1991     switch (profile) {
1992     case VAProfileMPEG2Simple:
1993     case VAProfileMPEG2Main:
1994         gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context);
1995         break;
1996         
1997     case VAProfileH264Baseline:
1998     case VAProfileH264Main:
1999     case VAProfileH264High:
2000         gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context);
2001         break;
2002
2003     case VAProfileVC1Simple:
2004     case VAProfileVC1Main:
2005     case VAProfileVC1Advanced:
2006         gen6_mfd_vc1_decode_picture(ctx, decode_state, gen6_mfd_context);
2007         break;
2008
2009     default:
2010         assert(0);
2011         break;
2012     }
2013 }
2014
2015 static void
2016 gen6_mfd_context_destroy(void *hw_context)
2017 {
2018     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
2019
2020     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
2021     gen6_mfd_context->post_deblocking_output.bo = NULL;
2022
2023     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
2024     gen6_mfd_context->pre_deblocking_output.bo = NULL;
2025
2026     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
2027     gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2028
2029     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2030     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2031
2032     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2033     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2034
2035     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
2036     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2037
2038     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
2039     gen6_mfd_context->bitplane_read_buffer.bo = NULL;
2040
2041     intel_batchbuffer_free(gen6_mfd_context->base.batch);
2042     free(gen6_mfd_context);
2043 }
2044
2045 struct hw_context *
2046 gen6_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
2047 {
2048     struct intel_driver_data *intel = intel_driver_data(ctx);
2049     struct gen6_mfd_context *gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
2050     int i;
2051
2052     gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
2053     gen6_mfd_context->base.run = gen6_mfd_decode_picture;
2054     gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
2055
2056     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
2057         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2058         gen6_mfd_context->reference_surface[i].frame_store_id = -1;
2059     }
2060
2061     gen6_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2062     
2063     return (struct hw_context *)gen6_mfd_context;
2064 }