Avoid depending on va_backend.h for some files
[platform/upstream/libva-intel-driver.git] / src / gen6_mfd.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36
37 #include "i965_defines.h"
38 #include "i965_drv_video.h"
39
40 #include "gen6_mfd.h"
41
42 #define DMV_SIZE        0x88000 /* 557056 bytes for a frame */
43
/* Zig-zag scan order for an 8x8 coefficient block: entry k is the
 * raster (row-major) index of the k-th coefficient in scan order.
 * NOTE(review): not referenced in this chunk — presumably used to
 * reorder scaling lists / quant matrices later in the file; confirm. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
54
55 static void
56 gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
57                                VAPictureParameterBufferH264 *pic_param,
58                                struct gen6_mfd_context *gen6_mfd_context)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     int i, j;
62
63     assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
64
65     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
66         int found = 0;
67
68         if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
69             continue;
70
71         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
72             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
73             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
74                 continue;
75
76             if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
77                 found = 1;
78                 break;
79             }
80         }
81
82         if (!found) {
83             struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
84             obj_surface->flags &= ~SURFACE_REFERENCED;
85
86             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
87                 dri_bo_unreference(obj_surface->bo);
88                 obj_surface->bo = NULL;
89                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
90             }
91
92             if (obj_surface->free_private_data)
93                 obj_surface->free_private_data(&obj_surface->private_data);
94
95             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
96             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
97         }
98     }
99
100     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
101         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
102         int found = 0;
103
104         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
105             continue;
106
107         for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
108             if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
109                 continue;
110             
111             if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
112                 found = 1;
113                 break;
114             }
115         }
116
117         if (!found) {
118             int frame_idx;
119             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
120             
121             assert(obj_surface);
122             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'));
123
124             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
125                 for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
126                     if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
127                         continue;
128
129                     if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx)
130                         break;
131                 }
132
133                 if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface))
134                     break;
135             }
136
137             assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
138
139             for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
140                 if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
141                     gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
142                     gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
143                     break;
144                 }
145             }
146         }
147     }
148
149     /* sort */
150     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) {
151         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
152             gen6_mfd_context->reference_surface[i].frame_store_id == i)
153             continue;
154
155         for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
156             if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
157                 gen6_mfd_context->reference_surface[j].frame_store_id == i) {
158                 VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id;
159                 int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id;
160
161                 gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id;
162                 gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id;
163                 gen6_mfd_context->reference_surface[j].surface_id = id;
164                 gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
165                 break;
166             }
167         }
168     }
169 }
170
171 static void 
172 gen6_mfd_free_avc_surface(void **data)
173 {
174     struct gen6_avc_surface *gen6_avc_surface = *data;
175
176     if (!gen6_avc_surface)
177         return;
178
179     dri_bo_unreference(gen6_avc_surface->dmv_top);
180     gen6_avc_surface->dmv_top = NULL;
181     dri_bo_unreference(gen6_avc_surface->dmv_bottom);
182     gen6_avc_surface->dmv_bottom = NULL;
183
184     free(gen6_avc_surface);
185     *data = NULL;
186 }
187
188 static void
189 gen6_mfd_init_avc_surface(VADriverContextP ctx, 
190                           VAPictureParameterBufferH264 *pic_param,
191                           struct object_surface *obj_surface)
192 {
193     struct i965_driver_data *i965 = i965_driver_data(ctx);
194     struct gen6_avc_surface *gen6_avc_surface = obj_surface->private_data;
195
196     obj_surface->free_private_data = gen6_mfd_free_avc_surface;
197
198     if (!gen6_avc_surface) {
199         gen6_avc_surface = calloc(sizeof(struct gen6_avc_surface), 1);
200         assert((obj_surface->size & 0x3f) == 0);
201         obj_surface->private_data = gen6_avc_surface;
202     }
203
204     gen6_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
205                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
206
207     if (gen6_avc_surface->dmv_top == NULL) {
208         gen6_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
209                                                  "direct mv w/r buffer",
210                                                  DMV_SIZE,
211                                                  0x1000);
212     }
213
214     if (gen6_avc_surface->dmv_bottom_flag &&
215         gen6_avc_surface->dmv_bottom == NULL) {
216         gen6_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
217                                                     "direct mv w/r buffer",
218                                                     DMV_SIZE,
219                                                     0x1000);
220     }
221 }
222
/*
 * Emit MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode
 * for the chosen codec (MPEG-2 / AVC / VC-1) and route the output
 * through the pre- and/or post-deblocking paths prepared by the caller.
 * The exact bit layout follows the gen6 MFX command definition.
 */
static void
gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (MFD_MODE_VLD << 16) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 7)  | /* disable TLB prefectch */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 20) | /* round flag in PB slice */
                  (0 << 19) | /* round flag in Intra8x8 */
                  (0 << 7)  | /* expand NOA bus flag */
                  (1 << 6)  | /* must be 1 */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  | /* AVC long field motion vector */
                  (1 << 0));  /* always calculate AVC ILDB boundary strength */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
260
/*
 * Emit MFX_SURFACE_STATE describing the destination render target:
 * NV12 (planar 4:2:0, interleaved U/V), Y-tiled, with the chroma plane
 * starting at obj_surface->height rows below the luma plane.
 */
static void
gen6_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    /* DW2: surface dimensions in pixels, minus one */
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 19) |
                  ((obj_surface->orig_width - 1) << 6));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be y-tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* must be 0 for interleave U/V */
                  (obj_surface->height)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
292
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 dwords): destination buffers
 * (pre/post deblocking), the row-store scratch buffers, and the 16
 * reference picture base addresses (DW 7..22).  Invalid/absent entries
 * are written as 0.
 */
static void
gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    /* DW1: pre-deblocking destination (read/write by the GPU) */
    if (gen6_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW2: post-deblocking destination */
    if (gen6_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    /* DW5: intra row-store scratch buffer */
    if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW6: deblocking filter row-store scratch buffer */
    if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: the 16 reference picture base addresses (read-only) */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface && obj_surface->bo);

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
355
356 static void
357 gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
358                                  dri_bo *slice_data_bo,
359                                  int standard_select,
360                                  struct gen6_mfd_context *gen6_mfd_context)
361 {
362     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
363
364     BEGIN_BCS_BATCH(batch, 11);
365     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
366     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
367     OUT_BCS_BATCH(batch, 0);
368     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
369     OUT_BCS_BATCH(batch, 0);
370     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
371     OUT_BCS_BATCH(batch, 0);
372     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
373     OUT_BCS_BATCH(batch, 0);
374     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
375     OUT_BCS_BATCH(batch, 0);
376     ADVANCE_BCS_BATCH(batch);
377 }
378
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (4 dwords): the BSD/MPC and MPR
 * row-store scratch buffers (read/write) and the bitplane read buffer
 * (read-only, VC-1 only in practice — only emitted when valid).
 */
static void
gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    /* DW1: BSD/MPC row-store scratch buffer */
    if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW2: MPR row-store scratch buffer */
    if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW3: bitplane read buffer (GPU reads only) */
    if (gen6_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
413
/*
 * Placeholder for MFX_AES_STATE (encrypted bitstream decode).
 * Intentionally empty: AES/protected-content decoding is not
 * implemented; all parameters are currently unused.
 */
static void
gen6_mfd_aes_state(VADriverContextP ctx,
                   struct decode_state *decode_state,
                   int standard_select)
{
    /* FIXME */
}
421
/*
 * Emit a single MFX_WAIT command so the MFX engine stalls until
 * outstanding work completes before the following commands execute.
 * Bit 8 is set per the gen6 MFX_WAIT encoding used throughout this
 * driver (MFX sync flag).
 */
static void
gen6_mfd_wait(VADriverContextP ctx,
              struct decode_state *decode_state,
              int standard_select,
              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 1);
    OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
    ADVANCE_BCS_BATCH(batch);
}
434
/*
 * Emit MFX_AVC_IMG_STATE (13 dwords) from the VA H.264 picture
 * parameters: picture size in MBs, QP index offsets, picture structure
 * (frame / top field / bottom field), and the sequence/picture flags
 * the hardware needs (entropy mode, transform_8x8, MBAFF, ...).
 * Asserts encode the hardware's documented restrictions (no 4:2:2 or
 * 4:4:4, MB count below 0x8000, consistent field flags).
 */
static void
gen6_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int qm_present_flag;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        qm_present_flag = 1;
    else
        qm_present_flag = 0; /* built-in QM matrices */

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* A field picture structure must agree with field_pic_flag. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
    /* DW1: total macroblock count */
    OUT_BCS_BATCH(batch, 
                  ((width_in_mbs * height_in_mbs) & 0x7fff));
    /* DW2: frame size in macroblocks */
    OUT_BCS_BATCH(batch, 
                  (height_in_mbs << 16) | 
                  (width_in_mbs << 0));
    OUT_BCS_BATCH(batch, 
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (1 << 12) | /* always 1, hardware requirement */
                  (qm_present_flag << 10) |
                  (img_struct << 8) |
                  (16 << 0));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW5..DW12: unused for decode, written as zero */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
524
/*
 * Emit MFX_AVC_QM_STATE with the application-supplied scaling lists:
 * always the six 4x4 matrices, plus the two 8x8 matrices when
 * transform_8x8_mode is enabled.  Does nothing when no IQ matrix
 * buffer was provided (the hardware then uses built-in defaults,
 * see qm_present_flag in gen6_mfd_avc_img_state).
 */
static void
gen6_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int cmd_len;
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        cmd_len += 2 * 16; /* load two 8x8 scaling matrices */

    BEGIN_BCS_BATCH(batch, cmd_len);
    OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | (cmd_len - 2));

    /* DW1: per-matrix enable mask (one bit per scaling list) */
    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        OUT_BCS_BATCH(batch, 
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
    else
        OUT_BCS_BATCH(batch, 
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0x3f << 0)); /* six 4x4 scaling matrices */

    intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);

    ADVANCE_BCS_BATCH(batch);
}
567
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 dwords): the direct-MV buffer
 * addresses for all 16 reference frame stores plus the current
 * picture, followed by the POC list (top/bottom field order counts)
 * for the same entries.  Empty frame-store slots are written as zero.
 */
static void
gen6_mfd_avc_directmode_state(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct object_surface *obj_surface;
    struct gen6_avc_surface *gen6_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15: two dwords (top/bottom DMV) each */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen6_avc_surface = obj_surface->private_data;

            if (gen6_avc_surface == NULL) {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);

                /* Without a dedicated bottom-field buffer the top
                 * buffer is reused for both field entries. */
                if (gen6_avc_surface->dmv_bottom_flag == 1)
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
                else
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
            }
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen6_avc_surface = obj_surface->private_data;

    /* Current picture DMV buffers are written by the GPU. */
    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen6_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List: top/bottom field order count per frame-store entry,
     * looked up from the matching ReferenceFrames element. */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];
                
                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
            
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* Finally the POC of the current picture itself. */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
667
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: normalized slice type,
 * active reference counts, weighted prediction mode, QP/deblocking
 * parameters, and the slice's start and end macroblock positions.
 * @next_slice_param is NULL for the last slice of the picture; the
 * end position then defaults to the bottom of the frame and the
 * "last slice" flag is set.
 */
static void
gen6_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int weighted_pred_idc = 0;
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Fold SI into I and SP into P: the hardware only knows I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else { 
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Reference counts and weighted prediction depend on slice type. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1);
    }

    /* In MBAFF pictures each address covers an MB pair, hence the shift. */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs; 
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs;
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch, 
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch, 
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    /* DW4: slice start position (MB units) */
    OUT_BCS_BATCH(batch, 
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) | 
                  (first_mb_in_slice << 0));
    /* DW5: position of the next slice (i.e. this slice's end) */
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, 
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
757
758 static void
759 gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx,
760                                  VAPictureParameterBufferH264 *pic_param,
761                                  struct gen6_mfd_context *gen6_mfd_context)
762 {
763     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
764     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
765     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
766
767     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
768     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
769     OUT_BCS_BATCH(batch, 0);
770     OUT_BCS_BATCH(batch, 0);
771     OUT_BCS_BATCH(batch, 0);
772     OUT_BCS_BATCH(batch,
773                   height_in_mbs << 24 |
774                   width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
775     OUT_BCS_BATCH(batch, 0);
776     OUT_BCS_BATCH(batch, 0);
777     OUT_BCS_BATCH(batch, 0);
778     OUT_BCS_BATCH(batch, 0);
779     OUT_BCS_BATCH(batch, 0);
780     OUT_BCS_BATCH(batch, 0);
781     ADVANCE_BCS_BATCH(batch);
782 }
783
784 static void
785 gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
786                            VAPictureParameterBufferH264 *pic_param,
787                            VASliceParameterBufferH264 *slice_param,
788                            struct gen6_mfd_context *gen6_mfd_context)
789 {
790     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
791     int i, j, num_ref_list;
792     struct {
793         unsigned char bottom_idc:1;
794         unsigned char frame_store_index:4;
795         unsigned char field_picture:1;
796         unsigned char long_term:1;
797         unsigned char non_exist:1;
798     } refs[32];
799
800     if (slice_param->slice_type == SLICE_TYPE_I ||
801         slice_param->slice_type == SLICE_TYPE_SI)
802         return;
803
804     if (slice_param->slice_type == SLICE_TYPE_P ||
805         slice_param->slice_type == SLICE_TYPE_SP) {
806         num_ref_list = 1;
807     } else {
808         num_ref_list = 2;
809     }
810
811     for (i = 0; i < num_ref_list; i++) {
812         VAPictureH264 *va_pic;
813
814         if (i == 0) {
815             va_pic = slice_param->RefPicList0;
816         } else {
817             va_pic = slice_param->RefPicList1;
818         }
819
820         BEGIN_BCS_BATCH(batch, 10);
821         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | (10 - 2));
822         OUT_BCS_BATCH(batch, i);
823
824         for (j = 0; j < 32; j++) {
825             if (va_pic->flags & VA_PICTURE_H264_INVALID) {
826                 refs[j].non_exist = 1;
827                 refs[j].long_term = 1;
828                 refs[j].field_picture = 1;
829                 refs[j].frame_store_index = 0xf;
830                 refs[j].bottom_idc = 1;
831             } else {
832                 int frame_idx;
833                 
834                 for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
835                     if (gen6_mfd_context->reference_surface[frame_idx].surface_id != VA_INVALID_ID &&
836                         va_pic->picture_id == gen6_mfd_context->reference_surface[frame_idx].surface_id) {
837                         assert(frame_idx == gen6_mfd_context->reference_surface[frame_idx].frame_store_id);
838                         break;
839                     }
840                 }
841
842                 assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
843                 
844                 refs[j].non_exist = 0;
845                 refs[j].long_term = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
846                 refs[j].field_picture = !!(va_pic->flags & 
847                                            (VA_PICTURE_H264_TOP_FIELD | 
848                                             VA_PICTURE_H264_BOTTOM_FIELD));
849                 refs[j].frame_store_index = frame_idx;
850                 refs[j].bottom_idc = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
851             }
852
853             va_pic++;
854         }
855         
856         intel_batchbuffer_data(batch, refs, sizeof(refs));
857         ADVANCE_BCS_BATCH(batch);
858     }
859 }
860
861 static void
862 gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
863                                 VAPictureParameterBufferH264 *pic_param,
864                                 VASliceParameterBufferH264 *slice_param,
865                                 struct gen6_mfd_context *gen6_mfd_context)
866 {
867     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
868     int i, j, num_weight_offset_table = 0;
869     short weightoffsets[32 * 6];
870
871     if ((slice_param->slice_type == SLICE_TYPE_P ||
872          slice_param->slice_type == SLICE_TYPE_SP) &&
873         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
874         num_weight_offset_table = 1;
875     }
876     
877     if ((slice_param->slice_type == SLICE_TYPE_B) &&
878         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
879         num_weight_offset_table = 2;
880     }
881
882     for (i = 0; i < num_weight_offset_table; i++) {
883         BEGIN_BCS_BATCH(batch, 98);
884         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
885         OUT_BCS_BATCH(batch, i);
886
887         if (i == 0) {
888             for (j = 0; j < 32; j++) {
889                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
890                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
891                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
892                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
893                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
894                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
895             }
896         } else {
897             for (j = 0; j < 32; j++) {
898                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
899                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
900                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
901                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
902                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
903                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
904             }
905         }
906
907         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
908         ADVANCE_BCS_BATCH(batch);
909     }
910 }
911
912 static int
913 gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
914 {
915     int out_slice_data_bit_offset;
916     int slice_header_size = in_slice_data_bit_offset / 8;
917     int i, j;
918
919     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
920         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
921             i++, j += 2;
922         }
923     }
924
925     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
926
927     if (mode_flag == ENTROPY_CABAC)
928         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
929
930     return out_slice_data_bit_offset;
931 }
932
933 static void
934 gen6_mfd_avc_bsd_object(VADriverContextP ctx,
935                         VAPictureParameterBufferH264 *pic_param,
936                         VASliceParameterBufferH264 *slice_param,
937                         dri_bo *slice_data_bo,
938                         struct gen6_mfd_context *gen6_mfd_context)
939 {
940     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
941     int slice_data_bit_offset;
942     uint8_t *slice_data = NULL;
943
944     dri_bo_map(slice_data_bo, 0);
945     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
946     slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
947                                                               pic_param->pic_fields.bits.entropy_coding_mode_flag,
948                                                               slice_param->slice_data_bit_offset);
949     dri_bo_unmap(slice_data_bo);
950
951     BEGIN_BCS_BATCH(batch, 6);
952     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
953     OUT_BCS_BATCH(batch, 
954                   ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
955     OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
956     OUT_BCS_BATCH(batch,
957                   (0 << 31) |
958                   (0 << 14) |
959                   (0 << 12) |
960                   (0 << 10) |
961                   (0 << 8));
962     OUT_BCS_BATCH(batch,
963                   (0 << 16) |
964                   (0 << 6)  |
965                   ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
966     OUT_BCS_BATCH(batch, 0);
967     ADVANCE_BCS_BATCH(batch);
968 }
969
970 static void
971 gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
972                                       VAPictureParameterBufferH264 *pic_param,
973                                       struct gen6_mfd_context *gen6_mfd_context)
974 {
975     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
976
977     BEGIN_BCS_BATCH(batch, 6);
978     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
979     OUT_BCS_BATCH(batch, 0);
980     OUT_BCS_BATCH(batch, 0);
981     OUT_BCS_BATCH(batch, 0);
982     OUT_BCS_BATCH(batch, 0);
983     OUT_BCS_BATCH(batch, 0);
984     ADVANCE_BCS_BATCH(batch);
985 }
986
/*
 * Append the terminating "phantom" slice for an AVC frame: a zeroed
 * MFX_AVC_SLICE_STATE followed by a zero-length MFD_AVC_BSD_OBJECT.
 * Called once after all real slices have been programmed.
 */
static void
gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context);
    gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context);
}
995
/*
 * Per-frame setup for AVC decoding:
 *  - decide whether the in-loop deblocking filter (ILDB) is needed,
 *  - bind the frame's reference pictures to hardware frame store slots,
 *  - prepare the current render target and route the decoder output
 *    through the post- or pre-deblocking path accordingly,
 *  - (re)allocate the fixed-size row-store scratch buffers.
 */
static void
gen6_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    VAPictureH264 *va_pic;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;

    /* ILDB is required if any slice does not explicitly disable deblocking
     * (disable_deblocking_filter_idc != 1); stop scanning at the first
     * such slice. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_frame_store_index(ctx, pic_param, gen6_mfd_context);

    /* Current decoded picture */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface);
    /* Track whether this picture will later be used as a reference. */
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));

    /* Exactly one of the two output paths is valid: post-deblocking when
     * ILDB is enabled, pre-deblocking otherwise. Both point at the same
     * render target BO. */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Scratch buffer sizes below are fixed worst-case allocations;
     * each old BO is dropped and a fresh one allocated per frame. */
    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      128 * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      30720, /* 4 * 120 * 64 */
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      11520, /* 1.5 * 120 * 64 */
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      7680, /* 1. 0 * 120 * 64 */
                      0x1000);
    assert(bo);
    gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC has no bitplane data (that is a VC-1 concept). */
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
1092
/*
 * Top-level AVC decode entry point: performs per-frame init, then builds
 * and flushes a single atomic BCS batch containing the pipeline setup
 * commands followed by the per-slice state/BSD commands for every slice,
 * and finally the terminating phantom slice.
 */
static void
gen6_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_decode_init(ctx, decode_state, gen6_mfd_context);

    /* All commands for the frame go into one atomic batch so the kernel
     * never splits them across submissions. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_avc_img_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_avc_qm_state(ctx, decode_state, gen6_mfd_context);

    /* Outer loop: slice parameter buffers (slice groups); inner loop:
     * individual slices within each buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen6_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            /* The slice state needs to know where the next slice begins,
             * whether it is in this buffer or the next one. */
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }
    
    gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1154
/*
 * Per-frame setup for MPEG-2 decoding: binds the forward/backward
 * reference pictures to frame store entries, prepares the render target,
 * and allocates the BSD/MPC row-store scratch buffer. MPEG-2 never uses
 * the deblocking path, so only pre_deblocking_output is marked valid.
 */
static void
gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int i;
    dri_bo *bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* reference picture: slot 0 = forward, slot 1 = backward. A missing
     * backward reference falls back to the forward one. */
    obj_surface = SURFACE(pic_param->forward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
    else
        gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;

    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
    else
        gen6_mfd_context->reference_surface[1].surface_id = gen6_mfd_context->reference_surface[0].surface_id;

    /* must do so !!! — the remaining frame store entries have to be
     * populated as well; alias them to the two real references. */
    for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
        gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      11520, /* 1.5 * 120 * 64 */
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* Buffers not used by the MPEG-2 pipeline. */
    gen6_mfd_context->post_deblocking_output.valid = 0;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
1213
1214 static void
1215 gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
1216                          struct decode_state *decode_state,
1217                          struct gen6_mfd_context *gen6_mfd_context)
1218 {
1219     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1220     VAPictureParameterBufferMPEG2 *pic_param;
1221
1222     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1223     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1224
1225     BEGIN_BCS_BATCH(batch, 4);
1226     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
1227     OUT_BCS_BATCH(batch,
1228                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1229                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1230                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1231                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1232                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1233                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1234                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1235                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1236                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1237                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1238                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1239                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1240     OUT_BCS_BATCH(batch,
1241                   pic_param->picture_coding_type << 9);
1242     OUT_BCS_BATCH(batch,
1243                   (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
1244                   (ALIGN(pic_param->horizontal_size, 16) / 16));
1245     ADVANCE_BCS_BATCH(batch);
1246 }
1247
1248 static void
1249 gen6_mfd_mpeg2_qm_state(VADriverContextP ctx,
1250                         struct decode_state *decode_state,
1251                         struct gen6_mfd_context *gen6_mfd_context)
1252 {
1253     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1254     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen6_mfd_context->iq_matrix.mpeg2;
1255     int i, j;
1256
1257     /* Update internal QM state */
1258     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1259         VAIQMatrixBufferMPEG2 * const iq_matrix =
1260             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1261
1262         gen_iq_matrix->load_intra_quantiser_matrix =
1263             iq_matrix->load_intra_quantiser_matrix;
1264         if (iq_matrix->load_intra_quantiser_matrix) {
1265             for (j = 0; j < 64; j++)
1266                 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1267                     iq_matrix->intra_quantiser_matrix[j];
1268         }
1269
1270         gen_iq_matrix->load_non_intra_quantiser_matrix =
1271             iq_matrix->load_non_intra_quantiser_matrix;
1272         if (iq_matrix->load_non_intra_quantiser_matrix) {
1273             for (j = 0; j < 64; j++)
1274                 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1275                     iq_matrix->non_intra_quantiser_matrix[j];
1276         }
1277     }
1278
1279     /* Commit QM state to HW */
1280     for (i = 0; i < 2; i++) {
1281         unsigned char *qm = NULL;
1282
1283         if (i == 0) {
1284             if (gen_iq_matrix->load_intra_quantiser_matrix)
1285                 qm = gen_iq_matrix->intra_quantiser_matrix;
1286         } else {
1287             if (gen_iq_matrix->load_non_intra_quantiser_matrix)
1288                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1289         }
1290
1291         if (!qm)
1292             continue;
1293
1294         BEGIN_BCS_BATCH(batch, 18);
1295         OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
1296         OUT_BCS_BATCH(batch, i);
1297         intel_batchbuffer_data(batch, qm, 64);
1298         ADVANCE_BCS_BATCH(batch);
1299     }
1300 }
1301
1302 static void
1303 gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1304                           VAPictureParameterBufferMPEG2 *pic_param,
1305                           VASliceParameterBufferMPEG2 *slice_param,
1306                           VASliceParameterBufferMPEG2 *next_slice_param,
1307                           struct gen6_mfd_context *gen6_mfd_context)
1308 {
1309     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1310     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1311     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic = 0;
1312
1313     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1314         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1315         is_field_pic = 1;
1316
1317     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic);
1318     hpos0 = slice_param->slice_horizontal_position;
1319
1320     if (next_slice_param == NULL) {
1321         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1322         hpos1 = 0;
1323     } else {
1324         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic);
1325         hpos1 = next_slice_param->slice_horizontal_position;
1326     }
1327
1328     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1329
1330     BEGIN_BCS_BATCH(batch, 5);
1331     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1332     OUT_BCS_BATCH(batch, 
1333                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1334     OUT_BCS_BATCH(batch, 
1335                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1336     OUT_BCS_BATCH(batch,
1337                   hpos0 << 24 |
1338                   vpos0 << 16 |
1339                   mb_count << 8 |
1340                   (next_slice_param == NULL) << 5 |
1341                   (next_slice_param == NULL) << 3 |
1342                   (slice_param->macroblock_offset & 0x7));
1343     OUT_BCS_BATCH(batch,
1344                   slice_param->quantiser_scale_code << 24);
1345     ADVANCE_BCS_BATCH(batch);
1346 }
1347
/*
 * Top-level MPEG-2 decode entry point: per-frame init, then one atomic
 * BCS batch containing the pipeline setup commands followed by a BSD
 * object per slice.
 */
static void
gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen6_mfd_mpeg2_decode_init(ctx, decode_state, gen6_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_mpeg2_pic_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_mpeg2_qm_state(ctx, decode_state, gen6_mfd_context);

    /* Outer loop: slice parameter buffers; inner loop: slices within
     * each buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            /* The BSD object needs the next slice's start position to
             * derive this slice's macroblock count. */
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1399
/* Maps VAPictureParameterBufferVC1 picture_fields.bits.picture_type (0..4)
 * to the GEN6 MFX VC-1 picture type encoding; the last entry maps to a
 * P picture as well. */
static const int va_to_gen6_vc1_pic_type[5] = {
    GEN6_VC1_I_PICTURE,
    GEN6_VC1_P_PICTURE,
    GEN6_VC1_B_PICTURE,
    GEN6_VC1_BI_PICTURE,
    GEN6_VC1_P_PICTURE,
};
1407
/* Maps the VA motion-vector mode (mv_mode, or mv_mode2 under intensity
 * compensation) to the GEN6 unified MV mode field; callers assert the
 * index is < 4 before the lookup. */
static const int va_to_gen6_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};
1414
/* B-picture scale factors indexed by pic_param->b_picture_fraction;
 * only indices 0..20 are valid (the lookup site bounds-checks against 21). */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160, 
    224,
};
1422
/* Maps VA conditional_overlap_flag (0..2, asserted at the lookup site)
 * to the GEN6 CONDOVER field encoding. */
static const int va_to_gen6_vc1_condover[3] = {
    0,
    2,
    3
};
1428
/* Maps VA sequence_fields.bits.profile to the GEN6 VC-1 profile encoding;
 * index 2 is a reserved profile value. */
static const int va_to_gen6_vc1_profile[4] = {
    GEN6_VC1_SIMPLE_PROFILE,
    GEN6_VC1_MAIN_PROFILE,
    GEN6_VC1_RESERVED_PROFILE,
    GEN6_VC1_ADVANCED_PROFILE
};
1435
1436 static void 
1437 gen6_mfd_free_vc1_surface(void **data)
1438 {
1439     struct gen6_vc1_surface *gen6_vc1_surface = *data;
1440
1441     if (!gen6_vc1_surface)
1442         return;
1443
1444     dri_bo_unreference(gen6_vc1_surface->dmv);
1445     free(gen6_vc1_surface);
1446     *data = NULL;
1447 }
1448
1449 static void
1450 gen6_mfd_init_vc1_surface(VADriverContextP ctx, 
1451                           VAPictureParameterBufferVC1 *pic_param,
1452                           struct object_surface *obj_surface)
1453 {
1454     struct i965_driver_data *i965 = i965_driver_data(ctx);
1455     struct gen6_vc1_surface *gen6_vc1_surface = obj_surface->private_data;
1456
1457     obj_surface->free_private_data = gen6_mfd_free_vc1_surface;
1458
1459     if (!gen6_vc1_surface) {
1460         gen6_vc1_surface = calloc(sizeof(struct gen6_vc1_surface), 1);
1461         assert((obj_surface->size & 0x3f) == 0);
1462         obj_surface->private_data = gen6_vc1_surface;
1463     }
1464
1465     gen6_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1466
1467     if (gen6_vc1_surface->dmv == NULL) {
1468         gen6_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1469                                              "direct mv w/r buffer",
1470                                              557056,    /* 64 * 128 * 64 */
1471                                              0x1000);
1472     }
1473 }
1474
1475 static void
1476 gen6_mfd_vc1_decode_init(VADriverContextP ctx,
1477                          struct decode_state *decode_state,
1478                          struct gen6_mfd_context *gen6_mfd_context)
1479 {
1480     VAPictureParameterBufferVC1 *pic_param;
1481     struct i965_driver_data *i965 = i965_driver_data(ctx);
1482     struct object_surface *obj_surface;
1483     int i;
1484     dri_bo *bo;
1485
1486     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1487     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1488
1489     /* reference picture */
1490     obj_surface = SURFACE(pic_param->forward_reference_picture);
1491
1492     if (obj_surface && obj_surface->bo)
1493         gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1494     else
1495         gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1496
1497     obj_surface = SURFACE(pic_param->backward_reference_picture);
1498
1499     if (obj_surface && obj_surface->bo)
1500         gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1501     else
1502         gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1503
1504     /* must do so !!! */
1505     for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
1506         gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;
1507
1508     /* Current decoded picture */
1509     obj_surface = SURFACE(decode_state->current_render_target);
1510     assert(obj_surface);
1511     gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1512     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1513
1514     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
1515     gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1516     dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
1517     gen6_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1518
1519     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1520     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1521     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
1522     gen6_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1523
1524     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
1525     bo = dri_bo_alloc(i965->intel.bufmgr,
1526                       "intra row store",
1527                       128 * 64,
1528                       0x1000);
1529     assert(bo);
1530     gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1531     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1532
1533     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1534     bo = dri_bo_alloc(i965->intel.bufmgr,
1535                       "deblocking filter row store",
1536                       46080, /* 6 * 120 * 64 */
1537                       0x1000);
1538     assert(bo);
1539     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1540     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1541
1542     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1543     bo = dri_bo_alloc(i965->intel.bufmgr,
1544                       "bsd mpc row store",
1545                       11520, /* 1.5 * 120 * 64 */
1546                       0x1000);
1547     assert(bo);
1548     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1549     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1550
1551     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1552
1553     gen6_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1554     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
1555     
1556     if (gen6_mfd_context->bitplane_read_buffer.valid) {
1557         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1558         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1559         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1560         int src_w, src_h;
1561         uint8_t *src = NULL, *dst = NULL;
1562
1563         assert(decode_state->bit_plane->buffer);
1564         src = decode_state->bit_plane->buffer;
1565
1566         bo = dri_bo_alloc(i965->intel.bufmgr,
1567                           "VC-1 Bitplane",
1568                           bitplane_width * bitplane_width,
1569                           0x1000);
1570         assert(bo);
1571         gen6_mfd_context->bitplane_read_buffer.bo = bo;
1572
1573         dri_bo_map(bo, True);
1574         assert(bo->virtual);
1575         dst = bo->virtual;
1576
1577         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1578             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1579                 int src_index, dst_index;
1580                 int src_shift;
1581                 uint8_t src_value;
1582
1583                 src_index = (src_h * width_in_mbs + src_w) / 2;
1584                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1585                 src_value = ((src[src_index] >> src_shift) & 0xf);
1586
1587                 dst_index = src_w / 2;
1588                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1589             }
1590
1591             if (src_w & 1)
1592                 dst[src_w / 2] >>= 4;
1593
1594             dst += bitplane_width;
1595         }
1596
1597         dri_bo_unmap(bo);
1598     } else
1599         gen6_mfd_context->bitplane_read_buffer.bo = NULL;
1600 }
1601
/*
 * Emit MFX_VC1_PIC_STATE (6 dwords) into the BCS batch.  Derives every
 * per-frame control field from the VA picture parameters: the alternate
 * quantizer layout (ALTPQUANT), the unified motion-vector mode, the B-frame
 * scale factor and reference distance, the frame coding mode, overlap
 * smoothing, and the per-bitplane "raw mode" flags.
 */
static void
gen6_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Translate the VA dquant/dq_* bitfields into the hardware's
     * ALTPQUANT config + edge-mask pair; which edges use the alternate
     * quantizer depends on dq_profile / dq_db_edge / dq_sb_edge. */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* dquant == 2: alternate quantizer on all four edges. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                /* "all macroblocks" profile: config depends on whether the
                 * per-MB选择 is binary (dq_binary_level). */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;
                
            case 0:
                /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                /* double-edge profile: dq_db_edge selects the edge pair */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                /* single-edge profile: dq_sb_edge selects the edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* Under intensity compensation the effective MV mode is mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* b_picture_fraction indexes the 21-entry scale-factor table. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen6_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
    
    /* Advanced-profile I pictures are programmed as BI. */
    if (profile == GEN6_VC1_ADVANCED_PROFILE && 
        picture_type == GEN6_VC1_I_PICTURE)
        picture_type = GEN6_VC1_BI_PICTURE;

    /* Intra pictures use AC coding-set index 2 for luma, others index 1. */
    if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;


    /* The direct-MV read surface is only usable for B pictures whose
     * backward reference was itself an inter picture (I/BI refs carry no
     * motion vectors). */
    if (picture_type == GEN6_VC1_B_PICTURE) {
        struct gen6_vc1_surface *gen6_vc1_surface = NULL;

        obj_surface = SURFACE(pic_param->backward_reference_picture);
        assert(obj_surface);
        gen6_vc1_surface = obj_surface->private_data;

        if (!gen6_vc1_surface || 
            (va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_I_PICTURE ||
             va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* FCM encoding: 0/1 pass through; field-interlace (mode 2) becomes
     * 2 or 3 depending on top_field_first. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* BRFD: scaled backward reference frame distance, clamped at 0. */
    if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Overlap smoothing is suppressed for simple/main profile at low
     * quantizer scales. */
    overlap = pic_param->sequence_fields.bits.overlap;
    if (profile != GEN6_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
        overlap = 0;

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PIC_STATE | (6 - 2));
    /* DW1: frame size in macroblocks */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
                  (ALIGN(pic_param->coded_width, 16) / 16));
    /* DW2: quantizer / overlap / picture type / FCM controls */
    OUT_BCS_BATCH(batch,
                  pic_param->sequence_fields.bits.syncmarker << 31 |
                  1 << 29 | /* concealment */
                  alt_pq << 24 |
                  pic_param->entrypoint_fields.bits.loopfilter << 23 |
                  overlap << 22 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 21 | /* implicit quantizer */
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 16 |
                  alt_pquant_edge_mask << 12 |
                  alt_pquant_config << 10 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 9 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 8 |
                  va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] << 6 |
                  !pic_param->picture_fields.bits.is_first_field << 5 |
                  picture_type << 2 |
                  fcm << 0);
    /* DW3: bitplane "raw mode" flags (inverted presence bits) + MV controls */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 23 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 20 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 19 |
                  !pic_param->bitplane_present.flags.bp_overflags << 18 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 17 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 16 |
                  pic_param->mv_fields.bits.extended_dmv_range << 14 |
                  pic_param->mv_fields.bits.extended_mv_range << 12 |
                  pic_param->mv_fields.bits.four_mv_switch << 11 |
                  pic_param->fast_uvmc_flag << 10 |
                  unified_mv_mode << 8 |
                  ref_field_pic_polarity << 6 |
                  pic_param->reference_fields.bits.num_reference_pictures << 5 |
                  pic_param->reference_fields.bits.reference_distance << 0);
    /* DW4: B scale factor, VLC table selectors, transform controls */
    OUT_BCS_BATCH(batch,
                  scale_factor << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    /* DW5: DMV surface valid, BRFD, half-width in MB pairs */
    OUT_BCS_BATCH(batch,
                  dmv_surface_valid << 13 |
                  brfd << 8 |
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
    ADVANCE_BCS_BATCH(batch);
}
1814
1815 static void
1816 gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1817                              struct decode_state *decode_state,
1818                              struct gen6_mfd_context *gen6_mfd_context)
1819 {
1820     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1821     VAPictureParameterBufferVC1 *pic_param;
1822     int interpolation_mode = 0;
1823     int intensitycomp_single;
1824
1825     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1826     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1827
1828     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1829         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1830          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1831         interpolation_mode = 2; /* Half-pel bilinear */
1832     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1833              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1834               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1835         interpolation_mode = 0; /* Half-pel bicubic */
1836     else
1837         interpolation_mode = 1; /* Quarter-pel bicubic */
1838
1839     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1840     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1841     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1842
1843     BEGIN_BCS_BATCH(batch, 7);
1844     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
1845     OUT_BCS_BATCH(batch,
1846                   0 << 8 | /* FIXME: interlace mode */
1847                   pic_param->rounding_control << 4 |
1848                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
1849     OUT_BCS_BATCH(batch,
1850                   pic_param->luma_shift << 16 |
1851                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1852     OUT_BCS_BATCH(batch, 0);
1853     OUT_BCS_BATCH(batch, 0);
1854     OUT_BCS_BATCH(batch, 0);
1855     OUT_BCS_BATCH(batch,
1856                   interpolation_mode << 19 |
1857                   pic_param->fast_uvmc_flag << 18 |
1858                   0 << 17 | /* FIXME: scale up or down ??? */
1859                   pic_param->range_reduction_frame << 16 |
1860                   0 << 6 | /* FIXME: double ??? */
1861                   0 << 4 |
1862                   intensitycomp_single << 2 |
1863                   intensitycomp_single << 0);
1864     ADVANCE_BCS_BATCH(batch);
1865 }
1866
1867
1868 static void
1869 gen6_mfd_vc1_directmode_state(VADriverContextP ctx,
1870                               struct decode_state *decode_state,
1871                               struct gen6_mfd_context *gen6_mfd_context)
1872 {
1873     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1874     VAPictureParameterBufferVC1 *pic_param;
1875     struct i965_driver_data *i965 = i965_driver_data(ctx);
1876     struct object_surface *obj_surface;
1877     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1878
1879     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1880     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1881
1882     obj_surface = SURFACE(decode_state->current_render_target);
1883
1884     if (obj_surface && obj_surface->private_data) {
1885         dmv_write_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1886     }
1887
1888     obj_surface = SURFACE(pic_param->backward_reference_picture);
1889
1890     if (obj_surface && obj_surface->private_data) {
1891         dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1892     }
1893
1894     BEGIN_BCS_BATCH(batch, 3);
1895     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1896
1897     if (dmv_write_buffer)
1898         OUT_BCS_RELOC(batch, dmv_write_buffer,
1899                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1900                       0);
1901     else
1902         OUT_BCS_BATCH(batch, 0);
1903
1904     if (dmv_read_buffer)
1905         OUT_BCS_RELOC(batch, dmv_read_buffer,
1906                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1907                       0);
1908     else
1909         OUT_BCS_BATCH(batch, 0);
1910                   
1911     ADVANCE_BCS_BATCH(batch);
1912 }
1913
/*
 * Translate the slice-header bit offset supplied by the app into the bit
 * offset the hardware expects.  For the advanced profile (3) the slice
 * header still contains emulation-prevention bytes (00 00 03 0x, x < 4);
 * each one found widens the byte offset accordingly.  Other profiles pass
 * the offset through unchanged.
 */
static int
gen6_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int bit_remainder = in_slice_data_bit_offset % 8;
    int scanned, pos;

    if (profile != 3)
        return in_slice_data_bit_offset;

    /* 'scanned' counts header payload bytes, 'pos' tracks the raw buffer
     * position; an emulation-prevention match consumes one extra raw byte. */
    for (scanned = 0, pos = 0; scanned < header_bytes; scanned++, pos++) {
        if (buf[pos] == 0 && buf[pos + 1] == 0 && buf[pos + 2] == 3 && buf[pos + 3] < 4) {
            scanned++;
            pos += 2;
        }
    }

    return 8 * pos + bit_remainder;
}
1935
1936 static void
1937 gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
1938                         VAPictureParameterBufferVC1 *pic_param,
1939                         VASliceParameterBufferVC1 *slice_param,
1940                         VASliceParameterBufferVC1 *next_slice_param,
1941                         dri_bo *slice_data_bo,
1942                         struct gen6_mfd_context *gen6_mfd_context)
1943 {
1944     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1945     int next_slice_start_vert_pos;
1946     int macroblock_offset;
1947     uint8_t *slice_data = NULL;
1948
1949     dri_bo_map(slice_data_bo, 0);
1950     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1951     macroblock_offset = gen6_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1952                                                                slice_param->macroblock_offset,
1953                                                                pic_param->sequence_fields.bits.profile);
1954     dri_bo_unmap(slice_data_bo);
1955
1956     if (next_slice_param)
1957         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1958     else
1959         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1960
1961     BEGIN_BCS_BATCH(batch, 4);
1962     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (4 - 2));
1963     OUT_BCS_BATCH(batch, 
1964                   slice_param->slice_data_size - (macroblock_offset >> 3));
1965     OUT_BCS_BATCH(batch, 
1966                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1967     OUT_BCS_BATCH(batch,
1968                   slice_param->slice_vertical_position << 24 |
1969                   next_slice_start_vert_pos << 16 |
1970                   (macroblock_offset & 0x7));
1971     ADVANCE_BCS_BATCH(batch);
1972 }
1973
1974 static void
1975 gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
1976                             struct decode_state *decode_state,
1977                             struct gen6_mfd_context *gen6_mfd_context)
1978 {
1979     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1980     VAPictureParameterBufferVC1 *pic_param;
1981     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1982     dri_bo *slice_data_bo;
1983     int i, j;
1984
1985     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1986     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1987
1988     gen6_mfd_vc1_decode_init(ctx, decode_state, gen6_mfd_context);
1989     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1990     intel_batchbuffer_emit_mi_flush(batch);
1991     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1992     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1993     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1994     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1995     gen6_mfd_vc1_pic_state(ctx, decode_state, gen6_mfd_context);
1996     gen6_mfd_vc1_pred_pipe_state(ctx, decode_state, gen6_mfd_context);
1997     gen6_mfd_vc1_directmode_state(ctx, decode_state, gen6_mfd_context);
1998
1999     for (j = 0; j < decode_state->num_slice_params; j++) {
2000         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2001         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2002         slice_data_bo = decode_state->slice_datas[j]->bo;
2003         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen6_mfd_context);
2004
2005         if (j == decode_state->num_slice_params - 1)
2006             next_slice_group_param = NULL;
2007         else
2008             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2009
2010         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2011             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2012
2013             if (i < decode_state->slice_params[j]->num_elements - 1)
2014                 next_slice_param = slice_param + 1;
2015             else
2016                 next_slice_param = next_slice_group_param;
2017
2018             gen6_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen6_mfd_context);
2019             slice_param++;
2020         }
2021     }
2022
2023     intel_batchbuffer_end_atomic(batch);
2024     intel_batchbuffer_flush(batch);
2025 }
2026
2027 static void 
2028 gen6_mfd_decode_picture(VADriverContextP ctx, 
2029                         VAProfile profile, 
2030                         union codec_state *codec_state,
2031                         struct hw_context *hw_context)
2032
2033 {
2034     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
2035     struct decode_state *decode_state = &codec_state->dec;
2036
2037     assert(gen6_mfd_context);
2038
2039     switch (profile) {
2040     case VAProfileMPEG2Simple:
2041     case VAProfileMPEG2Main:
2042         gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context);
2043         break;
2044         
2045     case VAProfileH264Baseline:
2046     case VAProfileH264Main:
2047     case VAProfileH264High:
2048         gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context);
2049         break;
2050
2051     case VAProfileVC1Simple:
2052     case VAProfileVC1Main:
2053     case VAProfileVC1Advanced:
2054         gen6_mfd_vc1_decode_picture(ctx, decode_state, gen6_mfd_context);
2055         break;
2056
2057     default:
2058         assert(0);
2059         break;
2060     }
2061 }
2062
2063 static void
2064 gen6_mfd_context_destroy(void *hw_context)
2065 {
2066     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
2067
2068     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
2069     gen6_mfd_context->post_deblocking_output.bo = NULL;
2070
2071     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
2072     gen6_mfd_context->pre_deblocking_output.bo = NULL;
2073
2074     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
2075     gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2076
2077     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2078     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2079
2080     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2081     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2082
2083     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
2084     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2085
2086     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
2087     gen6_mfd_context->bitplane_read_buffer.bo = NULL;
2088
2089     intel_batchbuffer_free(gen6_mfd_context->base.batch);
2090     free(gen6_mfd_context);
2091 }
2092
2093 struct hw_context *
2094 gen6_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
2095 {
2096     struct intel_driver_data *intel = intel_driver_data(ctx);
2097     struct gen6_mfd_context *gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
2098     int i;
2099
2100     gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
2101     gen6_mfd_context->base.run = gen6_mfd_decode_picture;
2102     gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2103
2104     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
2105         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2106         gen6_mfd_context->reference_surface[i].frame_store_id = -1;
2107     }
2108     
2109     return (struct hw_context *)gen6_mfd_context;
2110 }