Merge branch 'master' into vaapi-ext
[profile/ivi/vaapi-intel-driver.git] / src / gen6_mfd.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38
39 #include "i965_defines.h"
40 #include "i965_drv_video.h"
41
42 #include "gen6_mfd.h"
43
44 #define DMV_SIZE        0x88000 /* 557056 bytes for a frame */
45
/* Standard zig-zag scan order for an 8x8 block: entry k is the raster
 * index of the k-th coefficient in scan order.  Presumably used to
 * reorder scan-ordered quantization matrices into raster order — not
 * referenced in this chunk of the file, so confirm against the rest. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
56
57 static void
58 gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
59                                VAPictureParameterBufferH264 *pic_param,
60                                struct gen6_mfd_context *gen6_mfd_context)
61 {
62     struct i965_driver_data *i965 = i965_driver_data(ctx);
63     int i, j;
64
65     assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
66
67     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
68         int found = 0;
69
70         if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
71             continue;
72
73         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
74             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
75             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
76                 continue;
77
78             if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
79                 found = 1;
80                 break;
81             }
82         }
83
84         if (!found) {
85             struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
86             obj_surface->flags &= ~SURFACE_REFERENCED;
87
88             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
89                 dri_bo_unreference(obj_surface->bo);
90                 obj_surface->bo = NULL;
91                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
92             }
93
94             if (obj_surface->free_private_data)
95                 obj_surface->free_private_data(&obj_surface->private_data);
96
97             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
98             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
99         }
100     }
101
102     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
103         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
104         int found = 0;
105
106         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
107             continue;
108
109         for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
110             if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
111                 continue;
112             
113             if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
114                 found = 1;
115                 break;
116             }
117         }
118
119         if (!found) {
120             int frame_idx;
121             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
122             
123             assert(obj_surface);
124             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'));
125
126             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
127                 for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
128                     if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
129                         continue;
130
131                     if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx)
132                         break;
133                 }
134
135                 if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface))
136                     break;
137             }
138
139             assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
140
141             for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
142                 if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
143                     gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
144                     gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
145                     break;
146                 }
147             }
148         }
149     }
150
151     /* sort */
152     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) {
153         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
154             gen6_mfd_context->reference_surface[i].frame_store_id == i)
155             continue;
156
157         for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
158             if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
159                 gen6_mfd_context->reference_surface[j].frame_store_id == i) {
160                 VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id;
161                 int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id;
162
163                 gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id;
164                 gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id;
165                 gen6_mfd_context->reference_surface[j].surface_id = id;
166                 gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
167                 break;
168             }
169         }
170     }
171 }
172
173 static void 
174 gen6_mfd_free_avc_surface(void **data)
175 {
176     struct gen6_avc_surface *gen6_avc_surface = *data;
177
178     if (!gen6_avc_surface)
179         return;
180
181     dri_bo_unreference(gen6_avc_surface->dmv_top);
182     gen6_avc_surface->dmv_top = NULL;
183     dri_bo_unreference(gen6_avc_surface->dmv_bottom);
184     gen6_avc_surface->dmv_bottom = NULL;
185
186     free(gen6_avc_surface);
187     *data = NULL;
188 }
189
190 static void
191 gen6_mfd_init_avc_surface(VADriverContextP ctx, 
192                           VAPictureParameterBufferH264 *pic_param,
193                           struct object_surface *obj_surface)
194 {
195     struct i965_driver_data *i965 = i965_driver_data(ctx);
196     struct gen6_avc_surface *gen6_avc_surface = obj_surface->private_data;
197
198     obj_surface->free_private_data = gen6_mfd_free_avc_surface;
199
200     if (!gen6_avc_surface) {
201         gen6_avc_surface = calloc(sizeof(struct gen6_avc_surface), 1);
202         assert((obj_surface->size & 0x3f) == 0);
203         obj_surface->private_data = gen6_avc_surface;
204     }
205
206     gen6_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
207                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
208
209     if (gen6_avc_surface->dmv_top == NULL) {
210         gen6_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
211                                                  "direct mv w/r buffer",
212                                                  DMV_SIZE,
213                                                  0x1000);
214     }
215
216     if (gen6_avc_surface->dmv_bottom_flag &&
217         gen6_avc_surface->dmv_bottom == NULL) {
218         gen6_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
219                                                     "direct mv w/r buffer",
220                                                     DMV_SIZE,
221                                                     0x1000);
222     }
223 }
224
/*
 * Emit MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode
 * for the selected codec and route output to the pre-/post-deblocking
 * surface according to what the caller marked valid in the context.
 */
static void
gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    /* only MPEG-2, AVC and VC-1 are supported by this function */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (MFD_MODE_VLD << 16) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 7)  | /* disable TLB prefectch */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 20) | /* round flag in PB slice */
                  (0 << 19) | /* round flag in Intra8x8 */
                  (0 << 7)  | /* expand NOA bus flag */
                  (1 << 6)  | /* must be 1 */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  | /* AVC long field motion vector */
                  (1 << 0));  /* always calculate AVC ILDB boundary strength */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
262
/*
 * Emit MFX_SURFACE_STATE describing the current render target: an
 * NV12-style (planar 4:2:0, interleaved U/V) Y-tiled surface whose
 * chroma plane starts obj_surface->height rows below the luma plane.
 */
static void
gen6_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 19) |
                  ((obj_surface->orig_width - 1) << 6));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be y-tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* must be 0 for interleave U/V */
                  (obj_surface->height)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
294
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 dwords): the pre-/post-deblocking
 * destination buffers, the intra-row and deblocking-filter-row scratch
 * buffers, and one reference picture address per frame store slot
 * (DW 7..22).  Invalid/unused entries are emitted as 0.
 */
static void
gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    /* DW1: pre-deblocking destination (written by the decoder) */
    if (gen6_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW2: post-deblocking destination */
    if (gen6_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    /* DW5: intra row store scratch buffer */
    if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW6: deblocking filter row store scratch buffer */
    if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22 */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface && obj_surface->bo);

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
357
358 static void
359 gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
360                                  dri_bo *slice_data_bo,
361                                  int standard_select,
362                                  struct gen6_mfd_context *gen6_mfd_context)
363 {
364     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
365
366     BEGIN_BCS_BATCH(batch, 11);
367     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
368     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
369     OUT_BCS_BATCH(batch, 0);
370     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
371     OUT_BCS_BATCH(batch, 0);
372     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
373     OUT_BCS_BATCH(batch, 0);
374     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
375     OUT_BCS_BATCH(batch, 0);
376     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
377     OUT_BCS_BATCH(batch, 0);
378     ADVANCE_BCS_BATCH(batch);
379 }
380
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE: the BSD/MPC row store scratch
 * buffer, the MPR row store scratch buffer, and the bitplane read
 * buffer (VC-1).  Each address is emitted as 0 when the corresponding
 * context buffer is not marked valid.
 */
static void
gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* read-only for the hardware, hence no write domain */
    if (gen6_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
415
/* Disabled: AES (protected content) state programming and the MFX_WAIT
 * helper are not wired into the decode path yet.  Kept for reference. */
#if 0
static void
gen6_mfd_aes_state(VADriverContextP ctx,
                   struct decode_state *decode_state,
                   int standard_select)
{
    /* FIXME */
}

static void
gen6_mfd_wait(VADriverContextP ctx,
              struct decode_state *decode_state,
              int standard_select,
              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 1);
    OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
    ADVANCE_BCS_BATCH(batch);
}
#endif
438
/*
 * Emit MFX_AVC_IMG_STATE (13 dwords): frame geometry in macroblocks,
 * QP offsets, picture structure (frame / top / bottom field), and the
 * sequence/picture flags the hardware needs (entropy coding mode,
 * transform_8x8, MBAFF, etc.).  Validates via assert() that the
 * parameters describe a picture the SNB MFX unit can decode.
 */
static void
gen6_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int qm_present_flag;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        qm_present_flag = 1;
    else
        qm_present_flag = 0; /* built-in QM matrices */

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* field pictures must carry field_pic_flag and vice versa */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
    OUT_BCS_BATCH(batch, 
                  ((width_in_mbs * height_in_mbs) & 0x7fff));
    OUT_BCS_BATCH(batch, 
                  (height_in_mbs << 16) | 
                  (width_in_mbs << 0));
    OUT_BCS_BATCH(batch, 
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (1 << 12) | /* always 1, hardware requirement */
                  (qm_present_flag << 10) |
                  (img_struct << 8) |
                  (16 << 0));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
528
/*
 * Emit MFX_AVC_QM_STATE: load the six 4x4 scaling matrices and, when
 * transform_8x8_mode is enabled, the two 8x8 matrices from the IQ
 * matrix buffer.  Returns without emitting anything if no IQ matrix
 * was supplied (the hardware then uses its built-in matrices, see
 * qm_present_flag in gen6_mfd_avc_img_state).
 */
static void
gen6_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int cmd_len;
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        cmd_len += 2 * 16; /* load two 8x8 scaling matrices */

    BEGIN_BCS_BATCH(batch, cmd_len);
    OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | (cmd_len - 2));

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        OUT_BCS_BATCH(batch, 
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
    else
        OUT_BCS_BATCH(batch, 
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0x3f << 0)); /* six 4x4 scaling matrices */

    /* copy the raw matrix bytes straight into the batch */
    intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);

    ADVANCE_BCS_BATCH(batch);
}
571
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 dwords):
 *   - one pair of direct-MV buffer addresses (top/bottom) per frame
 *     store slot, then the pair for the current picture; when no
 *     separate bottom buffer exists, dmv_top is emitted for both,
 *   - the POC list: Top/BottomFieldOrderCnt per reference picture,
 *     then for the current picture.
 */
static void
gen6_mfd_avc_directmode_state(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct object_surface *obj_surface;
    struct gen6_avc_surface *gen6_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen6_avc_surface = obj_surface->private_data;

            /* a reference without private data gets zeroed addresses */
            if (gen6_avc_surface == NULL) {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);

                if (gen6_avc_surface->dmv_bottom_flag == 1)
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
                else
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
            }
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen6_avc_surface = obj_surface->private_data;

    /* write domain set: the hardware writes direct MVs for the
     * current picture */
    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen6_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            int found = 0;
            /* look up the VAPictureH264 entry matching this slot to
             * fetch its field order counts */
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];
                
                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
            
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
671
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: normalized slice type
 * (SI -> I, SP -> P), active reference counts per list, weighted
 * prediction mode, QP/deblocking parameters, and the MB coordinates of
 * this slice's start and the next slice's start (or the bottom of the
 * picture for the last slice).  next_slice_param == NULL marks the
 * last slice of the picture.
 */
static void
gen6_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int weighted_pred_idc = 0;
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* fold SI into I and SP into P for the hardware */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else { 
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1);
    }

    /* MBAFF addresses MB pairs, hence the shift */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs; 
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* last slice: runs to the bottom of the picture */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs;
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch, 
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch, 
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch, 
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) | 
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, 
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
761
/*
 * Emit an MFX_AVC_SLICE_STATE command for the trailing "phantom" slice
 * appended after all real slices of the picture.
 *
 * Every field is zeroed except the DW that carries the slice position
 * (compare the real slice state emitted above): the slice vertical
 * position is set to the frame height in MBs and the first-MB number to
 * the total MB count of the (frame or field) picture, i.e. the phantom
 * slice starts one past the end of the picture.  NOTE(review):
 * presumably this terminates hardware decoding of the picture --
 * confirm against the MFX programming documentation.
 */
static void
gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  height_in_mbs << 24 |
                  /* total MBs: halved for a field picture */
                  width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
787
/*
 * Emit MFX_AVC_REF_IDX_STATE for each active reference list of the
 * current slice (list 0 for P/SP, lists 0 and 1 for B; nothing for
 * I/SI slices).
 *
 * Each command carries one packed byte per reference entry, 32 entries
 * = 8 DWs of payload (hence the 10-DW command).  The per-byte layout is
 * expressed with the bitfield struct below.  NOTE(review): this relies
 * on the compiler packing the bitfields LSB-first within the byte --
 * standard on the GCC/x86 targets this driver builds for, but it is an
 * implementation-defined layout.
 */
static void
gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int i, j, num_ref_list;
    struct {
        unsigned char bottom_idc:1;         /* bottom field of the reference */
        unsigned char frame_store_index:4;  /* index into the frame store (0..15) */
        unsigned char field_picture:1;      /* reference is a field picture */
        unsigned char long_term:1;          /* long-term reference */
        unsigned char non_exist:1;          /* entry is invalid / unused */
    } refs[32];

    /* Intra slices carry no reference lists */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI)
        return;

    if (slice_param->slice_type == SLICE_TYPE_P ||
        slice_param->slice_type == SLICE_TYPE_SP) {
        num_ref_list = 1;
    } else {
        num_ref_list = 2;
    }

    for (i = 0; i < num_ref_list; i++) {
        VAPictureH264 *va_pic;

        if (i == 0) {
            va_pic = slice_param->RefPicList0;
        } else {
            va_pic = slice_param->RefPicList1;
        }

        BEGIN_BCS_BATCH(batch, 10);
        OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | (10 - 2));
        OUT_BCS_BATCH(batch, i);    /* reference list index: 0 = L0, 1 = L1 */

        for (j = 0; j < 32; j++) {
            if (va_pic->flags & VA_PICTURE_H264_INVALID) {
                /* Unused entry: mark non-existent with an all-ones pattern */
                refs[j].non_exist = 1;
                refs[j].long_term = 1;
                refs[j].field_picture = 1;
                refs[j].frame_store_index = 0xf;
                refs[j].bottom_idc = 1;
            } else {
                int frame_idx;
                
                /* Find the frame store slot holding this VA surface */
                for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
                    if (gen6_mfd_context->reference_surface[frame_idx].surface_id != VA_INVALID_ID &&
                        va_pic->picture_id == gen6_mfd_context->reference_surface[frame_idx].surface_id) {
                        assert(frame_idx == gen6_mfd_context->reference_surface[frame_idx].frame_store_id);
                        break;
                    }
                }

                assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
                
                refs[j].non_exist = 0;
                refs[j].long_term = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
                refs[j].field_picture = !!(va_pic->flags & 
                                           (VA_PICTURE_H264_TOP_FIELD | 
                                            VA_PICTURE_H264_BOTTOM_FIELD));
                refs[j].frame_store_index = frame_idx;
                refs[j].bottom_idc = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
            }

            va_pic++;
        }
        
        /* 32 one-byte entries = the 8 remaining payload DWs */
        intel_batchbuffer_data(batch, refs, sizeof(refs));
        ADVANCE_BCS_BATCH(batch);
    }
}
864
865 static void
866 gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
867                                 VAPictureParameterBufferH264 *pic_param,
868                                 VASliceParameterBufferH264 *slice_param,
869                                 struct gen6_mfd_context *gen6_mfd_context)
870 {
871     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
872     int i, j, num_weight_offset_table = 0;
873     short weightoffsets[32 * 6];
874
875     if ((slice_param->slice_type == SLICE_TYPE_P ||
876          slice_param->slice_type == SLICE_TYPE_SP) &&
877         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
878         num_weight_offset_table = 1;
879     }
880     
881     if ((slice_param->slice_type == SLICE_TYPE_B) &&
882         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
883         num_weight_offset_table = 2;
884     }
885
886     for (i = 0; i < num_weight_offset_table; i++) {
887         BEGIN_BCS_BATCH(batch, 98);
888         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
889         OUT_BCS_BATCH(batch, i);
890
891         if (i == 0) {
892             for (j = 0; j < 32; j++) {
893                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
894                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
895                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
896                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
897                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
898                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
899             }
900         } else {
901             for (j = 0; j < 32; j++) {
902                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
903                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
904                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
905                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
906                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
907                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
908             }
909         }
910
911         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
912         ADVANCE_BCS_BATCH(batch);
913     }
914 }
915
916 static int
917 gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
918 {
919     int out_slice_data_bit_offset;
920     int slice_header_size = in_slice_data_bit_offset / 8;
921     int i, j;
922
923     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
924         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
925             i++, j += 2;
926         }
927     }
928
929     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
930
931     if (mode_flag == ENTROPY_CABAC)
932         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
933
934     return out_slice_data_bit_offset;
935 }
936
/*
 * Emit an MFD_AVC_BSD_OBJECT command that kicks off hardware decoding of
 * one slice's bitstream data.
 *
 * The slice data BO is briefly mapped so the CPU can rescan the slice
 * header for 00 00 03 emulation-prevention bytes and convert the
 * parser-supplied RBSP bit offset into an offset within the raw escaped
 * stream.  The hardware is then pointed at the byte containing the
 * first macroblock, with the residual bit position passed in the final
 * payload DW.
 */
static void
gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int slice_data_bit_offset;
    uint8_t *slice_data = NULL;

    /* Map the BO just long enough to locate the slice data start */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
                                                              pic_param->pic_fields.bits.entropy_coding_mode_flag,
                                                              slice_param->slice_data_bit_offset);
    dri_bo_unmap(slice_data_bo);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* bytes remaining after the (byte-aligned) slice header */
    OUT_BCS_BATCH(batch, 
                  ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
    /* byte offset of the slice data within the indirect object */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  (0 << 16) |
                  (0 << 6)  |
                  /* bit position of the first MB within its byte */
                  ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
973
/*
 * Emit an all-zero MFD_AVC_BSD_OBJECT to accompany the phantom slice
 * state: no bitstream data is consumed for the terminating slice.
 */
static void
gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
                                      VAPictureParameterBufferH264 *pic_param,
                                      struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
990
/*
 * Append the terminating phantom slice (state + zero BSD object) after
 * all real slices of an AVC picture have been emitted.
 */
static void
gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context);
    gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context);
}
999
/*
 * Per-picture setup for AVC decoding.
 *
 * Scans every slice to decide whether in-loop deblocking is enabled
 * anywhere (disable_deblocking_filter_idc != 1), binds the frame store,
 * attaches the render target as either the post- or pre-deblocking
 * output, and (re)allocates the fixed-size scratch row-store buffers
 * the MFX engine needs.  Buffer sizes are sized for the hardware
 * maximum (120 MBs per row), not the current picture.
 */
static void
gen6_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    VAPictureH264 *va_pic;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;

    /* Deblocking is needed if any slice does not fully disable it */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_frame_store_index(ctx, pic_param, gen6_mfd_context);

    /* Current decoded picture */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface);
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
    gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* The render target backs both outputs; exactly one is marked valid
     * depending on whether deblocking runs */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      128 * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      30720, /* 4 * 120 * 64 */
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      11520, /* 1.5 * 120 * 64 */
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      7680, /* 1.0 * 120 * 64 */
                      0x1000);
    assert(bo);
    gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
1096
/*
 * Top-level AVC decode entry point for one picture.
 *
 * After per-picture init, emits the full MFX command sequence: pipeline
 * setup once, then per slice the direct-mode, slice, ref-idx and
 * weight/offset state followed by the BSD object that triggers
 * decoding.  Each slice needs its successor (for the slice-end
 * position), so the next slice parameter is tracked across both element
 * and buffer boundaries.  A phantom slice terminates the picture before
 * the batch is flushed.
 */
static void
gen6_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_decode_init(ctx, decode_state, gen6_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_avc_img_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_avc_qm_state(ctx, decode_state, gen6_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen6_mfd_context);

        /* First slice of the next buffer, if any (for look-ahead) */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }
    
    gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1158
/*
 * Per-picture setup for MPEG-2 decoding.
 *
 * Binds the forward/backward reference surfaces into frame-store slots
 * 0 and 1 (falling back to the forward reference, or VA_INVALID_ID,
 * when missing), replicates them across the remaining slots, attaches
 * the render target as the pre-deblocking output, and (re)allocates the
 * single row-store scratch buffer MPEG-2 needs.
 */
static void
gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int i;
    dri_bo *bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* reference picture */
    obj_surface = SURFACE(pic_param->forward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
    else
        gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;

    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
    else
        /* no backward reference: reuse the forward one */
        gen6_mfd_context->reference_surface[1].surface_id = gen6_mfd_context->reference_surface[0].surface_id;

    /* must do so !!! -- fill every remaining slot by alternating the two */
    for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
        gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));

    /* MPEG-2 has no in-loop deblocking: only the pre-deblocking output is used */
    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      11520, /* 1.5 * 120 * 64 */
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen6_mfd_context->post_deblocking_output.valid = 0;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
1217
/*
 * Emit MFX_MPEG2_PIC_STATE from the VA-API picture parameters.
 *
 * VA packs the four MPEG-2 f_code nibbles into a single 16-bit field;
 * DW1 unpacks them into the hardware's four 4-bit slots alongside the
 * picture coding extension flags.  DW3 carries the picture dimensions
 * in macroblocks.
 */
static void
gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    OUT_BCS_BATCH(batch,
                  /* height and width in MBs, rounded up */
                  (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
                  (ALIGN(pic_param->horizontal_size, 16) / 16));
    ADVANCE_BCS_BATCH(batch);
}
1251
1252 static void
1253 gen6_mfd_mpeg2_qm_state(VADriverContextP ctx,
1254                         struct decode_state *decode_state,
1255                         struct gen6_mfd_context *gen6_mfd_context)
1256 {
1257     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1258     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen6_mfd_context->iq_matrix.mpeg2;
1259     int i, j;
1260
1261     /* Update internal QM state */
1262     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1263         VAIQMatrixBufferMPEG2 * const iq_matrix =
1264             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1265
1266         gen_iq_matrix->load_intra_quantiser_matrix =
1267             iq_matrix->load_intra_quantiser_matrix;
1268         if (iq_matrix->load_intra_quantiser_matrix) {
1269             for (j = 0; j < 64; j++)
1270                 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1271                     iq_matrix->intra_quantiser_matrix[j];
1272         }
1273
1274         gen_iq_matrix->load_non_intra_quantiser_matrix =
1275             iq_matrix->load_non_intra_quantiser_matrix;
1276         if (iq_matrix->load_non_intra_quantiser_matrix) {
1277             for (j = 0; j < 64; j++)
1278                 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1279                     iq_matrix->non_intra_quantiser_matrix[j];
1280         }
1281     }
1282
1283     /* Commit QM state to HW */
1284     for (i = 0; i < 2; i++) {
1285         unsigned char *qm = NULL;
1286
1287         if (i == 0) {
1288             if (gen_iq_matrix->load_intra_quantiser_matrix)
1289                 qm = gen_iq_matrix->intra_quantiser_matrix;
1290         } else {
1291             if (gen_iq_matrix->load_non_intra_quantiser_matrix)
1292                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1293         }
1294
1295         if (!qm)
1296             continue;
1297
1298         BEGIN_BCS_BATCH(batch, 18);
1299         OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
1300         OUT_BCS_BATCH(batch, i);
1301         intel_batchbuffer_data(batch, qm, 64);
1302         ADVANCE_BCS_BATCH(batch);
1303     }
1304 }
1305
/*
 * Emit an MFD_MPEG2_BSD_OBJECT for one slice.
 *
 * The macroblock count of the slice is derived from the distance (in
 * raster MB order) between this slice's start position and the next
 * slice's -- or the end of the picture when this is the last slice.
 * Vertical positions are halved for field pictures, whose slices span
 * half the frame height.
 */
static void
gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;

    /* start of this slice, in (possibly field-scaled) MB coordinates */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* last slice: runs to the end of the picture */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* bytes of slice data following the (byte-aligned) MB offset */
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |     /* last slice of the picture */
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7)); /* residual bit offset */
    OUT_BCS_BATCH(batch,
                  slice_param->quantiser_scale_code << 24);
    ADVANCE_BCS_BATCH(batch);
}
1351
/*
 * Top-level MPEG-2 decode entry point for one picture: per-picture
 * init, pipeline setup, then one BSD object per slice (each needing its
 * successor to compute the slice extent), ending with a batch flush.
 */
static void
gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen6_mfd_mpeg2_decode_init(ctx, decode_state, gen6_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_mpeg2_pic_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_mpeg2_qm_state(ctx, decode_state, gen6_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);

        /* First slice of the next buffer, if any (for look-ahead) */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1403
/* Map the VA-API VC-1 picture_type field (0..4) to the GEN6 MFX picture
 * type encoding; index 4 maps to P (presumably the "skipped" picture
 * type — TODO confirm against the VA-API VC-1 definition). */
static const int va_to_gen6_vc1_pic_type[5] = {
    GEN6_VC1_I_PICTURE,
    GEN6_VC1_P_PICTURE,
    GEN6_VC1_B_PICTURE,
    GEN6_VC1_BI_PICTURE,
    GEN6_VC1_P_PICTURE,
};
1411
/* Map the VA-API VC-1 motion-vector mode to the GEN6 unified MV mode
 * field emitted in MFX_VC1_PIC_STATE (see gen6_mfd_vc1_pic_state). */
static const int va_to_gen6_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};
1418
/* Scale factors (in 1/256 units) indexed by pic_param->b_picture_fraction;
 * used in gen6_mfd_vc1_pic_state to derive the B-picture reference
 * distance (brfd) and the SCALEFACTOR field of MFX_VC1_PIC_STATE. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160, 
    224,
};
1426
/* Map pic_param->conditional_overlap_flag (0..2) to the CONDOVER
 * encoding expected by MFX_VC1_PIC_STATE. */
static const int va_to_gen6_vc1_condover[3] = {
    0,
    2,
    3
};
1432
/* Map sequence_fields.bits.profile (0..3) to the GEN6 VC-1 profile
 * codes; index 2 is reserved in the VC-1 bitstream. */
static const int va_to_gen6_vc1_profile[4] = {
    GEN6_VC1_SIMPLE_PROFILE,
    GEN6_VC1_MAIN_PROFILE,
    GEN6_VC1_RESERVED_PROFILE,
    GEN6_VC1_ADVANCED_PROFILE
};
1439
1440 static void 
1441 gen6_mfd_free_vc1_surface(void **data)
1442 {
1443     struct gen6_vc1_surface *gen6_vc1_surface = *data;
1444
1445     if (!gen6_vc1_surface)
1446         return;
1447
1448     dri_bo_unreference(gen6_vc1_surface->dmv);
1449     free(gen6_vc1_surface);
1450     *data = NULL;
1451 }
1452
1453 static void
1454 gen6_mfd_init_vc1_surface(VADriverContextP ctx, 
1455                           VAPictureParameterBufferVC1 *pic_param,
1456                           struct object_surface *obj_surface)
1457 {
1458     struct i965_driver_data *i965 = i965_driver_data(ctx);
1459     struct gen6_vc1_surface *gen6_vc1_surface = obj_surface->private_data;
1460
1461     obj_surface->free_private_data = gen6_mfd_free_vc1_surface;
1462
1463     if (!gen6_vc1_surface) {
1464         gen6_vc1_surface = calloc(sizeof(struct gen6_vc1_surface), 1);
1465         assert((obj_surface->size & 0x3f) == 0);
1466         obj_surface->private_data = gen6_vc1_surface;
1467     }
1468
1469     gen6_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1470
1471     if (gen6_vc1_surface->dmv == NULL) {
1472         gen6_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1473                                              "direct mv w/r buffer",
1474                                              557056,    /* 64 * 128 * 64 */
1475                                              0x1000);
1476     }
1477 }
1478
1479 static void
1480 gen6_mfd_vc1_decode_init(VADriverContextP ctx,
1481                          struct decode_state *decode_state,
1482                          struct gen6_mfd_context *gen6_mfd_context)
1483 {
1484     VAPictureParameterBufferVC1 *pic_param;
1485     struct i965_driver_data *i965 = i965_driver_data(ctx);
1486     struct object_surface *obj_surface;
1487     int i;
1488     dri_bo *bo;
1489
1490     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1491     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1492
1493     /* reference picture */
1494     obj_surface = SURFACE(pic_param->forward_reference_picture);
1495
1496     if (obj_surface && obj_surface->bo)
1497         gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1498     else
1499         gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1500
1501     obj_surface = SURFACE(pic_param->backward_reference_picture);
1502
1503     if (obj_surface && obj_surface->bo)
1504         gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1505     else
1506         gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1507
1508     /* must do so !!! */
1509     for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
1510         gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;
1511
1512     /* Current decoded picture */
1513     obj_surface = SURFACE(decode_state->current_render_target);
1514     assert(obj_surface);
1515     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1516     gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1517
1518     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
1519     gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1520     dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
1521     gen6_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1522
1523     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1524     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1525     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
1526     gen6_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1527
1528     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
1529     bo = dri_bo_alloc(i965->intel.bufmgr,
1530                       "intra row store",
1531                       128 * 64,
1532                       0x1000);
1533     assert(bo);
1534     gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1535     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1536
1537     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1538     bo = dri_bo_alloc(i965->intel.bufmgr,
1539                       "deblocking filter row store",
1540                       46080, /* 6 * 120 * 64 */
1541                       0x1000);
1542     assert(bo);
1543     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1544     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1545
1546     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1547     bo = dri_bo_alloc(i965->intel.bufmgr,
1548                       "bsd mpc row store",
1549                       11520, /* 1.5 * 120 * 64 */
1550                       0x1000);
1551     assert(bo);
1552     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1553     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1554
1555     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1556
1557     gen6_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1558     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
1559     
1560     if (gen6_mfd_context->bitplane_read_buffer.valid) {
1561         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1562         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1563         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1564         int src_w, src_h;
1565         uint8_t *src = NULL, *dst = NULL;
1566
1567         assert(decode_state->bit_plane->buffer);
1568         src = decode_state->bit_plane->buffer;
1569
1570         bo = dri_bo_alloc(i965->intel.bufmgr,
1571                           "VC-1 Bitplane",
1572                           bitplane_width * bitplane_width,
1573                           0x1000);
1574         assert(bo);
1575         gen6_mfd_context->bitplane_read_buffer.bo = bo;
1576
1577         dri_bo_map(bo, True);
1578         assert(bo->virtual);
1579         dst = bo->virtual;
1580
1581         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1582             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1583                 int src_index, dst_index;
1584                 int src_shift;
1585                 uint8_t src_value;
1586
1587                 src_index = (src_h * width_in_mbs + src_w) / 2;
1588                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1589                 src_value = ((src[src_index] >> src_shift) & 0xf);
1590
1591                 dst_index = src_w / 2;
1592                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1593             }
1594
1595             if (src_w & 1)
1596                 dst[src_w / 2] >>= 4;
1597
1598             dst += bitplane_width;
1599         }
1600
1601         dri_bo_unmap(bo);
1602     } else
1603         gen6_mfd_context->bitplane_read_buffer.bo = NULL;
1604 }
1605
/*
 * Emit the 6-dword MFX_VC1_PIC_STATE command for the current picture.
 *
 * Derives every picture-level hardware field from the VA-API VC-1
 * picture parameters: the alternative-quantizer configuration/edge
 * mask (DQUANT syntax), the unified MV mode, the B-picture scale
 * factor and reference distance, the frame coding mode, overlap
 * smoothing, direct-MV surface validity, and the per-bitplane
 * presence flags. The packing below mirrors the hardware command
 * layout, so statement order and bit positions must not change.
 */
static void
gen6_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Translate the VC-1 DQUANT syntax elements into the hardware's
     * ALTPQUANT config and per-edge mask. */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                /* All-macroblock profile: binary vs. arbitrary MB quantizer. */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;
                
            case 0:
                /* All four picture edges. */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                /* Double-edge: dqdbedge selects the pair. */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                /* Single edge selected by dqsbedge. */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation the effective MV mode is mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen6_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
    
    /* NOTE(review): advanced-profile I pictures are programmed as BI —
     * presumably a hardware requirement; confirm against the SNB PRM. */
    if (profile == GEN6_VC1_ADVANCED_PROFILE && 
        picture_type == GEN6_VC1_I_PICTURE)
        picture_type = GEN6_VC1_BI_PICTURE;

    /* Intra pictures use AC coding-set index 2 for luma, inter use index 1. */
    if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;


    /* The direct-MV read surface is only valid when the backward
     * reference carries motion vectors (i.e. it is not intra-coded). */
    if (picture_type == GEN6_VC1_B_PICTURE) {
        struct gen6_vc1_surface *gen6_vc1_surface = NULL;

        obj_surface = SURFACE(pic_param->backward_reference_picture);
        assert(obj_surface);
        gen6_vc1_surface = obj_surface->private_data;

        if (!gen6_vc1_surface || 
            (va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_I_PICTURE ||
             va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* FCM: 0/1 pass through (progressive / frame-interlace); field
     * pictures encode field order in the low bit. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* B-picture reference distance, scaled by the BFRACTION factor. */
    if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Simple/main profile disables overlap smoothing below PQUANT 9. */
    overlap = pic_param->sequence_fields.bits.overlap;
    if (profile != GEN6_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
        overlap = 0;

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
                  (ALIGN(pic_param->coded_width, 16) / 16));
    OUT_BCS_BATCH(batch,
                  pic_param->sequence_fields.bits.syncmarker << 31 |
                  1 << 29 | /* concealment */
                  alt_pq << 24 |
                  pic_param->entrypoint_fields.bits.loopfilter << 23 |
                  overlap << 22 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 21 | /* implicit quantizer */
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 16 |
                  alt_pquant_edge_mask << 12 |
                  alt_pquant_config << 10 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 9 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 8 |
                  va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] << 6 |
                  !pic_param->picture_fields.bits.is_first_field << 5 |
                  picture_type << 2 |
                  fcm << 0);
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 23 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 20 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 19 |
                  !pic_param->bitplane_present.flags.bp_overflags << 18 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 17 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 16 |
                  pic_param->mv_fields.bits.extended_dmv_range << 14 |
                  pic_param->mv_fields.bits.extended_mv_range << 12 |
                  pic_param->mv_fields.bits.four_mv_switch << 11 |
                  pic_param->fast_uvmc_flag << 10 |
                  unified_mv_mode << 8 |
                  ref_field_pic_polarity << 6 |
                  pic_param->reference_fields.bits.num_reference_pictures << 5 |
                  pic_param->reference_fields.bits.reference_distance << 0);
    OUT_BCS_BATCH(batch,
                  scale_factor << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    OUT_BCS_BATCH(batch,
                  dmv_surface_valid << 13 |
                  brfd << 8 |
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
    ADVANCE_BCS_BATCH(batch);
}
1818
1819 static void
1820 gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1821                              struct decode_state *decode_state,
1822                              struct gen6_mfd_context *gen6_mfd_context)
1823 {
1824     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1825     VAPictureParameterBufferVC1 *pic_param;
1826     int interpolation_mode = 0;
1827     int intensitycomp_single;
1828
1829     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1830     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1831
1832     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1833         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1834          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1835         interpolation_mode = 2; /* Half-pel bilinear */
1836     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1837              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1838               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1839         interpolation_mode = 0; /* Half-pel bicubic */
1840     else
1841         interpolation_mode = 1; /* Quarter-pel bicubic */
1842
1843     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1844     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1845     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1846
1847     BEGIN_BCS_BATCH(batch, 7);
1848     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
1849     OUT_BCS_BATCH(batch,
1850                   0 << 8 | /* FIXME: interlace mode */
1851                   pic_param->rounding_control << 4 |
1852                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
1853     OUT_BCS_BATCH(batch,
1854                   pic_param->luma_shift << 16 |
1855                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1856     OUT_BCS_BATCH(batch, 0);
1857     OUT_BCS_BATCH(batch, 0);
1858     OUT_BCS_BATCH(batch, 0);
1859     OUT_BCS_BATCH(batch,
1860                   interpolation_mode << 19 |
1861                   pic_param->fast_uvmc_flag << 18 |
1862                   0 << 17 | /* FIXME: scale up or down ??? */
1863                   pic_param->range_reduction_frame << 16 |
1864                   0 << 6 | /* FIXME: double ??? */
1865                   0 << 4 |
1866                   intensitycomp_single << 2 |
1867                   intensitycomp_single << 0);
1868     ADVANCE_BCS_BATCH(batch);
1869 }
1870
1871
1872 static void
1873 gen6_mfd_vc1_directmode_state(VADriverContextP ctx,
1874                               struct decode_state *decode_state,
1875                               struct gen6_mfd_context *gen6_mfd_context)
1876 {
1877     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1878     VAPictureParameterBufferVC1 *pic_param;
1879     struct i965_driver_data *i965 = i965_driver_data(ctx);
1880     struct object_surface *obj_surface;
1881     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1882
1883     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1884     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1885
1886     obj_surface = SURFACE(decode_state->current_render_target);
1887
1888     if (obj_surface && obj_surface->private_data) {
1889         dmv_write_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1890     }
1891
1892     obj_surface = SURFACE(pic_param->backward_reference_picture);
1893
1894     if (obj_surface && obj_surface->private_data) {
1895         dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1896     }
1897
1898     BEGIN_BCS_BATCH(batch, 3);
1899     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1900
1901     if (dmv_write_buffer)
1902         OUT_BCS_RELOC(batch, dmv_write_buffer,
1903                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1904                       0);
1905     else
1906         OUT_BCS_BATCH(batch, 0);
1907
1908     if (dmv_read_buffer)
1909         OUT_BCS_RELOC(batch, dmv_read_buffer,
1910                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1911                       0);
1912     else
1913         OUT_BCS_BATCH(batch, 0);
1914                   
1915     ADVANCE_BCS_BATCH(batch);
1916 }
1917
/*
 * Translate a macroblock bit offset from the unescaped (RBSP) bitstream
 * into the escaped one. Only the advanced profile (3) inserts
 * 00 00 03 emulation-prevention bytes; every such byte found inside the
 * slice header prefix pushes the escaped offset three bytes forward for
 * every two unescaped bytes consumed. Other profiles pass through.
 */
static int
gen6_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int consumed, pos;

    if (profile != 3)
        return in_slice_data_bit_offset;

    /* consumed counts unescaped bytes, pos indexes the escaped stream. */
    for (consumed = 0, pos = 0; consumed < header_bytes; consumed++, pos++) {
        if (buf[pos] == 0 && buf[pos + 1] == 0 && buf[pos + 2] == 3 && buf[pos + 3] < 4) {
            consumed++;
            pos += 2;
        }
    }

    return pos * 8 + in_slice_data_bit_offset % 8;
}
1939
1940 static void
1941 gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
1942                         VAPictureParameterBufferVC1 *pic_param,
1943                         VASliceParameterBufferVC1 *slice_param,
1944                         VASliceParameterBufferVC1 *next_slice_param,
1945                         dri_bo *slice_data_bo,
1946                         struct gen6_mfd_context *gen6_mfd_context)
1947 {
1948     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1949     int next_slice_start_vert_pos;
1950     int macroblock_offset;
1951     uint8_t *slice_data = NULL;
1952
1953     dri_bo_map(slice_data_bo, 0);
1954     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1955     macroblock_offset = gen6_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1956                                                                slice_param->macroblock_offset,
1957                                                                pic_param->sequence_fields.bits.profile);
1958     dri_bo_unmap(slice_data_bo);
1959
1960     if (next_slice_param)
1961         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1962     else
1963         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1964
1965     BEGIN_BCS_BATCH(batch, 4);
1966     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (4 - 2));
1967     OUT_BCS_BATCH(batch, 
1968                   slice_param->slice_data_size - (macroblock_offset >> 3));
1969     OUT_BCS_BATCH(batch, 
1970                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1971     OUT_BCS_BATCH(batch,
1972                   slice_param->slice_vertical_position << 24 |
1973                   next_slice_start_vert_pos << 16 |
1974                   (macroblock_offset & 0x7));
1975     ADVANCE_BCS_BATCH(batch);
1976 }
1977
1978 static void
1979 gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
1980                             struct decode_state *decode_state,
1981                             struct gen6_mfd_context *gen6_mfd_context)
1982 {
1983     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1984     VAPictureParameterBufferVC1 *pic_param;
1985     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1986     dri_bo *slice_data_bo;
1987     int i, j;
1988
1989     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1990     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1991
1992     gen6_mfd_vc1_decode_init(ctx, decode_state, gen6_mfd_context);
1993     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1994     intel_batchbuffer_emit_mi_flush(batch);
1995     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1996     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1997     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1998     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1999     gen6_mfd_vc1_pic_state(ctx, decode_state, gen6_mfd_context);
2000     gen6_mfd_vc1_pred_pipe_state(ctx, decode_state, gen6_mfd_context);
2001     gen6_mfd_vc1_directmode_state(ctx, decode_state, gen6_mfd_context);
2002
2003     for (j = 0; j < decode_state->num_slice_params; j++) {
2004         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2005         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2006         slice_data_bo = decode_state->slice_datas[j]->bo;
2007         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen6_mfd_context);
2008
2009         if (j == decode_state->num_slice_params - 1)
2010             next_slice_group_param = NULL;
2011         else
2012             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2013
2014         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2015             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2016
2017             if (i < decode_state->slice_params[j]->num_elements - 1)
2018                 next_slice_param = slice_param + 1;
2019             else
2020                 next_slice_param = next_slice_group_param;
2021
2022             gen6_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen6_mfd_context);
2023             slice_param++;
2024         }
2025     }
2026
2027     intel_batchbuffer_end_atomic(batch);
2028     intel_batchbuffer_flush(batch);
2029 }
2030
2031 static void 
2032 gen6_mfd_decode_picture(VADriverContextP ctx, 
2033                         VAProfile profile, 
2034                         union codec_state *codec_state,
2035                         struct hw_context *hw_context)
2036
2037 {
2038     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
2039     struct decode_state *decode_state = &codec_state->decode;
2040
2041     assert(gen6_mfd_context);
2042
2043     switch (profile) {
2044     case VAProfileMPEG2Simple:
2045     case VAProfileMPEG2Main:
2046         gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context);
2047         break;
2048         
2049     case VAProfileH264Baseline:
2050     case VAProfileH264Main:
2051     case VAProfileH264High:
2052         gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context);
2053         break;
2054
2055     case VAProfileVC1Simple:
2056     case VAProfileVC1Main:
2057     case VAProfileVC1Advanced:
2058         gen6_mfd_vc1_decode_picture(ctx, decode_state, gen6_mfd_context);
2059         break;
2060
2061     default:
2062         assert(0);
2063         break;
2064     }
2065 }
2066
2067 static void
2068 gen6_mfd_context_destroy(void *hw_context)
2069 {
2070     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
2071
2072     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
2073     gen6_mfd_context->post_deblocking_output.bo = NULL;
2074
2075     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
2076     gen6_mfd_context->pre_deblocking_output.bo = NULL;
2077
2078     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
2079     gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2080
2081     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2082     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2083
2084     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2085     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2086
2087     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
2088     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2089
2090     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
2091     gen6_mfd_context->bitplane_read_buffer.bo = NULL;
2092
2093     intel_batchbuffer_free(gen6_mfd_context->base.batch);
2094     free(gen6_mfd_context);
2095 }
2096
2097 struct hw_context *
2098 gen6_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
2099 {
2100     struct intel_driver_data *intel = intel_driver_data(ctx);
2101     struct gen6_mfd_context *gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
2102     int i;
2103
2104     gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
2105     gen6_mfd_context->base.run = gen6_mfd_decode_picture;
2106     gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2107
2108     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
2109         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2110         gen6_mfd_context->reference_surface[i].frame_store_id = -1;
2111     }
2112     
2113     return (struct hw_context *)gen6_mfd_context;
2114 }