Fix typo in upstream code
[profile/ivi/vaapi-intel-driver.git] / src / gen6_mfd.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36
37 #include "i965_defines.h"
38 #include "i965_drv_video.h"
39 #include "i965_decoder_utils.h"
40
41 #include "gen6_mfd.h"
42 #include "intel_media.h"
43
/* Classic 8x8 zig-zag scan order: entry i gives the raster position of the
 * i-th coefficient in zig-zag order.  Not referenced in this chunk —
 * presumably used elsewhere in the file to re-order scaling lists; verify
 * against the MPEG-2 paths before relying on that. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
54
55 static void
56 gen6_mfd_init_avc_surface(VADriverContextP ctx, 
57                           VAPictureParameterBufferH264 *pic_param,
58                           struct object_surface *obj_surface)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     GenAvcSurface *gen6_avc_surface = obj_surface->private_data;
62     int height_in_mbs;
63
64     obj_surface->free_private_data = gen_free_avc_surface;
65     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
66
67     if (!gen6_avc_surface) {
68         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
69         assert((obj_surface->size & 0x3f) == 0);
70         obj_surface->private_data = gen6_avc_surface;
71     }
72
73     gen6_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
74                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
75
76     if (gen6_avc_surface->dmv_top == NULL) {
77         gen6_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
78                                                  "direct mv w/r buffer",
79                                                  128 * height_in_mbs * 64,      /* scalable with frame height */
80                                                  0x1000);
81     }
82
83     if (gen6_avc_surface->dmv_bottom_flag &&
84         gen6_avc_surface->dmv_bottom == NULL) {
85         gen6_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
86                                                     "direct mv w/r buffer",
87                                                     128 * height_in_mbs * 64,   /* scalable with frame height */
88                                                     0x1000);
89     }
90 }
91
/*
 * Emit MFX_PIPE_MODE_SELECT (4 dwords): put the MFX engine into VLD decode
 * mode for the selected codec and route the output to the pre- or
 * post-deblocking surface, whichever the context marked valid.
 */
static void
gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (MFD_MODE_VLD << 16) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 7)  | /* disable TLB prefetch */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 20) | /* round flag in PB slice */
                  (0 << 19) | /* round flag in Intra8x8 */
                  (0 << 7)  | /* expand NOA bus flag */
                  (1 << 6)  | /* must be 1 */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  | /* AVC long field motion vector */
                  (1 << 0));  /* always calculate AVC ILDB boundary strength */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
129
/*
 * Emit MFX_SURFACE_STATE (6 dwords) describing the destination render
 * surface: NV12-style planar 4:2:0 with interleaved U/V, Y-tiled, with the
 * chroma plane starting at obj_surface->height lines into the buffer.
 */
static void
gen6_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 19) |
                  ((obj_surface->orig_width - 1) << 6));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be y-tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* must be 0 for interleave U/V */
                  (obj_surface->height)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
159
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 dwords): relocations for the decode
 * output surfaces, the row-store scratch buffers, and the 16 reference
 * picture buffers (DW 7..22).  Invalid/absent entries are written as 0.
 */
static void
gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    /* DW1: pre-deblocking destination */
    if (gen6_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW2: post-deblocking destination */
    if (gen6_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    /* DW5: intra row-store scratch */
    if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW6: deblocking filter row-store scratch */
    if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: one dword per reference surface slot */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen6_mfd_context->reference_surface[i].obj_surface &&
            gen6_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen6_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
222
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (11 dwords): point the indirect
 * bitstream object base at the slice data buffer.  The remaining address
 * pairs are unused in VLD mode and written as 0.
 */
static void
gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
245
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (4 dwords): BSD/MPC and MPR row-store
 * scratch buffers plus the VC-1 bitplane read buffer.  Any buffer the
 * context did not mark valid is written as 0.
 */
static void
gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* bitplane buffer is read-only for the GPU, hence no write domain */
    if (gen6_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
280
/* Disabled code, kept for reference: an unimplemented AES state emitter and
 * an MFX_WAIT helper.  Consider deleting rather than carrying dead code. */
#if 0
static void
gen6_mfd_aes_state(VADriverContextP ctx,
                   struct decode_state *decode_state,
                   int standard_select)
{
    /* FIXME */
}

static void
gen6_mfd_wait(VADriverContextP ctx,
              struct decode_state *decode_state,
              int standard_select,
              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 1);
    OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
    ADVANCE_BCS_BATCH(batch);
}
#endif
303
/*
 * Emit MFX_AVC_IMG_STATE (13 dwords) from the VA picture parameters:
 * frame dimensions in macroblocks, QP index offsets, picture structure
 * (frame / top field / bottom field), and the per-picture coding flags.
 * Several sanity asserts encode hardware limits (4:2:0 only, MB count
 * below 0x8000) and H.264 spec consistency rules.
 */
static void
gen6_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int qm_present_flag;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        qm_present_flag = 1;
    else
        qm_present_flag = 0; /* built-in QM matrices */

    /* 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
    OUT_BCS_BATCH(batch, 
                  ((width_in_mbs * height_in_mbs) & 0x7fff));
    OUT_BCS_BATCH(batch, 
                  (height_in_mbs << 16) | 
                  (width_in_mbs << 0));
    OUT_BCS_BATCH(batch, 
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (1 << 12) | /* always 1, hardware requirement */
                  (qm_present_flag << 10) |
                  (img_struct << 8) |
                  (16 << 0));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
392
/*
 * Emit MFX_AVC_QM_STATE with the application-supplied scaling matrices.
 * No-op when no IQ matrix buffer was provided (hardware built-ins apply,
 * signalled via qm_present_flag in the IMG state).  Command length:
 * 2 header dwords + six 4x4 lists (4 dwords each), plus two 8x8 lists
 * (16 dwords each) when 8x8 transform is enabled.
 */
static void
gen6_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int cmd_len;
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        cmd_len += 2 * 16; /* load two 8x8 scaling matrices */

    BEGIN_BCS_BATCH(batch, cmd_len);
    OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | (cmd_len - 2));

    /* bitmask of which lists follow; 0 in bits 8+ means "use the supplied
     * data, not the default built-in matrices" */
    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        OUT_BCS_BATCH(batch, 
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
    else
        OUT_BCS_BATCH(batch, 
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0x3f << 0)); /* six 4x4 scaling matrices */

    intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);

    ADVANCE_BCS_BATCH(batch);
}
435
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 dwords):
 *   1 header + 2x16 DMV buffer relocations for the reference surfaces
 *   + 2 DMV relocations for the current picture
 *   + 2x16 POC entries (top/bottom field order counts) for the references
 *   + 2 POC entries for the current picture.
 * When a reference has no bottom DMV buffer (frame picture or 8x8 direct
 * inference), the top buffer is emitted in both slots.
 * NOTE(review): slice_param is unused here — presumably kept for signature
 * symmetry with the other per-slice emitters.
 */
static void
gen6_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen6_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen6_mfd_context->reference_surface[i].obj_surface &&
            gen6_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen6_mfd_context->reference_surface[i].obj_surface;
            gen6_avc_surface = obj_surface->private_data;
            OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);

            if (gen6_avc_surface->dmv_bottom_flag == 1)
                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
            else
                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field: DMV buffers are written by the GPU */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen6_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen6_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List: look each bound reference up in ReferenceFrames by surface id */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            int found = 0;

            assert(gen6_mfd_context->reference_surface[i].obj_surface != NULL);

            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];
                
                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
            
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
533
/*
 * Emit MFX_AVC_SLICE_STATE (11 dwords) for one slice: normalized slice
 * type, reference list sizes, weighted prediction mode, QP/deblocking
 * parameters, and the slice's start and end macroblock positions.  The end
 * position comes from next_slice_param, or from the bottom of the picture
 * for the last slice (last-slice flag in DW7).
 */
static void
gen6_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int weighted_pred_idc = 0;
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* collapse SI->I and SP->P; hardware only distinguishes I/P/B */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else { 
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1);
    }

    /* in MBAFF pictures, first_mb_in_slice counts MB pairs, so double it */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs; 
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs;
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch, 
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch, 
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch, 
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) | 
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, 
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
623
/*
 * Emit a "phantom" MFX_AVC_SLICE_STATE covering the whole picture with
 * zeroed parameters.  DW4 carries the picture height (MBs) and the total
 * MB count, halved for field pictures since each field has half the rows.
 * NOTE(review): presumably used for error concealment when real slice data
 * is missing — confirm against the callers outside this chunk.
 */
static void
gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  height_in_mbs << 24 |
                  width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
649
/*
 * Thin wrapper: delegate MFX_AVC_REF_IDX_STATE emission to the shared
 * helper in i965_decoder_utils (pic_param is unused here).
 */
static inline void
gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    gen6_send_avc_ref_idx_state(
        gen6_mfd_context->base.batch,
        slice_param,
        gen6_mfd_context->reference_surface
    );
}
662
663 static void
664 gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
665                                 VAPictureParameterBufferH264 *pic_param,
666                                 VASliceParameterBufferH264 *slice_param,
667                                 struct gen6_mfd_context *gen6_mfd_context)
668 {
669     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
670     int i, j, num_weight_offset_table = 0;
671     short weightoffsets[32 * 6];
672
673     if ((slice_param->slice_type == SLICE_TYPE_P ||
674          slice_param->slice_type == SLICE_TYPE_SP) &&
675         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
676         num_weight_offset_table = 1;
677     }
678     
679     if ((slice_param->slice_type == SLICE_TYPE_B) &&
680         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
681         num_weight_offset_table = 2;
682     }
683
684     for (i = 0; i < num_weight_offset_table; i++) {
685         BEGIN_BCS_BATCH(batch, 98);
686         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
687         OUT_BCS_BATCH(batch, i);
688
689         if (i == 0) {
690             for (j = 0; j < 32; j++) {
691                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
692                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
693                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
694                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
695                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
696                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
697             }
698         } else {
699             for (j = 0; j < 32; j++) {
700                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
701                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
702                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
703                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
704                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
705                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
706             }
707         }
708
709         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
710         ADVANCE_BCS_BATCH(batch);
711     }
712 }
713
/*
 * Emit MFD_AVC_BSD_OBJECT (6 dwords) to kick bitstream decode of one
 * slice: data size and offset within the indirect object buffer, then the
 * byte offset and remaining bit count (7 - bit offset within the byte) of
 * the first macroblock, as computed by avc_get_first_mb_bit_offset().
 */
static void
gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    unsigned int slice_data_bit_offset;

    slice_data_bit_offset = avc_get_first_mb_bit_offset(
        slice_data_bo,
        slice_param,
        pic_param->pic_fields.bits.entropy_coding_mode_flag
    );

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, 
                  (slice_param->slice_data_size - slice_param->slice_data_offset));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (1 << 6)  |
                  ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
749
750 static void
751 gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
752                                       VAPictureParameterBufferH264 *pic_param,
753                                       struct gen6_mfd_context *gen6_mfd_context)
754 {
755     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
756
757     BEGIN_BCS_BATCH(batch, 6);
758     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
759     OUT_BCS_BATCH(batch, 0);
760     OUT_BCS_BATCH(batch, 0);
761     OUT_BCS_BATCH(batch, 0);
762     OUT_BCS_BATCH(batch, 0);
763     OUT_BCS_BATCH(batch, 0);
764     ADVANCE_BCS_BATCH(batch);
765 }
766
/* Submit the phantom (terminating) slice after all real slices of an AVC
 * picture: its slice state followed by a zero-filled BSD object. */
static void
gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context);
    gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context);
}
775
/* Per-picture AVC decode setup: decides deblocking routing, (re)allocates the
 * render surface and all row-store scratch buffers sized by picture width. */
static void
gen6_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    int width_in_mbs;

    /* In-loop deblocking is required as soon as any slice does not fully
     * disable it (disable_deblocking_filter_idc != 1); stop at the first. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen6_mfd_context->reference_surface);
    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* initial uv component for YUV400 case */
    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
         unsigned int uv_offset = obj_surface->width * obj_surface->height; 
         unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 

         /* 0x80 is the neutral chroma value, making the output grey. */
         drm_intel_gem_bo_map_gtt(obj_surface->bo);
         memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
         drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
    }

    gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Route the decoded output: post-deblocking when ILDB is enabled,
     * pre-deblocking otherwise.  Both point at the same surface bo;
     * exactly one of the two is marked valid. */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers, sized per macroblock column. */
    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC carries no bitplane data (VC-1 only). */
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
881
/* Decode one AVC picture: emit the pipeline/picture-level state, then per
 * slice the direct-mode, slice, ref-idx, weight-offset and BSD commands,
 * followed by the phantom slice, and flush the BCS batch. */
static void
gen6_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_decode_init(ctx, decode_state, gen6_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_avc_img_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_avc_qm_state(ctx, decode_state, gen6_mfd_context);

    /* Outer loop: slice parameter buffers (slice groups); inner loop: the
     * individual slices within each buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen6_mfd_context);

        /* First slice of the next buffer, or NULL for the last buffer;
         * used to delimit the current slice. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }
    
    gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
943
/* Per-picture MPEG-2 decode setup: bind reference surfaces, allocate the
 * render surface and the BSD/MPC row store; MPEG-2 never uses the
 * deblocking, intra, MPR or bitplane buffers. */
static void
gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen6_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* MPEG-2 has no in-loop deblocking: output always goes pre-deblocking. */
    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen6_mfd_context->post_deblocking_output.valid = 0;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
990
/* Emit MFX_MPEG2_PIC_STATE from the VA picture parameters: f_codes, coding
 * extension flags, and the picture size in macroblocks. */
static void
gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int tff, pic_structure;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* For frame pictures take top_field_first directly; for field pictures
     * derive it from is_first_field and which field this is. */
    pic_structure = pic_param->picture_coding_extension.bits.picture_structure;
    if (pic_structure == MPEG_FRAME)
        tff = pic_param->picture_coding_extension.bits.top_field_first;
    else
        tff = !(pic_param->picture_coding_extension.bits.is_first_field ^
                (pic_structure & MPEG_TOP_FIELD));

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  tff << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    OUT_BCS_BATCH(batch,
                  /* picture size in macroblocks: height << 16 | width */
                  (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
                  (ALIGN(pic_param->horizontal_size, 16) / 16));
    ADVANCE_BCS_BATCH(batch);
}
1032
1033 static void
1034 gen6_mfd_mpeg2_qm_state(VADriverContextP ctx,
1035                         struct decode_state *decode_state,
1036                         struct gen6_mfd_context *gen6_mfd_context)
1037 {
1038     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1039     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen6_mfd_context->iq_matrix.mpeg2;
1040     int i, j;
1041
1042     /* Update internal QM state */
1043     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1044         VAIQMatrixBufferMPEG2 * const iq_matrix =
1045             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1046
1047         gen_iq_matrix->load_intra_quantiser_matrix =
1048             iq_matrix->load_intra_quantiser_matrix;
1049         if (iq_matrix->load_intra_quantiser_matrix) {
1050             for (j = 0; j < 64; j++)
1051                 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1052                     iq_matrix->intra_quantiser_matrix[j];
1053         }
1054
1055         gen_iq_matrix->load_non_intra_quantiser_matrix =
1056             iq_matrix->load_non_intra_quantiser_matrix;
1057         if (iq_matrix->load_non_intra_quantiser_matrix) {
1058             for (j = 0; j < 64; j++)
1059                 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1060                     iq_matrix->non_intra_quantiser_matrix[j];
1061         }
1062     }
1063
1064     /* Commit QM state to HW */
1065     for (i = 0; i < 2; i++) {
1066         unsigned char *qm = NULL;
1067
1068         if (i == 0) {
1069             if (gen_iq_matrix->load_intra_quantiser_matrix)
1070                 qm = gen_iq_matrix->intra_quantiser_matrix;
1071         } else {
1072             if (gen_iq_matrix->load_non_intra_quantiser_matrix)
1073                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1074         }
1075
1076         if (!qm)
1077             continue;
1078
1079         BEGIN_BCS_BATCH(batch, 18);
1080         OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
1081         OUT_BCS_BATCH(batch, i);
1082         intel_batchbuffer_data(batch, qm, 64);
1083         ADVANCE_BCS_BATCH(batch);
1084     }
1085 }
1086
/* Emit MFD_MPEG2_BSD_OBJECT for one slice: bitstream location plus the
 * slice's macroblock span, derived from the start position of the next
 * slice (or the bottom of the picture for the last slice). */
static void
gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* Workaround: some streams report frame-based vertical positions for
     * field pictures; wa_mpeg2_slice_vertical_position > 0 enables halving. */
    is_field_pic_wa = is_field_pic &&
        gen6_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: extends to the bottom of the (field) picture. */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Number of macroblocks between this slice's start and the next one's. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  /* sub-byte bit position where the slice's MB data starts */
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  slice_param->quantiser_scale_code << 24);
    ADVANCE_BCS_BATCH(batch);
}
1134
/* Decode one MPEG-2 picture: emit the pipeline and picture-level state,
 * then one BSD object per slice, and flush the BCS batch. */
static void
gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen6_mfd_mpeg2_decode_init(ctx, decode_state, gen6_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_mpeg2_pic_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_mpeg2_qm_state(ctx, decode_state, gen6_mfd_context);

    /* Lazily probe whether this stream needs the slice-vertical-position
     * workaround (value < 0 means "not determined yet"). */
    if (gen6_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen6_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);

        /* First slice of the next buffer, or NULL on the last buffer. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1190
/* Map the VA-API VC-1 picture_type index to the GEN6 hardware encoding. */
static const int va_to_gen6_vc1_pic_type[5] = {
    GEN6_VC1_I_PICTURE,
    GEN6_VC1_P_PICTURE,
    GEN6_VC1_B_PICTURE,
    GEN6_VC1_BI_PICTURE,
    GEN6_VC1_P_PICTURE,
};

/* Map the VA-API VC-1 MV mode to the hardware's unified MV mode value. */
static const int va_to_gen6_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};

/* B-picture scale factors, indexed by a derived ratio; presumably the
 * VC-1 direct-mode scaling table — TODO confirm against the VC-1 spec. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160, 
    224,
};

/* Map the VA-API conditional-overlap value to the hardware encoding. */
static const int va_to_gen6_vc1_condover[3] = {
    0,
    2,
    3
};

/* Map the VA-API VC-1 profile index to the GEN6 profile encoding. */
static const int va_to_gen6_vc1_profile[4] = {
    GEN6_VC1_SIMPLE_PROFILE,
    GEN6_VC1_MAIN_PROFILE,
    GEN6_VC1_RESERVED_PROFILE,
    GEN6_VC1_ADVANCED_PROFILE
};
1226
1227 static void 
1228 gen6_mfd_free_vc1_surface(void **data)
1229 {
1230     struct gen6_vc1_surface *gen6_vc1_surface = *data;
1231
1232     if (!gen6_vc1_surface)
1233         return;
1234
1235     dri_bo_unreference(gen6_vc1_surface->dmv);
1236     free(gen6_vc1_surface);
1237     *data = NULL;
1238 }
1239
1240 static void
1241 gen6_mfd_init_vc1_surface(VADriverContextP ctx, 
1242                           VAPictureParameterBufferVC1 *pic_param,
1243                           struct object_surface *obj_surface)
1244 {
1245     struct i965_driver_data *i965 = i965_driver_data(ctx);
1246     struct gen6_vc1_surface *gen6_vc1_surface = obj_surface->private_data;
1247     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1248
1249     obj_surface->free_private_data = gen6_mfd_free_vc1_surface;
1250
1251     if (!gen6_vc1_surface) {
1252         gen6_vc1_surface = calloc(sizeof(struct gen6_vc1_surface), 1);
1253         assert((obj_surface->size & 0x3f) == 0);
1254         obj_surface->private_data = gen6_vc1_surface;
1255     }
1256
1257     gen6_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1258
1259     if (gen6_vc1_surface->dmv == NULL) {
1260         gen6_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1261                                              "direct mv w/r buffer",
1262                                              128 * height_in_mbs * 64,  /* scalable with frame height */
1263                                              0x1000);
1264     }
1265 }
1266
/* Per-picture VC-1 decode setup: bind reference surfaces, allocate the
 * render surface and scratch buffers, and repack the VA bitplane buffer
 * into the hardware's nibble-per-MB layout when bitplanes are present. */
static void
gen6_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen6_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Route output: post-deblocking when the in-loop filter is on,
     * pre-deblocking otherwise.  Both reference the same surface bo. */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen6_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
    
    if (gen6_mfd_context->bitplane_read_buffer.valid) {
        /* NOTE(review): this inner width_in_mbs shadows the outer one
         * (same value); harmless but worth cleaning up someday. */
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen6_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /* Repack: the VA buffer stores one MB per nibble in row-major scan;
         * build each destination byte by shifting nibbles in from the top. */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* Force the skip-MB bit for skipped pictures. */
                if (picture_type == GEN6_VC1_SKIPPED_PICTURE){
                    src_value |= 0x2;
                }

                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd row width: the last byte got only one nibble; align it. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen6_mfd_context->bitplane_read_buffer.bo = NULL;
}
1385
/*
 * Program the MFX_VC1_PIC_STATE command (6 dwords) for the current picture.
 *
 * Derives all per-picture hardware fields from the VA-API VC-1 picture
 * parameter buffer: alternative-pquant configuration (VC-1 DQUANT syntax),
 * the unified MV mode, the B-frame scale factor and reference distance
 * (BRFD), the frame coding mode (FCM), the OVERLAP smoothing decision and
 * the AC coding set for luma.  Emits the command into the BCS batch.
 */
static void
gen6_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Map the VC-1 DQUANT syntax elements onto the hardware's
     * AltPQuantConfig / AltPQuantEdgeMask fields (one mask bit per
     * picture edge).
     */
    if (dquant == 0) {
        /* No alternative quantizer in use. */
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* ALTPQUANT applied to all four edges. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            /* DQUANTFRM == 0: this frame uses the regular quantizer only. */
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* DQPROFILE: all macroblocks */
                if (dqbilevel == 0) {
                    /* Per-MB quantizer selection. */
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    /* Per-MB binary choice between PQUANT and ALTPQUANT. */
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0: /* DQPROFILE: all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* DQPROFILE: double edge */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* DQPROFILE: single edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /*
     * With intensity compensation, the actual MV mode is carried in
     * mv_mode2; otherwise in mv_mode.
     */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* BFRACTION-derived scale factor; table only covers indices 0..20. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen6_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced-profile I pictures are decoded as BI by this hardware path. */
    if (profile == GEN6_VC1_ADVANCED_PROFILE &&
        picture_type == GEN6_VC1_I_PICTURE)
        picture_type = GEN6_VC1_BI_PICTURE;

    if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /*
     * Direct-mode MVs for a B picture come from the backward reference;
     * they are only valid when that reference was itself a P picture.
     */
    if (picture_type == GEN6_VC1_B_PICTURE) {
        struct gen6_vc1_surface *gen6_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen6_vc1_surface = obj_surface->private_data;

        if (!gen6_vc1_surface ||
            (va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_I_PICTURE ||
             va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* FCM: 0 = progressive, 1 = frame-interlace; field-interlace encodes
     * the field order (2 = top field first, 3 = bottom field first). */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* BRFD (backward reference frame distance) for B pictures, clamped at 0. */
    if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /*
     * OVERLAP smoothing decision.  Simple/main profile: enabled for
     * non-B pictures when PQUANT >= 9.  Advanced profile: P pictures need
     * PQUANT >= 9; I/BI pictures additionally honor CONDOVER.
     */
    overlap = 0;
    if (profile != GEN6_VC1_ADVANCED_PROFILE){
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN6_VC1_B_PICTURE) {
            overlap = 1;
        }
    }else {
        if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_P_PICTURE &&
             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
              overlap = 1;
        }
        if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN6_VC1_BI_PICTURE){
             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
             } else if (va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                        va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                 overlap = 1;
             }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Emit the 6-dword MFX_VC1_PIC_STATE command. */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PIC_STATE | (6 - 2));
    /* DW1: picture size in macroblocks (height << 16 | width). */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
                  (ALIGN(pic_param->coded_width, 16) / 16));
    /* DW2: quantizer / overlap / picture-type / FCM control bits. */
    OUT_BCS_BATCH(batch,
                  pic_param->sequence_fields.bits.syncmarker << 31 |
                  1 << 29 | /* concealment */
                  alt_pq << 24 |
                  pic_param->entrypoint_fields.bits.loopfilter << 23 |
                  overlap << 22 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 21 | /* implicit quantizer */
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 16 |
                  alt_pquant_edge_mask << 12 |
                  alt_pquant_config << 10 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 9 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 8 |
                  va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] << 6 |
                  !pic_param->picture_fields.bits.is_first_field << 5 |
                  picture_type << 2 |
                  fcm << 0);
    /* DW3: bitplane/MV control.  Note the "raw mode" bits are the
     * inverse of the VA bp_* flags. */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 23 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 20 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 19 |
                  !pic_param->bitplane_present.flags.bp_overflags << 18 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 17 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 16 |
                  pic_param->mv_fields.bits.extended_dmv_range << 14 |
                  pic_param->mv_fields.bits.extended_mv_range << 12 |
                  pic_param->mv_fields.bits.four_mv_switch << 11 |
                  pic_param->fast_uvmc_flag << 10 |
                  unified_mv_mode << 8 |
                  ref_field_pic_polarity << 6 |
                  pic_param->reference_fields.bits.num_reference_pictures << 5 |
                  pic_param->reference_fields.bits.reference_distance << 0);
    /* DW4: VLC table selections and transform controls. */
    OUT_BCS_BATCH(batch,
                  scale_factor << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    /* DW5: direct-MV surface validity, BRFD and half-width (in MBs). */
    OUT_BCS_BATCH(batch,
                  dmv_surface_valid << 13 |
                  brfd << 8 |
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
    ADVANCE_BCS_BATCH(batch);
}
1626
1627 static void
1628 gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1629                              struct decode_state *decode_state,
1630                              struct gen6_mfd_context *gen6_mfd_context)
1631 {
1632     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1633     VAPictureParameterBufferVC1 *pic_param;
1634     int interpolation_mode = 0;
1635     int intensitycomp_single;
1636
1637     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1638     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1639
1640     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1641         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1642          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1643         interpolation_mode = 2; /* Half-pel bilinear */
1644     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1645              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1646               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1647         interpolation_mode = 0; /* Half-pel bicubic */
1648     else
1649         interpolation_mode = 1; /* Quarter-pel bicubic */
1650
1651     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1652     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1653     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1654
1655     BEGIN_BCS_BATCH(batch, 7);
1656     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
1657     OUT_BCS_BATCH(batch,
1658                   0 << 8 | /* FIXME: interlace mode */
1659                   pic_param->rounding_control << 4 |
1660                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
1661     OUT_BCS_BATCH(batch,
1662                   pic_param->luma_shift << 16 |
1663                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1664     OUT_BCS_BATCH(batch, 0);
1665     OUT_BCS_BATCH(batch, 0);
1666     OUT_BCS_BATCH(batch, 0);
1667     OUT_BCS_BATCH(batch,
1668                   interpolation_mode << 19 |
1669                   pic_param->fast_uvmc_flag << 18 |
1670                   0 << 17 | /* FIXME: scale up or down ??? */
1671                   pic_param->range_reduction_frame << 16 |
1672                   0 << 6 | /* FIXME: double ??? */
1673                   0 << 4 |
1674                   intensitycomp_single << 2 |
1675                   intensitycomp_single << 0);
1676     ADVANCE_BCS_BATCH(batch);
1677 }
1678
1679
1680 static void
1681 gen6_mfd_vc1_directmode_state(VADriverContextP ctx,
1682                               struct decode_state *decode_state,
1683                               struct gen6_mfd_context *gen6_mfd_context)
1684 {
1685     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1686     struct object_surface *obj_surface;
1687     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1688
1689     obj_surface = decode_state->render_object;
1690
1691     if (obj_surface && obj_surface->private_data) {
1692         dmv_write_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1693     }
1694
1695     obj_surface = decode_state->reference_objects[1];
1696
1697     if (obj_surface && obj_surface->private_data) {
1698         dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1699     }
1700
1701     BEGIN_BCS_BATCH(batch, 3);
1702     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1703
1704     if (dmv_write_buffer)
1705         OUT_BCS_RELOC(batch, dmv_write_buffer,
1706                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1707                       0);
1708     else
1709         OUT_BCS_BATCH(batch, 0);
1710
1711     if (dmv_read_buffer)
1712         OUT_BCS_RELOC(batch, dmv_read_buffer,
1713                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1714                       0);
1715     else
1716         OUT_BCS_BATCH(batch, 0);
1717                   
1718     ADVANCE_BCS_BATCH(batch);
1719 }
1720
/*
 * Translate the macroblock-layer bit offset within a slice header into
 * the offset the hardware should use.
 *
 * For the advanced profile (3), the bitstream may contain
 * emulation-prevention byte sequences (0x00 0x00 0x03 followed by a byte
 * < 4); each such 0x03 byte occupies space in the buffer but not in the
 * parsed header, so the byte position must be advanced past them.  For
 * the other profiles the offset is returned unchanged.
 */
static int
gen6_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int bit_remainder = in_slice_data_bit_offset % 8;
    int parsed = 0;   /* header bytes accounted for (post-unescaping) */
    int raw = 0;      /* raw buffer bytes consumed */

    if (profile != 3)
        return in_slice_data_bit_offset;

    while (parsed < header_bytes) {
        if (buf[raw] == 0 && buf[raw + 1] == 0 && buf[raw + 2] == 3 && buf[raw + 3] < 4) {
            /* Skip the escaped pair plus the 0x03 emulation byte. */
            parsed += 2;
            raw += 3;
        } else {
            parsed += 1;
            raw += 1;
        }
    }

    return 8 * raw + bit_remainder;
}
1742
1743 static void
1744 gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
1745                         VAPictureParameterBufferVC1 *pic_param,
1746                         VASliceParameterBufferVC1 *slice_param,
1747                         VASliceParameterBufferVC1 *next_slice_param,
1748                         dri_bo *slice_data_bo,
1749                         struct gen6_mfd_context *gen6_mfd_context)
1750 {
1751     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1752     int next_slice_start_vert_pos;
1753     int macroblock_offset;
1754     uint8_t *slice_data = NULL;
1755
1756     dri_bo_map(slice_data_bo, 0);
1757     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1758     macroblock_offset = gen6_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1759                                                                slice_param->macroblock_offset,
1760                                                                pic_param->sequence_fields.bits.profile);
1761     dri_bo_unmap(slice_data_bo);
1762
1763     if (next_slice_param)
1764         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1765     else
1766         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1767
1768     BEGIN_BCS_BATCH(batch, 4);
1769     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (4 - 2));
1770     OUT_BCS_BATCH(batch, 
1771                   slice_param->slice_data_size - (macroblock_offset >> 3));
1772     OUT_BCS_BATCH(batch, 
1773                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1774     OUT_BCS_BATCH(batch,
1775                   slice_param->slice_vertical_position << 24 |
1776                   next_slice_start_vert_pos << 16 |
1777                   (macroblock_offset & 0x7));
1778     ADVANCE_BCS_BATCH(batch);
1779 }
1780
1781 static void
1782 gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
1783                             struct decode_state *decode_state,
1784                             struct gen6_mfd_context *gen6_mfd_context)
1785 {
1786     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1787     VAPictureParameterBufferVC1 *pic_param;
1788     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1789     dri_bo *slice_data_bo;
1790     int i, j;
1791
1792     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1793     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1794
1795     gen6_mfd_vc1_decode_init(ctx, decode_state, gen6_mfd_context);
1796     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1797     intel_batchbuffer_emit_mi_flush(batch);
1798     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1799     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1800     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1801     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1802     gen6_mfd_vc1_pic_state(ctx, decode_state, gen6_mfd_context);
1803     gen6_mfd_vc1_pred_pipe_state(ctx, decode_state, gen6_mfd_context);
1804     gen6_mfd_vc1_directmode_state(ctx, decode_state, gen6_mfd_context);
1805
1806     for (j = 0; j < decode_state->num_slice_params; j++) {
1807         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1808         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1809         slice_data_bo = decode_state->slice_datas[j]->bo;
1810         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen6_mfd_context);
1811
1812         if (j == decode_state->num_slice_params - 1)
1813             next_slice_group_param = NULL;
1814         else
1815             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1816
1817         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1818             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1819
1820             if (i < decode_state->slice_params[j]->num_elements - 1)
1821                 next_slice_param = slice_param + 1;
1822             else
1823                 next_slice_param = next_slice_group_param;
1824
1825             gen6_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen6_mfd_context);
1826             slice_param++;
1827         }
1828     }
1829
1830     intel_batchbuffer_end_atomic(batch);
1831     intel_batchbuffer_flush(batch);
1832 }
1833
1834 static VAStatus
1835 gen6_mfd_decode_picture(VADriverContextP ctx, 
1836                         VAProfile profile, 
1837                         union codec_state *codec_state,
1838                         struct hw_context *hw_context)
1839
1840 {
1841     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
1842     struct decode_state *decode_state = &codec_state->decode;
1843     VAStatus vaStatus;
1844
1845     assert(gen6_mfd_context);
1846
1847     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
1848
1849     if (vaStatus != VA_STATUS_SUCCESS)
1850         goto out;
1851
1852     switch (profile) {
1853     case VAProfileMPEG2Simple:
1854     case VAProfileMPEG2Main:
1855         gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context);
1856         break;
1857         
1858     case VAProfileH264Baseline:
1859     case VAProfileH264Main:
1860     case VAProfileH264High:
1861         gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context);
1862         break;
1863
1864     case VAProfileVC1Simple:
1865     case VAProfileVC1Main:
1866     case VAProfileVC1Advanced:
1867         gen6_mfd_vc1_decode_picture(ctx, decode_state, gen6_mfd_context);
1868         break;
1869
1870     default:
1871         assert(0);
1872         break;
1873     }
1874
1875     vaStatus = VA_STATUS_SUCCESS;
1876
1877 out:
1878     return vaStatus;
1879 }
1880
1881 static void
1882 gen6_mfd_context_destroy(void *hw_context)
1883 {
1884     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
1885
1886     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
1887     gen6_mfd_context->post_deblocking_output.bo = NULL;
1888
1889     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1890     gen6_mfd_context->pre_deblocking_output.bo = NULL;
1891
1892     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
1893     gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1894
1895     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1896     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1897
1898     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1899     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1900
1901     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
1902     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1903
1904     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
1905     gen6_mfd_context->bitplane_read_buffer.bo = NULL;
1906
1907     intel_batchbuffer_free(gen6_mfd_context->base.batch);
1908     free(gen6_mfd_context);
1909 }
1910
1911 struct hw_context *
1912 gen6_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
1913 {
1914     struct intel_driver_data *intel = intel_driver_data(ctx);
1915     struct gen6_mfd_context *gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
1916     int i;
1917
1918     gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
1919     gen6_mfd_context->base.run = gen6_mfd_decode_picture;
1920     gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
1921
1922     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
1923         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
1924         gen6_mfd_context->reference_surface[i].frame_store_id = -1;
1925         gen6_mfd_context->reference_surface[i].obj_surface = NULL;
1926     }
1927
1928     gen6_mfd_context->wa_mpeg2_slice_vertical_position = -1;
1929     
1930     return (struct hw_context *)gen6_mfd_context;
1931 }