VPP: Fix Coverity alert on uninitialized vpp_kernels
[platform/upstream/libva-intel-driver.git] / src / gen6_mfd.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include "sysdeps.h"
30 #include "intel_batchbuffer.h"
31 #include "intel_driver.h"
32 #include "i965_defines.h"
33 #include "i965_drv_video.h"
34 #include "i965_decoder_utils.h"
35
36 #include "gen6_mfd.h"
37 #include "intel_media.h"
38
/* Standard zig-zag scan order for an 8x8 block: maps scan position to
 * raster-order coefficient index, used to (de)serialize quantization
 * matrices. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
49
50 static void
51 gen6_mfd_init_avc_surface(VADriverContextP ctx, 
52                           VAPictureParameterBufferH264 *pic_param,
53                           struct object_surface *obj_surface)
54 {
55     struct i965_driver_data *i965 = i965_driver_data(ctx);
56     GenAvcSurface *gen6_avc_surface = obj_surface->private_data;
57     int height_in_mbs;
58
59     obj_surface->free_private_data = gen_free_avc_surface;
60     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
61
62     if (!gen6_avc_surface) {
63         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
64         gen6_avc_surface->frame_store_id = -1;
65         assert((obj_surface->size & 0x3f) == 0);
66         obj_surface->private_data = gen6_avc_surface;
67     }
68
69     gen6_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
70                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
71
72     if (gen6_avc_surface->dmv_top == NULL) {
73         gen6_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
74                                                  "direct mv w/r buffer",
75                                                  128 * height_in_mbs * 64,      /* scalable with frame height */
76                                                  0x1000);
77     }
78
79     if (gen6_avc_surface->dmv_bottom_flag &&
80         gen6_avc_surface->dmv_bottom == NULL) {
81         gen6_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
82                                                     "direct mv w/r buffer",
83                                                     128 * height_in_mbs * 64,   /* scalable with frame height */
84                                                     0x1000);
85     }
86 }
87
/*
 * Emit MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decode of
 * the given codec (MPEG2/AVC/VC1) and select pre/post-deblocking output.
 * DWord layout is fixed by hardware; do not reorder the OUT_BCS_BATCH calls.
 */
static void
gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    /* only the three codecs this decoder supports */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (MFD_MODE_VLD << 16) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 7)  | /* disable TLB prefectch */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 20) | /* round flag in PB slice */
                  (0 << 19) | /* round flag in Intra8x8 */
                  (0 << 7)  | /* expand NOA bus flag */
                  (1 << 6)  | /* must be 1 */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  | /* AVC long field motion vector */
                  (1 << 0));  /* always calculate AVC ILDB boundary strength */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
125
/*
 * Emit MFX_SURFACE_STATE for the render target: dimensions, pitch,
 * tiling and chroma (U/V) base offset.  Y800 surfaces are programmed as
 * monochrome, everything else as interleaved NV12-style planar 4:2:0.
 */
static void
gen6_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int surface_format;

    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 19) |
                  ((obj_surface->orig_width - 1) << 6));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be y-tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* must be 0 for interleave U/V */
                  (obj_surface->height)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
159
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 DWords): pre/post-deblocking output
 * buffers, the intra-row and deblocking-filter row scratch buffers, and
 * the 16 reference picture base addresses.  Every slot must be written —
 * invalid/absent buffers are emitted as 0 so the DWord count stays fixed.
 */
static void
gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    if (gen6_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: one address per reference surface slot */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen6_mfd_context->reference_surface[i].obj_surface &&
            gen6_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen6_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
222
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE: point the indirect bitstream object
 * base at the slice data buffer.  The remaining address pairs are unused
 * in VLD decode mode and are written as 0.
 */
static void
gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
245
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE: BSD/MPC row store, MPR row store,
 * and the bitplane read buffer (VC-1).  Absent buffers are emitted as 0
 * to keep the fixed 4-DWord command length.
 */
static void
gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
280
/*
 * Emit MFX_AVC_IMG_STATE from the VA picture parameters: frame size in
 * macroblocks, picture structure (frame/top/bottom field), QP offsets,
 * and the sequence/picture coding flags.  Asserts encode hardware
 * restrictions (4:2:0 or monochrome only, MB count < 0x8000, etc.).
 */
static void
gen6_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int qm_present_flag;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        qm_present_flag = 1;
    else
        qm_present_flag = 0; /* built-in QM matrices */

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* field picture flag must agree with the picture structure */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
    OUT_BCS_BATCH(batch, 
                  ((width_in_mbs * height_in_mbs) & 0x7fff));
    OUT_BCS_BATCH(batch, 
                  (height_in_mbs << 16) | 
                  (width_in_mbs << 0));
    OUT_BCS_BATCH(batch, 
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (1 << 12) | /* always 1, hardware requirement */
                  (qm_present_flag << 10) |
                  (img_struct << 8) |
                  (16 << 0));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
369
/*
 * Emit MFX_AVC_QM_STATE with the application-supplied scaling matrices.
 * Always loads the six 4x4 lists; the two 8x8 lists are appended only
 * when transform_8x8_mode_flag is set (command length grows accordingly).
 * No-op when no IQ matrix buffer was provided (hardware defaults apply).
 */
static void
gen6_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int cmd_len;
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        cmd_len += 2 * 16; /* load two 8x8 scaling matrices */

    BEGIN_BCS_BATCH(batch, cmd_len);
    OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | (cmd_len - 2));

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        OUT_BCS_BATCH(batch, 
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
    else
        OUT_BCS_BATCH(batch, 
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0x3f << 0)); /* six 4x4 scaling matrices */

    intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);

    ADVANCE_BCS_BATCH(batch);
}
412
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 DWords): the direct-MV buffer
 * addresses of all 16 reference slots plus the current picture, followed
 * by the top/bottom POC list for each slot and the current picture.
 * Each surface contributes two address DWords: top-field MV buffer, then
 * either the bottom-field buffer or the top one repeated when no separate
 * bottom buffer exists.
 */
static void
gen6_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen6_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen6_mfd_context->reference_surface[i].obj_surface &&
            gen6_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen6_mfd_context->reference_surface[i].obj_surface;
            gen6_avc_surface = obj_surface->private_data;
            OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);

            if (gen6_avc_surface->dmv_bottom_flag == 1)
                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
            else
                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
        } else {
            /* empty slot: still emit the two address DWords */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen6_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen6_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        obj_surface = gen6_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            /* NOTE: this inner va_pic intentionally shadows the outer one */
            const VAPictureH264 * const va_pic = avc_find_picture(
                obj_surface->base.id, pic_param->ReferenceFrames,
                ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
498
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: slice type, reference list
 * sizes, weighted-prediction mode, QP, deblocking offsets, and the
 * start position of this slice and of the next one (or the picture end
 * for the last slice, which also sets the last-slice flag).
 */
static void
gen6_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int weighted_pred_idc = 0;
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    unsigned int chroma_log2_weight_denom, luma_log2_weight_denom;
    int slice_type;

    /* Fold SI/SP into their I/P equivalents for the hardware */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else { 
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    luma_log2_weight_denom   = slice_param->luma_log2_weight_denom;
    chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;

    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom   = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    /* In MBAFF pictures MB addresses count pairs, hence the shift */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs; 
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* last slice extends to the bottom of the picture */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs;
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch, 
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch, 
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch, 
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) | 
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, 
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
598
599 static inline void
600 gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
601                            VAPictureParameterBufferH264 *pic_param,
602                            VASliceParameterBufferH264 *slice_param,
603                            struct gen6_mfd_context *gen6_mfd_context)
604 {
605     gen6_send_avc_ref_idx_state(
606         gen6_mfd_context->base.batch,
607         slice_param,
608         gen6_mfd_context->reference_surface
609     );
610 }
611
/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE when explicit weighted prediction is
 * enabled: one table for L0 (P/SP slices with weighted_pred_flag, or B
 * slices), and a second for L1 (B slices with weighted_bipred_idc == 1).
 * Each table packs 32 entries of {luma weight/offset, Cb weight/offset,
 * Cr weight/offset} as 16-bit values.
 */
static void
gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
                                VAPictureParameterBufferH264 *pic_param,
                                VASliceParameterBufferH264 *slice_param,
                                struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int i, j, num_weight_offset_table = 0;
    short weightoffsets[32 * 6];

    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) &&
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        num_weight_offset_table = 1;
    }
    
    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        num_weight_offset_table = 2;
    }

    for (i = 0; i < num_weight_offset_table; i++) {
        BEGIN_BCS_BATCH(batch, 98);
        OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
        OUT_BCS_BATCH(batch, i); /* 0 = L0 table, 1 = L1 table */

        if (i == 0) {
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
            }
        } else {
            for (j = 0; j < 32; j++) {
                weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
                weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
                weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
                weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
                weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
                weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
        ADVANCE_BCS_BATCH(batch);
    }
}
662
/*
 * Emit MFD_AVC_BSD_OBJECT: kick off bitstream decode of one slice.
 * Programs the slice data size/offset within the indirect object buffer
 * and the bit offset of the first macroblock (byte offset in DW5[16:...],
 * remaining bit position expressed as 7 minus the bit offset within the
 * byte).
 */
static void
gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    unsigned int slice_data_bit_offset;

    /* skip the slice header; CABAC vs CAVLC affects where data starts */
    slice_data_bit_offset = avc_get_first_mb_bit_offset(
        slice_data_bo,
        slice_param,
        pic_param->pic_fields.bits.entropy_coding_mode_flag
    );

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, 
                  (slice_param->slice_data_size - slice_param->slice_data_offset));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (1 << 6)  |
                  ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
698
/* Emit a phantom slice covering macroblocks that precede the first real
 * slice of the picture (the caller invokes this only when the first
 * slice does not start at macroblock 0). */
static void
gen6_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen6_mfd_context->base.batch);
}
707
/* Emit a phantom slice after the last real slice; passing NULL as the
 * next-slice parameter tells gen6_mfd_avc_phantom_slice to extend the
 * phantom region to the end of the picture. */
static void
gen6_mfd_avc_phantom_slice_last(VADriverContextP ctx,
                                VAPictureParameterBufferH264 *pic_param,
                                struct gen6_mfd_context *gen6_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, NULL, gen6_mfd_context->base.batch);
}
715
716 static void
717 gen6_mfd_avc_decode_init(VADriverContextP ctx,
718                          struct decode_state *decode_state,
719                          struct gen6_mfd_context *gen6_mfd_context)
720 {
721     VAPictureParameterBufferH264 *pic_param;
722     VASliceParameterBufferH264 *slice_param;
723     struct i965_driver_data *i965 = i965_driver_data(ctx);
724     struct object_surface *obj_surface;
725     dri_bo *bo;
726     int i, j, enable_avc_ildb = 0;
727     int width_in_mbs;
728
729     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
730         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
731         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
732
733         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
734             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
735             assert((slice_param->slice_type == SLICE_TYPE_I) ||
736                    (slice_param->slice_type == SLICE_TYPE_SI) ||
737                    (slice_param->slice_type == SLICE_TYPE_P) ||
738                    (slice_param->slice_type == SLICE_TYPE_SP) ||
739                    (slice_param->slice_type == SLICE_TYPE_B));
740
741             if (slice_param->disable_deblocking_filter_idc != 1) {
742                 enable_avc_ildb = 1;
743                 break;
744             }
745
746             slice_param++;
747         }
748     }
749
750     assert(decode_state->pic_param && decode_state->pic_param->buffer);
751     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
752     intel_update_avc_frame_store_index(ctx, decode_state, pic_param,
753         gen6_mfd_context->reference_surface, &gen6_mfd_context->fs_ctx);
754     width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
755
756     /* Current decoded picture */
757     obj_surface = decode_state->render_object;
758     if (pic_param->pic_fields.bits.reference_pic_flag)
759         obj_surface->flags |= SURFACE_REFERENCED;
760     else
761         obj_surface->flags &= ~SURFACE_REFERENCED;
762
763     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
764     gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
765
766     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
767     gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
768     dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
769     gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
770
771     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
772     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
773     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
774     gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
775
776     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
777     bo = dri_bo_alloc(i965->intel.bufmgr,
778                       "intra row store",
779                       width_in_mbs * 64,
780                       0x1000);
781     assert(bo);
782     gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
783     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;
784
785     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
786     bo = dri_bo_alloc(i965->intel.bufmgr,
787                       "deblocking filter row store",
788                       width_in_mbs * 64 * 4,
789                       0x1000);
790     assert(bo);
791     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
792     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
793
794     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
795     bo = dri_bo_alloc(i965->intel.bufmgr,
796                       "bsd mpc row store",
797                       width_in_mbs * 96,
798                       0x1000);
799     assert(bo);
800     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
801     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
802
803     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
804     bo = dri_bo_alloc(i965->intel.bufmgr,
805                       "mpr row store",
806                       width_in_mbs * 64,
807                       0x1000);
808     assert(bo);
809     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
810     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
811
812     gen6_mfd_context->bitplane_read_buffer.valid = 0;
813 }
814
815 static void
816 gen6_mfd_avc_decode_picture(VADriverContextP ctx,
817                             struct decode_state *decode_state,
818                             struct gen6_mfd_context *gen6_mfd_context)
819 {
820     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
821     VAPictureParameterBufferH264 *pic_param;
822     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
823     dri_bo *slice_data_bo;
824     int i, j;
825
826     assert(decode_state->pic_param && decode_state->pic_param->buffer);
827     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
828     gen6_mfd_avc_decode_init(ctx, decode_state, gen6_mfd_context);
829
830     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
831     intel_batchbuffer_emit_mi_flush(batch);
832     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
833     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
834     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
835     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
836     gen6_mfd_avc_img_state(ctx, decode_state, gen6_mfd_context);
837     gen6_mfd_avc_qm_state(ctx, decode_state, gen6_mfd_context);
838
839     for (j = 0; j < decode_state->num_slice_params; j++) {
840         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
841         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
842         slice_data_bo = decode_state->slice_datas[j]->bo;
843         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen6_mfd_context);
844
845         if (j == decode_state->num_slice_params - 1)
846             next_slice_group_param = NULL;
847         else
848             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
849
850             if (j == 0 &&
851                 slice_param->first_mb_in_slice)
852                 gen6_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen6_mfd_context);
853
854         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
855             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
856             assert((slice_param->slice_type == SLICE_TYPE_I) ||
857                    (slice_param->slice_type == SLICE_TYPE_SI) ||
858                    (slice_param->slice_type == SLICE_TYPE_P) ||
859                    (slice_param->slice_type == SLICE_TYPE_SP) ||
860                    (slice_param->slice_type == SLICE_TYPE_B));
861
862             if (i < decode_state->slice_params[j]->num_elements - 1)
863                 next_slice_param = slice_param + 1;
864             else
865                 next_slice_param = next_slice_group_param;
866
867             gen6_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen6_mfd_context);
868             gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
869             gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen6_mfd_context);
870             gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen6_mfd_context);
871             gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen6_mfd_context);
872             slice_param++;
873         }
874     }
875     
876     gen6_mfd_avc_phantom_slice_last(ctx, pic_param, gen6_mfd_context);
877     intel_batchbuffer_end_atomic(batch);
878     intel_batchbuffer_flush(batch);
879 }
880
881 static void
882 gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
883                            struct decode_state *decode_state,
884                            struct gen6_mfd_context *gen6_mfd_context)
885 {
886     VAPictureParameterBufferMPEG2 *pic_param;
887     struct i965_driver_data *i965 = i965_driver_data(ctx);
888     struct object_surface *obj_surface;
889     dri_bo *bo;
890     unsigned int width_in_mbs;
891
892     assert(decode_state->pic_param && decode_state->pic_param->buffer);
893     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
894     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
895
896     mpeg2_set_reference_surfaces(
897         ctx,
898         gen6_mfd_context->reference_surface,
899         decode_state,
900         pic_param
901     );
902
903     /* Current decoded picture */
904     obj_surface = decode_state->render_object;
905     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
906
907     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
908     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
909     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
910     gen6_mfd_context->pre_deblocking_output.valid = 1;
911
912     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
913     bo = dri_bo_alloc(i965->intel.bufmgr,
914                       "bsd mpc row store",
915                       width_in_mbs * 96,
916                       0x1000);
917     assert(bo);
918     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
919     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
920
921     gen6_mfd_context->post_deblocking_output.valid = 0;
922     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
923     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
924     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
925     gen6_mfd_context->bitplane_read_buffer.valid = 0;
926 }
927
928 static void
929 gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
930                          struct decode_state *decode_state,
931                          struct gen6_mfd_context *gen6_mfd_context)
932 {
933     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
934     VAPictureParameterBufferMPEG2 *pic_param;
935     unsigned int tff, pic_structure;
936
937     assert(decode_state->pic_param && decode_state->pic_param->buffer);
938     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
939
940     pic_structure = pic_param->picture_coding_extension.bits.picture_structure;
941     if (pic_structure == MPEG_FRAME)
942         tff = pic_param->picture_coding_extension.bits.top_field_first;
943     else
944         tff = !(pic_param->picture_coding_extension.bits.is_first_field ^
945                 (pic_structure & MPEG_TOP_FIELD));
946
947     BEGIN_BCS_BATCH(batch, 4);
948     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
949     OUT_BCS_BATCH(batch,
950                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
951                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
952                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
953                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
954                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
955                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
956                   tff << 11 |
957                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
958                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
959                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
960                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
961                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
962     OUT_BCS_BATCH(batch,
963                   pic_param->picture_coding_type << 9);
964     OUT_BCS_BATCH(batch,
965                   (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
966                   (ALIGN(pic_param->horizontal_size, 16) / 16));
967     ADVANCE_BCS_BATCH(batch);
968 }
969
970 static void
971 gen6_mfd_mpeg2_qm_state(VADriverContextP ctx,
972                         struct decode_state *decode_state,
973                         struct gen6_mfd_context *gen6_mfd_context)
974 {
975     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
976     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen6_mfd_context->iq_matrix.mpeg2;
977     int i, j;
978
979     /* Update internal QM state */
980     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
981         VAIQMatrixBufferMPEG2 * const iq_matrix =
982             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
983
984         gen_iq_matrix->load_intra_quantiser_matrix =
985             iq_matrix->load_intra_quantiser_matrix;
986         if (iq_matrix->load_intra_quantiser_matrix) {
987             for (j = 0; j < 64; j++)
988                 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
989                     iq_matrix->intra_quantiser_matrix[j];
990         }
991
992         gen_iq_matrix->load_non_intra_quantiser_matrix =
993             iq_matrix->load_non_intra_quantiser_matrix;
994         if (iq_matrix->load_non_intra_quantiser_matrix) {
995             for (j = 0; j < 64; j++)
996                 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
997                     iq_matrix->non_intra_quantiser_matrix[j];
998         }
999     }
1000
1001     /* Commit QM state to HW */
1002     for (i = 0; i < 2; i++) {
1003         unsigned char *qm = NULL;
1004
1005         if (i == 0) {
1006             if (gen_iq_matrix->load_intra_quantiser_matrix)
1007                 qm = gen_iq_matrix->intra_quantiser_matrix;
1008         } else {
1009             if (gen_iq_matrix->load_non_intra_quantiser_matrix)
1010                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1011         }
1012
1013         if (!qm)
1014             continue;
1015
1016         BEGIN_BCS_BATCH(batch, 18);
1017         OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
1018         OUT_BCS_BATCH(batch, i);
1019         intel_batchbuffer_data(batch, qm, 64);
1020         ADVANCE_BCS_BATCH(batch);
1021     }
1022 }
1023
1024 static void
1025 gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1026                           VAPictureParameterBufferMPEG2 *pic_param,
1027                           VASliceParameterBufferMPEG2 *slice_param,
1028                           VASliceParameterBufferMPEG2 *next_slice_param,
1029                           struct gen6_mfd_context *gen6_mfd_context)
1030 {
1031     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1032     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1033     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1034
1035     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1036         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1037         is_field_pic = 1;
1038     is_field_pic_wa = is_field_pic &&
1039         gen6_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1040
1041     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1042     hpos0 = slice_param->slice_horizontal_position;
1043
1044     if (next_slice_param == NULL) {
1045         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1046         hpos1 = 0;
1047     } else {
1048         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1049         hpos1 = next_slice_param->slice_horizontal_position;
1050     }
1051
1052     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1053
1054     BEGIN_BCS_BATCH(batch, 5);
1055     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1056     OUT_BCS_BATCH(batch, 
1057                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1058     OUT_BCS_BATCH(batch, 
1059                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1060     OUT_BCS_BATCH(batch,
1061                   hpos0 << 24 |
1062                   vpos0 << 16 |
1063                   mb_count << 8 |
1064                   (next_slice_param == NULL) << 5 |
1065                   (next_slice_param == NULL) << 3 |
1066                   (slice_param->macroblock_offset & 0x7));
1067     OUT_BCS_BATCH(batch,
1068                   slice_param->quantiser_scale_code << 24);
1069     ADVANCE_BCS_BATCH(batch);
1070 }
1071
1072 static void
1073 gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1074                               struct decode_state *decode_state,
1075                               struct gen6_mfd_context *gen6_mfd_context)
1076 {
1077     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1078     VAPictureParameterBufferMPEG2 *pic_param;
1079     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param;
1080     dri_bo *slice_data_bo;
1081     int group_idx = 0, pre_group_idx = -1, element_idx = 0;
1082
1083     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1084     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1085
1086     gen6_mfd_mpeg2_decode_init(ctx, decode_state, gen6_mfd_context);
1087     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1088     intel_batchbuffer_emit_mi_flush(batch);
1089     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1090     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1091     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1092     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
1093     gen6_mfd_mpeg2_pic_state(ctx, decode_state, gen6_mfd_context);
1094     gen6_mfd_mpeg2_qm_state(ctx, decode_state, gen6_mfd_context);
1095
1096     if (gen6_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1097         gen6_mfd_context->wa_mpeg2_slice_vertical_position =
1098             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1099
1100     slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[group_idx]->buffer;
1101
1102     for (; slice_param;) {
1103         if (pre_group_idx != group_idx) {
1104             slice_data_bo = decode_state->slice_datas[group_idx]->bo;
1105             gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);
1106             pre_group_idx = group_idx;
1107         }
1108
1109         next_slice_param = intel_mpeg2_find_next_slice(decode_state, pic_param, slice_param, &group_idx, &element_idx);
1110         gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
1111         slice_param = next_slice_param;
1112     }
1113
1114     intel_batchbuffer_end_atomic(batch);
1115     intel_batchbuffer_flush(batch);
1116 }
1117
/* Map the VA-API VC-1 picture_type index (I/P/B/BI/skipped) to the GEN6
 * encoding; index 4 (skipped) is decoded as a P picture. */
static const int va_to_gen6_vc1_pic_type[5] = {
    GEN6_VC1_I_PICTURE,
    GEN6_VC1_P_PICTURE,
    GEN6_VC1_B_PICTURE,
    GEN6_VC1_BI_PICTURE,
    GEN6_VC1_P_PICTURE,
};
1125
/* Map the VA-API VC-1 mv_mode (or mv_mode2 under intensity compensation)
 * to the GEN6 unified MV mode encoding. */
static const int va_to_gen6_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};
1132
/* Fractional scale factors (x256) indexed by the VC-1 b_picture_fraction
 * field; used to scale the reference distance for B pictures. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};
1140
/* Conditional-overlap mapping table — presumably indexed by the VA-API
 * VC-1 CONDOVER field (used by the pic-state emitter, not shown here);
 * TODO(review): confirm against the caller. */
static const int va_to_gen6_vc1_condover[3] = {
    0,
    2,
    3
};
1146
/* Map the VA-API VC-1 sequence profile index to the GEN6 encoding. */
static const int va_to_gen6_vc1_profile[4] = {
    GEN6_VC1_SIMPLE_PROFILE,
    GEN6_VC1_MAIN_PROFILE,
    GEN6_VC1_RESERVED_PROFILE,
    GEN6_VC1_ADVANCED_PROFILE
};
1153
1154 static void 
1155 gen6_mfd_free_vc1_surface(void **data)
1156 {
1157     struct gen6_vc1_surface *gen6_vc1_surface = *data;
1158
1159     if (!gen6_vc1_surface)
1160         return;
1161
1162     dri_bo_unreference(gen6_vc1_surface->dmv);
1163     free(gen6_vc1_surface);
1164     *data = NULL;
1165 }
1166
1167 static void
1168 gen6_mfd_init_vc1_surface(VADriverContextP ctx, 
1169                           VAPictureParameterBufferVC1 *pic_param,
1170                           struct object_surface *obj_surface)
1171 {
1172     struct i965_driver_data *i965 = i965_driver_data(ctx);
1173     struct gen6_vc1_surface *gen6_vc1_surface = obj_surface->private_data;
1174     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1175
1176     obj_surface->free_private_data = gen6_mfd_free_vc1_surface;
1177
1178     if (!gen6_vc1_surface) {
1179         gen6_vc1_surface = calloc(sizeof(struct gen6_vc1_surface), 1);
1180         assert((obj_surface->size & 0x3f) == 0);
1181         obj_surface->private_data = gen6_vc1_surface;
1182     }
1183
1184     gen6_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1185
1186     if (gen6_vc1_surface->dmv == NULL) {
1187         gen6_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1188                                              "direct mv w/r buffer",
1189                                              128 * height_in_mbs * 64,  /* scalable with frame height */
1190                                              0x1000);
1191     }
1192 }
1193
/*
 * Prepare per-frame state for VC-1 decoding: refresh the reference
 * frame-store mapping, bind the render surface as pre- or
 * post-deblocking output depending on the loop-filter flag,
 * (re)allocate the row-store scratch buffers, and — when the picture
 * carries bitplane data — repack it into the nibble layout the HW
 * expects.
 */
static void
gen6_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen6_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Exactly one of the two outputs is valid, chosen by the loop filter */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers scale with the frame width in MBs */
    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* The MPR row store is not used for VC-1 */
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen6_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);

    if (gen6_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* Destination packs two macroblocks per byte (one nibble each) */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen6_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /* Repack the bitplane row by row. The source also stores two MBs
         * per byte; even MB indices occupy the high nibble (src_shift 4),
         * odd ones the low nibble. Each destination byte is built by
         * shifting the previous nibble down and inserting the new MB's
         * nibble at the top, so after a pair of writes any garbage read
         * from the unmapped BO has been shifted out. */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* Skipped pictures force the SKIPMB bit on every MB —
                 * presumably bit 1 of the nibble; TODO confirm against
                 * the hardware bitplane layout. */
                if (picture_type == GEN6_VC1_SKIPPED_PICTURE){
                    src_value |= 0x2;
                }

                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd row width: the last byte received only one nibble, so
             * move it from the high half into its final low position. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen6_mfd_context->bitplane_read_buffer.bo = NULL;
}
1312
1313 static void
1314 gen6_mfd_vc1_pic_state(VADriverContextP ctx,
1315                        struct decode_state *decode_state,
1316                        struct gen6_mfd_context *gen6_mfd_context)
1317 {
1318     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1319     VAPictureParameterBufferVC1 *pic_param;
1320     struct object_surface *obj_surface;
1321     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1322     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1323     int unified_mv_mode;
1324     int ref_field_pic_polarity = 0;
1325     int scale_factor = 0;
1326     int trans_ac_y = 0;
1327     int dmv_surface_valid = 0;
1328     int brfd = 0;
1329     int fcm = 0;
1330     int picture_type;
1331     int profile;
1332     int overlap;
1333
1334     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1335     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1336
1337     profile = va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile];
1338     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1339     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1340     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1341     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1342     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1343     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1344     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1345
1346     if (dquant == 0) {
1347         alt_pquant_config = 0;
1348         alt_pquant_edge_mask = 0;
1349     } else if (dquant == 2) {
1350         alt_pquant_config = 1;
1351         alt_pquant_edge_mask = 0xf;
1352     } else {
1353         assert(dquant == 1);
1354         if (dquantfrm == 0) {
1355             alt_pquant_config = 0;
1356             alt_pquant_edge_mask = 0;
1357             alt_pq = 0;
1358         } else {
1359             assert(dquantfrm == 1);
1360             alt_pquant_config = 1;
1361
1362             switch (dqprofile) {
1363             case 3:
1364                 if (dqbilevel == 0) {
1365                     alt_pquant_config = 2;
1366                     alt_pquant_edge_mask = 0;
1367                 } else {
1368                     assert(dqbilevel == 1);
1369                     alt_pquant_config = 3;
1370                     alt_pquant_edge_mask = 0;
1371                 }
1372                 break;
1373                 
1374             case 0:
1375                 alt_pquant_edge_mask = 0xf;
1376                 break;
1377
1378             case 1:
1379                 if (dqdbedge == 3)
1380                     alt_pquant_edge_mask = 0x9;
1381                 else
1382                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1383
1384                 break;
1385
1386             case 2:
1387                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1388                 break;
1389
1390             default:
1391                 assert(0);
1392             }
1393         }
1394     }
1395
1396     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1397         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1398         unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1399     } else {
1400         assert(pic_param->mv_fields.bits.mv_mode < 4);
1401         unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1402     }
1403
1404     if (pic_param->sequence_fields.bits.interlace == 1 &&
1405         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1406         /* FIXME: calculate reference field picture polarity */
1407         assert(0);
1408         ref_field_pic_polarity = 0;
1409     }
1410
1411     if (pic_param->b_picture_fraction < 21)
1412         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1413
1414     picture_type = va_to_gen6_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1415     
1416     if (profile == GEN6_VC1_ADVANCED_PROFILE && 
1417         picture_type == GEN6_VC1_I_PICTURE)
1418         picture_type = GEN6_VC1_BI_PICTURE;
1419
1420     if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
1421         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1422     else {
1423         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1424         /*
1425          * 8.3.6.2.1 Transform Type Selection
1426          * If variable-sized transform coding is not enabled,
1427          * then the 8x8 transform shall be used for all blocks.
1428          * it is also MFX_VC1_PIC_STATE requirement.
1429          */
1430         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1431             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1432             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1433         }
1434     }
1435
1436     if (picture_type == GEN6_VC1_B_PICTURE) {
1437         struct gen6_vc1_surface *gen6_vc1_surface = NULL;
1438
1439         obj_surface = decode_state->reference_objects[1];
1440
1441         if (obj_surface)
1442             gen6_vc1_surface = obj_surface->private_data;
1443
1444         if (!gen6_vc1_surface || 
1445             (va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_I_PICTURE ||
1446              va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_BI_PICTURE))
1447             dmv_surface_valid = 0;
1448         else
1449             dmv_surface_valid = 1;
1450     }
1451
1452     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1453
1454     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1455         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1456     else {
1457         if (pic_param->picture_fields.bits.top_field_first)
1458             fcm = 2;
1459         else
1460             fcm = 3;
1461     }
1462
1463     if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_B_PICTURE) { /* B picture */
1464         brfd = pic_param->reference_fields.bits.reference_distance;
1465         brfd = (scale_factor * brfd) >> 8;
1466         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1467
1468         if (brfd < 0)
1469             brfd = 0;
1470     }
1471
1472     overlap = 0;
1473     if (profile != GEN6_VC1_ADVANCED_PROFILE){
1474         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1475             pic_param->picture_fields.bits.picture_type != GEN6_VC1_B_PICTURE) {
1476             overlap = 1; 
1477         }
1478     }else {
1479         if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_P_PICTURE &&
1480              pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1481               overlap = 1; 
1482         }
1483         if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_I_PICTURE ||
1484             pic_param->picture_fields.bits.picture_type == GEN6_VC1_BI_PICTURE){
1485              if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1486                 overlap = 1; 
1487              } else if (va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1488                         va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1489                  overlap = 1;
1490              }
1491         }
1492     } 
1493
1494     assert(pic_param->conditional_overlap_flag < 3);
1495     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1496
1497     BEGIN_BCS_BATCH(batch, 6);
1498     OUT_BCS_BATCH(batch, MFX_VC1_PIC_STATE | (6 - 2));
1499     OUT_BCS_BATCH(batch,
1500                   (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
1501                   (ALIGN(pic_param->coded_width, 16) / 16));
1502     OUT_BCS_BATCH(batch,
1503                   pic_param->sequence_fields.bits.syncmarker << 31 |
1504                   1 << 29 | /* concealment */
1505                   alt_pq << 24 |
1506                   pic_param->entrypoint_fields.bits.loopfilter << 23 |
1507                   overlap << 22 |
1508                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 21 | /* implicit quantizer */
1509                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 16 |
1510                   alt_pquant_edge_mask << 12 |
1511                   alt_pquant_config << 10 |
1512                   pic_param->pic_quantizer_fields.bits.half_qp << 9 |
1513                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 8 |
1514                   va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] << 6 |
1515                   !pic_param->picture_fields.bits.is_first_field << 5 |
1516                   picture_type << 2 |
1517                   fcm << 0);
1518     OUT_BCS_BATCH(batch,
1519                   !!pic_param->bitplane_present.value << 23 |
1520                   !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
1521                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
1522                   !pic_param->bitplane_present.flags.bp_skip_mb << 20 |
1523                   !pic_param->bitplane_present.flags.bp_direct_mb << 19 |
1524                   !pic_param->bitplane_present.flags.bp_overflags << 18 |
1525                   !pic_param->bitplane_present.flags.bp_ac_pred << 17 |
1526                   !pic_param->bitplane_present.flags.bp_field_tx << 16 |
1527                   pic_param->mv_fields.bits.extended_dmv_range << 14 |
1528                   pic_param->mv_fields.bits.extended_mv_range << 12 |
1529                   pic_param->mv_fields.bits.four_mv_switch << 11 |
1530                   pic_param->fast_uvmc_flag << 10 |
1531                   unified_mv_mode << 8 |
1532                   ref_field_pic_polarity << 6 |
1533                   pic_param->reference_fields.bits.num_reference_pictures << 5 |
1534                   pic_param->reference_fields.bits.reference_distance << 0);
1535     OUT_BCS_BATCH(batch,
1536                   scale_factor << 24 |
1537                   pic_param->mv_fields.bits.mv_table << 20 |
1538                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1539                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1540                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |
1541                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1542                   pic_param->mb_mode_table << 8 |
1543                   trans_ac_y << 6 |
1544                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1545                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1546                   pic_param->cbp_table << 0);
1547     OUT_BCS_BATCH(batch,
1548                   dmv_surface_valid << 13 |
1549                   brfd << 8 |
1550                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
1551     ADVANCE_BCS_BATCH(batch);
1552 }
1553
1554 static void
1555 gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1556                              struct decode_state *decode_state,
1557                              struct gen6_mfd_context *gen6_mfd_context)
1558 {
1559     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1560     VAPictureParameterBufferVC1 *pic_param;
1561     int interpolation_mode = 0;
1562     int intensitycomp_single;
1563
1564     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1565     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1566
1567     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1568         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1569          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1570         interpolation_mode = 2; /* Half-pel bilinear */
1571     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1572              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1573               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1574         interpolation_mode = 0; /* Half-pel bicubic */
1575     else
1576         interpolation_mode = 1; /* Quarter-pel bicubic */
1577
1578     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1579     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1580     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1581
1582     BEGIN_BCS_BATCH(batch, 7);
1583     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
1584     OUT_BCS_BATCH(batch,
1585                   0 << 8 | /* FIXME: interlace mode */
1586                   pic_param->rounding_control << 4 |
1587                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
1588     OUT_BCS_BATCH(batch,
1589                   pic_param->luma_shift << 16 |
1590                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1591     OUT_BCS_BATCH(batch, 0);
1592     OUT_BCS_BATCH(batch, 0);
1593     OUT_BCS_BATCH(batch, 0);
1594     OUT_BCS_BATCH(batch,
1595                   interpolation_mode << 19 |
1596                   pic_param->fast_uvmc_flag << 18 |
1597                   0 << 17 | /* FIXME: scale up or down ??? */
1598                   pic_param->range_reduction_frame << 16 |
1599                   0 << 6 | /* FIXME: double ??? */
1600                   0 << 4 |
1601                   intensitycomp_single << 2 |
1602                   intensitycomp_single << 0);
1603     ADVANCE_BCS_BATCH(batch);
1604 }
1605
1606
1607 static void
1608 gen6_mfd_vc1_directmode_state(VADriverContextP ctx,
1609                               struct decode_state *decode_state,
1610                               struct gen6_mfd_context *gen6_mfd_context)
1611 {
1612     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1613     struct object_surface *obj_surface;
1614     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1615
1616     obj_surface = decode_state->render_object;
1617
1618     if (obj_surface && obj_surface->private_data) {
1619         dmv_write_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1620     }
1621
1622     obj_surface = decode_state->reference_objects[1];
1623
1624     if (obj_surface && obj_surface->private_data) {
1625         dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
1626     }
1627
1628     BEGIN_BCS_BATCH(batch, 3);
1629     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1630
1631     if (dmv_write_buffer)
1632         OUT_BCS_RELOC(batch, dmv_write_buffer,
1633                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1634                       0);
1635     else
1636         OUT_BCS_BATCH(batch, 0);
1637
1638     if (dmv_read_buffer)
1639         OUT_BCS_RELOC(batch, dmv_read_buffer,
1640                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1641                       0);
1642     else
1643         OUT_BCS_BATCH(batch, 0);
1644                   
1645     ADVANCE_BCS_BATCH(batch);
1646 }
1647
/*
 * Translate the parser-reported macroblock bit offset into the offset the
 * hardware expects within the raw slice data.
 *
 * For the advanced profile (profile == 3) the bitstream may contain
 * start-code emulation prevention byte sequences (00 00 03 0x) in the slice
 * header; each such escape adds one extra byte that the reported offset does
 * not account for, so the byte position is advanced past them.  For all other
 * profiles the offset is returned unchanged.
 *
 * NOTE(review): the scan reads up to buf[pos + 3]; it assumes the mapped
 * slice data extends a few bytes past the header — confirm against callers.
 */
static int
gen6_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int scanned, pos;

    if (profile != 3)
        return in_slice_data_bit_offset;

    /* Walk the slice header, stepping over 00 00 03 0x escape sequences. */
    pos = 0;
    for (scanned = 0; scanned < header_bytes; scanned++, pos++) {
        if (buf[pos] == 0 && buf[pos + 1] == 0 && buf[pos + 2] == 3 && buf[pos + 3] < 4) {
            scanned++;
            pos += 2;
        }
    }

    return 8 * pos + in_slice_data_bit_offset % 8;
}
1669
1670 static void
1671 gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
1672                         VAPictureParameterBufferVC1 *pic_param,
1673                         VASliceParameterBufferVC1 *slice_param,
1674                         VASliceParameterBufferVC1 *next_slice_param,
1675                         dri_bo *slice_data_bo,
1676                         struct gen6_mfd_context *gen6_mfd_context)
1677 {
1678     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1679     int next_slice_start_vert_pos;
1680     int macroblock_offset;
1681     uint8_t *slice_data = NULL;
1682
1683     dri_bo_map(slice_data_bo, 0);
1684     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1685     macroblock_offset = gen6_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1686                                                                slice_param->macroblock_offset,
1687                                                                pic_param->sequence_fields.bits.profile);
1688     dri_bo_unmap(slice_data_bo);
1689
1690     if (next_slice_param)
1691         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1692     else
1693         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1694
1695     BEGIN_BCS_BATCH(batch, 4);
1696     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (4 - 2));
1697     OUT_BCS_BATCH(batch, 
1698                   slice_param->slice_data_size - (macroblock_offset >> 3));
1699     OUT_BCS_BATCH(batch, 
1700                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1701     OUT_BCS_BATCH(batch,
1702                   slice_param->slice_vertical_position << 24 |
1703                   next_slice_start_vert_pos << 16 |
1704                   (macroblock_offset & 0x7));
1705     ADVANCE_BCS_BATCH(batch);
1706 }
1707
1708 static void
1709 gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
1710                             struct decode_state *decode_state,
1711                             struct gen6_mfd_context *gen6_mfd_context)
1712 {
1713     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
1714     VAPictureParameterBufferVC1 *pic_param;
1715     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1716     dri_bo *slice_data_bo;
1717     int i, j;
1718
1719     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1720     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1721
1722     gen6_mfd_vc1_decode_init(ctx, decode_state, gen6_mfd_context);
1723     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1724     intel_batchbuffer_emit_mi_flush(batch);
1725     gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1726     gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1727     gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1728     gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
1729     gen6_mfd_vc1_pic_state(ctx, decode_state, gen6_mfd_context);
1730     gen6_mfd_vc1_pred_pipe_state(ctx, decode_state, gen6_mfd_context);
1731     gen6_mfd_vc1_directmode_state(ctx, decode_state, gen6_mfd_context);
1732
1733     for (j = 0; j < decode_state->num_slice_params; j++) {
1734         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1735         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1736         slice_data_bo = decode_state->slice_datas[j]->bo;
1737         gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen6_mfd_context);
1738
1739         if (j == decode_state->num_slice_params - 1)
1740             next_slice_group_param = NULL;
1741         else
1742             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1743
1744         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1745             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1746
1747             if (i < decode_state->slice_params[j]->num_elements - 1)
1748                 next_slice_param = slice_param + 1;
1749             else
1750                 next_slice_param = next_slice_group_param;
1751
1752             gen6_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen6_mfd_context);
1753             slice_param++;
1754         }
1755     }
1756
1757     intel_batchbuffer_end_atomic(batch);
1758     intel_batchbuffer_flush(batch);
1759 }
1760
1761 static VAStatus
1762 gen6_mfd_decode_picture(VADriverContextP ctx, 
1763                         VAProfile profile, 
1764                         union codec_state *codec_state,
1765                         struct hw_context *hw_context)
1766
1767 {
1768     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
1769     struct decode_state *decode_state = &codec_state->decode;
1770     VAStatus vaStatus;
1771
1772     assert(gen6_mfd_context);
1773
1774     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
1775
1776     if (vaStatus != VA_STATUS_SUCCESS)
1777         goto out;
1778
1779     switch (profile) {
1780     case VAProfileMPEG2Simple:
1781     case VAProfileMPEG2Main:
1782         gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context);
1783         break;
1784         
1785     case VAProfileH264ConstrainedBaseline:
1786     case VAProfileH264Main:
1787     case VAProfileH264High:
1788     case VAProfileH264StereoHigh:
1789         gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context);
1790         break;
1791
1792     case VAProfileVC1Simple:
1793     case VAProfileVC1Main:
1794     case VAProfileVC1Advanced:
1795         gen6_mfd_vc1_decode_picture(ctx, decode_state, gen6_mfd_context);
1796         break;
1797
1798     default:
1799         assert(0);
1800         break;
1801     }
1802
1803     vaStatus = VA_STATUS_SUCCESS;
1804
1805 out:
1806     return vaStatus;
1807 }
1808
1809 static void
1810 gen6_mfd_context_destroy(void *hw_context)
1811 {
1812     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
1813
1814     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
1815     gen6_mfd_context->post_deblocking_output.bo = NULL;
1816
1817     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
1818     gen6_mfd_context->pre_deblocking_output.bo = NULL;
1819
1820     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
1821     gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1822
1823     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1824     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1825
1826     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1827     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1828
1829     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
1830     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1831
1832     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
1833     gen6_mfd_context->bitplane_read_buffer.bo = NULL;
1834
1835     intel_batchbuffer_free(gen6_mfd_context->base.batch);
1836     free(gen6_mfd_context);
1837 }
1838
1839 struct hw_context *
1840 gen6_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
1841 {
1842     struct intel_driver_data *intel = intel_driver_data(ctx);
1843     struct gen6_mfd_context *gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
1844     int i;
1845
1846     gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
1847     gen6_mfd_context->base.run = gen6_mfd_decode_picture;
1848     gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
1849
1850     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
1851         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
1852         gen6_mfd_context->reference_surface[i].frame_store_id = -1;
1853         gen6_mfd_context->reference_surface[i].obj_surface = NULL;
1854     }
1855
1856     gen6_mfd_context->wa_mpeg2_slice_vertical_position = -1;
1857     
1858     return (struct hw_context *)gen6_mfd_context;
1859 }