i965_drv_video: improved MV quality for VME
[platform/upstream/libva.git] / i965_drv_video / gen7_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include <va/va_backend.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38
39 #include "i965_defines.h"
40 #include "i965_drv_video.h"
41
42 #include "gen7_mfd.h"
43
/* Byte size of one direct-MV buffer: 0x88000 == 557056 bytes for a frame. */
#define DMV_SIZE        0x88000
45
/*
 * 8x8 zig-zag scan table: entry k holds the raster position
 * (row * 8 + column) of the k-th coefficient in zig-zag order.
 */
static const uint32_t zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
};
56
57 static void
58 gen7_mfd_avc_frame_store_index(VADriverContextP ctx,
59                                VAPictureParameterBufferH264 *pic_param,
60                                struct gen7_mfd_context *gen7_mfd_context)
61 {
62     struct i965_driver_data *i965 = i965_driver_data(ctx);
63     int i, j;
64
65     assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
66
67     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
68         int found = 0;
69
70         if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
71             continue;
72
73         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
74             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
75             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
76                 continue;
77
78             if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
79                 found = 1;
80                 break;
81             }
82         }
83
84         if (!found) {
85             struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
86             obj_surface->flags &= ~SURFACE_REFERENCED;
87
88             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
89                 dri_bo_unreference(obj_surface->bo);
90                 obj_surface->bo = NULL;
91                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
92             }
93
94             if (obj_surface->free_private_data)
95                 obj_surface->free_private_data(&obj_surface->private_data);
96
97             gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
98             gen7_mfd_context->reference_surface[i].frame_store_id = -1;
99         }
100     }
101
102     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
103         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
104         int found = 0;
105
106         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
107             continue;
108
109         for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
110             if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
111                 continue;
112             
113             if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
114                 found = 1;
115                 break;
116             }
117         }
118
119         if (!found) {
120             int frame_idx;
121             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
122             
123             assert(obj_surface);
124             i965_check_alloc_surface_bo(ctx, obj_surface, 1);
125
126             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
127                 for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
128                     if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
129                         continue;
130
131                     if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
132                         break;
133                 }
134
135                 if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
136                     break;
137             }
138
139             assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
140
141             for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
142                 if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
143                     gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
144                     gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
145                     break;
146                 }
147             }
148         }
149     }
150
151     /* sort */
152     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
153         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
154             gen7_mfd_context->reference_surface[i].frame_store_id == i)
155             continue;
156
157         for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
158             if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
159                 gen7_mfd_context->reference_surface[j].frame_store_id == i) {
160                 VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
161                 int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
162
163                 gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
164                 gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
165                 gen7_mfd_context->reference_surface[j].surface_id = id;
166                 gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
167                 break;
168             }
169         }
170     }
171 }
172
173 static void 
174 gen7_mfd_free_avc_surface(void **data)
175 {
176     struct gen7_avc_surface *gen7_avc_surface = *data;
177
178     if (!gen7_avc_surface)
179         return;
180
181     dri_bo_unreference(gen7_avc_surface->dmv_top);
182     gen7_avc_surface->dmv_top = NULL;
183     dri_bo_unreference(gen7_avc_surface->dmv_bottom);
184     gen7_avc_surface->dmv_bottom = NULL;
185
186     free(gen7_avc_surface);
187     *data = NULL;
188 }
189
190 static void
191 gen7_mfd_init_avc_surface(VADriverContextP ctx, 
192                           VAPictureParameterBufferH264 *pic_param,
193                           struct object_surface *obj_surface)
194 {
195     struct i965_driver_data *i965 = i965_driver_data(ctx);
196     struct gen7_avc_surface *gen7_avc_surface = obj_surface->private_data;
197
198     obj_surface->free_private_data = gen7_mfd_free_avc_surface;
199
200     if (!gen7_avc_surface) {
201         gen7_avc_surface = calloc(sizeof(struct gen7_avc_surface), 1);
202         assert((obj_surface->size & 0x3f) == 0);
203         obj_surface->private_data = gen7_avc_surface;
204     }
205
206     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
207                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
208
209     if (gen7_avc_surface->dmv_top == NULL) {
210         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
211                                                  "direct mv w/r buffer",
212                                                  DMV_SIZE,
213                                                  0x1000);
214     }
215
216     if (gen7_avc_surface->dmv_bottom_flag &&
217         gen7_avc_surface->dmv_bottom == NULL) {
218         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
219                                                     "direct mv w/r buffer",
220                                                     DMV_SIZE,
221                                                     0x1000);
222     }
223 }
224
225 static void
226 gen7_mfd_pipe_mode_select(VADriverContextP ctx,
227                           struct decode_state *decode_state,
228                           int standard_select,
229                           struct gen7_mfd_context *gen7_mfd_context)
230 {
231     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
232
233     assert(standard_select == MFX_FORMAT_MPEG2 ||
234            standard_select == MFX_FORMAT_AVC ||
235            standard_select == MFX_FORMAT_VC1 ||
236            standard_select == MFX_FORMAT_JPEG);
237
238     BEGIN_BCS_BATCH(batch, 5); /* FIXME: 5 ??? */
239     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
240     OUT_BCS_BATCH(batch,
241                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
242                   (MFD_MODE_VLD << 15) | /* VLD mode */
243                   (0 << 10) | /* disable Stream-Out */
244                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
245                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
246                   (0 << 5)  | /* not in stitch mode */
247                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
248                   (standard_select << 0));
249     OUT_BCS_BATCH(batch,
250                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
251                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
252                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
253                   (0 << 1)  |
254                   (0 << 0));
255     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
256     OUT_BCS_BATCH(batch, 0); /* reserved */
257     ADVANCE_BCS_BATCH(batch);
258 }
259
260 static void
261 gen7_mfd_surface_state(VADriverContextP ctx,
262                        struct decode_state *decode_state,
263                        int standard_select,
264                        struct gen7_mfd_context *gen7_mfd_context)
265 {
266     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
267     struct i965_driver_data *i965 = i965_driver_data(ctx);
268     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
269     assert(obj_surface);
270     
271     BEGIN_BCS_BATCH(batch, 6);
272     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
273     OUT_BCS_BATCH(batch, 0);
274     OUT_BCS_BATCH(batch,
275                   ((obj_surface->orig_height - 1) << 18) |
276                   ((obj_surface->orig_width - 1) << 4));
277     OUT_BCS_BATCH(batch,
278                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
279                   (1 << 27) | /* FIXME: set to 0 for JPEG */
280                   (0 << 22) | /* surface object control state, FIXME??? */
281                   ((obj_surface->width - 1) << 3) | /* pitch */
282                   (0 << 2)  | /* must be 0 for interleave U/V */
283                   (1 << 1)  | /* must be tiled */
284                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
285     OUT_BCS_BATCH(batch,
286                   (0 << 16) | /* FIXME: fix it for JPEG */
287                   (obj_surface->height)); /* FIXME: fix it for JPEG */
288     OUT_BCS_BATCH(batch, 0); /* FIXME: fix it for JPEG */
289     ADVANCE_BCS_BATCH(batch);
290 }
291
292 static void
293 gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
294                              struct decode_state *decode_state,
295                              int standard_select,
296                              struct gen7_mfd_context *gen7_mfd_context)
297 {
298     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
299     struct i965_driver_data *i965 = i965_driver_data(ctx);
300     int i;
301
302     BEGIN_BCS_BATCH(batch, 24);
303     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
304     if (gen7_mfd_context->pre_deblocking_output.valid)
305         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
306                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
307                       0);
308     else
309         OUT_BCS_BATCH(batch, 0);
310
311     if (gen7_mfd_context->post_deblocking_output.valid)
312         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
313                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
314                       0);
315     else
316         OUT_BCS_BATCH(batch, 0);
317
318     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
319     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
320
321     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
322         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
323                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
324                       0);
325     else
326         OUT_BCS_BATCH(batch, 0);
327
328     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
329         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
330                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
331                       0);
332     else
333         OUT_BCS_BATCH(batch, 0);
334
335     /* DW 7..22 */
336     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
337         struct object_surface *obj_surface;
338
339         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
340             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
341             assert(obj_surface && obj_surface->bo);
342
343             OUT_BCS_RELOC(batch, obj_surface->bo,
344                           I915_GEM_DOMAIN_INSTRUCTION, 0,
345                           0);
346         } else {
347             OUT_BCS_BATCH(batch, 0);
348         }
349     }
350
351     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
352     ADVANCE_BCS_BATCH(batch);
353 }
354
355 static void
356 gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
357                                  dri_bo *slice_data_bo,
358                                  int standard_select,
359                                  struct gen7_mfd_context *gen7_mfd_context)
360 {
361     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
362
363     BEGIN_BCS_BATCH(batch, 11);
364     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
365     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
366     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
367     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
368     OUT_BCS_BATCH(batch, 0);
369     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
370     OUT_BCS_BATCH(batch, 0);
371     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
372     OUT_BCS_BATCH(batch, 0);
373     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
374     OUT_BCS_BATCH(batch, 0);
375     ADVANCE_BCS_BATCH(batch);
376 }
377
378 static void
379 gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
380                                  struct decode_state *decode_state,
381                                  int standard_select,
382                                  struct gen7_mfd_context *gen7_mfd_context)
383 {
384     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
385
386     BEGIN_BCS_BATCH(batch, 4);
387     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
388
389     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
390         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
391                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
392                       0);
393     else
394         OUT_BCS_BATCH(batch, 0);
395
396     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
397         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
398                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
399                       0);
400     else
401         OUT_BCS_BATCH(batch, 0);
402
403     if (gen7_mfd_context->bitplane_read_buffer.valid)
404         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
405                       I915_GEM_DOMAIN_INSTRUCTION, 0,
406                       0);
407     else
408         OUT_BCS_BATCH(batch, 0);
409
410     ADVANCE_BCS_BATCH(batch);
411 }
412
413 static void
414 gen7_mfd_aes_state(VADriverContextP ctx,
415                    struct decode_state *decode_state,
416                    int standard_select)
417 {
418     /* FIXME */
419 }
420
421 static void
422 gen7_mfd_qm_state(VADriverContextP ctx,
423                   int qm_type,
424                   unsigned char *qm,
425                   int qm_length,
426                   struct gen7_mfd_context *gen7_mfd_context)
427 {
428     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
429     unsigned int qm_buffer[16];
430
431     assert(qm_length <= 16 * 4);
432     memcpy(qm_buffer, qm, qm_length);
433
434     BEGIN_BCS_BATCH(batch, 18);
435     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
436     OUT_BCS_BATCH(batch, qm_type << 0);
437     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
438     ADVANCE_BCS_BATCH(batch);
439 }
440 static void
441 gen7_mfd_wait(VADriverContextP ctx,
442               struct decode_state *decode_state,
443               int standard_select,
444               struct gen7_mfd_context *gen7_mfd_context)
445 {
446     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
447
448     BEGIN_BCS_BATCH(batch, 1);
449     OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
450     ADVANCE_BCS_BATCH(batch);
451 }
452
453 static void
454 gen7_mfd_avc_img_state(VADriverContextP ctx,
455                        struct decode_state *decode_state,
456                        struct gen7_mfd_context *gen7_mfd_context)
457 {
458     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
459     int qm_present_flag;
460     int img_struct;
461     int mbaff_frame_flag;
462     unsigned int width_in_mbs, height_in_mbs;
463     VAPictureParameterBufferH264 *pic_param;
464
465     assert(decode_state->pic_param && decode_state->pic_param->buffer);
466     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
467     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
468
469     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
470         qm_present_flag = 1;
471     else
472         qm_present_flag = 0; /* built-in QM matrices */
473
474     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
475         img_struct = 1;
476     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
477         img_struct = 3;
478     else
479         img_struct = 0;
480
481     if ((img_struct & 0x1) == 0x1) {
482         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
483     } else {
484         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
485     }
486
487     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
488         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
489         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
490     } else {
491         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
492     }
493
494     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
495                         !pic_param->pic_fields.bits.field_pic_flag);
496
497     width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
498     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
499
500     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
501     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
502            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
503     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
504
505     BEGIN_BCS_BATCH(batch, 16);
506     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
507     OUT_BCS_BATCH(batch, 
508                   width_in_mbs * height_in_mbs);
509     OUT_BCS_BATCH(batch, 
510                   ((height_in_mbs - 1) << 16) | 
511                   ((width_in_mbs - 1) << 0));
512     OUT_BCS_BATCH(batch, 
513                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
514                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
515                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
516                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
517                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
518                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
519                   (img_struct << 8));
520     OUT_BCS_BATCH(batch,
521                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
522                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
523                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
524                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
525                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
526                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
527                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
528                   (mbaff_frame_flag << 1) |
529                   (pic_param->pic_fields.bits.field_pic_flag << 0));
530     OUT_BCS_BATCH(batch, 0);
531     OUT_BCS_BATCH(batch, 0);
532     OUT_BCS_BATCH(batch, 0);
533     OUT_BCS_BATCH(batch, 0);
534     OUT_BCS_BATCH(batch, 0);
535     OUT_BCS_BATCH(batch, 0);
536     OUT_BCS_BATCH(batch, 0);
537     OUT_BCS_BATCH(batch, 0);
538     OUT_BCS_BATCH(batch, 0);
539     OUT_BCS_BATCH(batch, 0);
540     OUT_BCS_BATCH(batch, 0);
541     ADVANCE_BCS_BATCH(batch);
542 }
543
544 static void
545 gen7_mfd_avc_qm_state(VADriverContextP ctx,
546                       struct decode_state *decode_state,
547                       struct gen7_mfd_context *gen7_mfd_context)
548 {
549     VAIQMatrixBufferH264 *iq_matrix;
550     VAPictureParameterBufferH264 *pic_param;
551
552     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
553         return;
554
555     iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
556
557     assert(decode_state->pic_param && decode_state->pic_param->buffer);
558     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
559
560     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
561     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
562
563     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
564         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
565         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
566     }
567 }
568
569 static void
570 gen7_mfd_avc_directmode_state(VADriverContextP ctx,
571                               VAPictureParameterBufferH264 *pic_param,
572                               VASliceParameterBufferH264 *slice_param,
573                               struct gen7_mfd_context *gen7_mfd_context)
574 {
575     struct i965_driver_data *i965 = i965_driver_data(ctx);
576     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
577     struct object_surface *obj_surface;
578     struct gen7_avc_surface *gen7_avc_surface;
579     VAPictureH264 *va_pic;
580     int i, j;
581
582     BEGIN_BCS_BATCH(batch, 69);
583     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
584
585     /* reference surfaces 0..15 */
586     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
587         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
588             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
589             assert(obj_surface);
590             gen7_avc_surface = obj_surface->private_data;
591
592             if (gen7_avc_surface == NULL) {
593                 OUT_BCS_BATCH(batch, 0);
594                 OUT_BCS_BATCH(batch, 0);
595             } else {
596                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
597                               I915_GEM_DOMAIN_INSTRUCTION, 0,
598                               0);
599
600                 if (gen7_avc_surface->dmv_bottom_flag == 1)
601                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
602                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
603                                   0);
604                 else
605                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
606                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
607                                   0);
608             }
609         } else {
610             OUT_BCS_BATCH(batch, 0);
611             OUT_BCS_BATCH(batch, 0);
612         }
613     }
614
615     /* the current decoding frame/field */
616     va_pic = &pic_param->CurrPic;
617     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
618     obj_surface = SURFACE(va_pic->picture_id);
619     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
620     gen7_avc_surface = obj_surface->private_data;
621
622     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
623                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
624                   0);
625
626     if (gen7_avc_surface->dmv_bottom_flag == 1)
627         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
628                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
629                       0);
630     else
631         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
632                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
633                       0);
634
635     /* POC List */
636     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
637         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
638             int found = 0;
639             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
640                 va_pic = &pic_param->ReferenceFrames[j];
641                 
642                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
643                     continue;
644
645                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
646                     found = 1;
647                     break;
648                 }
649             }
650
651             assert(found == 1);
652             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
653             
654             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
655             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
656         } else {
657             OUT_BCS_BATCH(batch, 0);
658             OUT_BCS_BATCH(batch, 0);
659         }
660     }
661
662     va_pic = &pic_param->CurrPic;
663     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
664     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
665
666     ADVANCE_BCS_BATCH(batch);
667 }
668
669 static void
670 gen7_mfd_avc_slice_state(VADriverContextP ctx,
671                          VAPictureParameterBufferH264 *pic_param,
672                          VASliceParameterBufferH264 *slice_param,
673                          VASliceParameterBufferH264 *next_slice_param,
674                          struct gen7_mfd_context *gen7_mfd_context)
675 {
676     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
677     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
678     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
679     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
680     int num_ref_idx_l0, num_ref_idx_l1;
681     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
682                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
683     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
684     int slice_type;
685
686     if (slice_param->slice_type == SLICE_TYPE_I ||
687         slice_param->slice_type == SLICE_TYPE_SI) {
688         slice_type = SLICE_TYPE_I;
689     } else if (slice_param->slice_type == SLICE_TYPE_P ||
690                slice_param->slice_type == SLICE_TYPE_SP) {
691         slice_type = SLICE_TYPE_P;
692     } else { 
693         assert(slice_param->slice_type == SLICE_TYPE_B);
694         slice_type = SLICE_TYPE_B;
695     }
696
697     if (slice_type == SLICE_TYPE_I) {
698         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
699         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
700         num_ref_idx_l0 = 0;
701         num_ref_idx_l1 = 0;
702     } else if (slice_type == SLICE_TYPE_P) {
703         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
704         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
705         num_ref_idx_l1 = 0;
706     } else {
707         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
708         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
709     }
710
711     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
712     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
713     slice_ver_pos = first_mb_in_slice / width_in_mbs;
714
715     if (next_slice_param) {
716         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
717         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
718         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
719     } else {
720         next_slice_hor_pos = 0;
721         next_slice_ver_pos = height_in_mbs;
722     }
723
724     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
725     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
726     OUT_BCS_BATCH(batch, slice_type);
727     OUT_BCS_BATCH(batch, 
728                   (num_ref_idx_l1 << 24) |
729                   (num_ref_idx_l0 << 16) |
730                   (slice_param->chroma_log2_weight_denom << 8) |
731                   (slice_param->luma_log2_weight_denom << 0));
732     OUT_BCS_BATCH(batch, 
733                   (slice_param->direct_spatial_mv_pred_flag << 29) |
734                   (slice_param->disable_deblocking_filter_idc << 27) |
735                   (slice_param->cabac_init_idc << 24) |
736                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
737                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
738                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
739     OUT_BCS_BATCH(batch, 
740                   (slice_ver_pos << 24) |
741                   (slice_hor_pos << 16) | 
742                   (first_mb_in_slice << 0));
743     OUT_BCS_BATCH(batch,
744                   (next_slice_ver_pos << 16) |
745                   (next_slice_hor_pos << 0));
746     OUT_BCS_BATCH(batch, 
747                   (next_slice_param == NULL) << 19); /* last slice flag */
748     OUT_BCS_BATCH(batch, 0);
749     OUT_BCS_BATCH(batch, 0);
750     OUT_BCS_BATCH(batch, 0);
751     OUT_BCS_BATCH(batch, 0);
752     ADVANCE_BCS_BATCH(batch);
753 }
754
755 static void
756 gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
757                            VAPictureParameterBufferH264 *pic_param,
758                            VASliceParameterBufferH264 *slice_param,
759                            struct gen7_mfd_context *gen7_mfd_context)
760 {
761     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
762     int i, j, num_ref_list;
763     struct {
764         unsigned char bottom_idc:1;
765         unsigned char frame_store_index:4;
766         unsigned char field_picture:1;
767         unsigned char long_term:1;
768         unsigned char non_exist:1;
769     } refs[32];
770
771     if (slice_param->slice_type == SLICE_TYPE_I ||
772         slice_param->slice_type == SLICE_TYPE_SI)
773         return;
774
775     if (slice_param->slice_type == SLICE_TYPE_P ||
776         slice_param->slice_type == SLICE_TYPE_SP) {
777         num_ref_list = 1;
778     } else {
779         num_ref_list = 2;
780     }
781
782     for (i = 0; i < num_ref_list; i++) {
783         VAPictureH264 *va_pic;
784
785         if (i == 0) {
786             va_pic = slice_param->RefPicList0;
787         } else {
788             va_pic = slice_param->RefPicList1;
789         }
790
791         BEGIN_BCS_BATCH(batch, 10);
792         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | (10 - 2));
793         OUT_BCS_BATCH(batch, i);
794
795         for (j = 0; j < 32; j++) {
796             if (va_pic->flags & VA_PICTURE_H264_INVALID) {
797                 refs[j].non_exist = 1;
798                 refs[j].long_term = 1;
799                 refs[j].field_picture = 1;
800                 refs[j].frame_store_index = 0xf;
801                 refs[j].bottom_idc = 1;
802             } else {
803                 int frame_idx;
804                 
805                 for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
806                     if (gen7_mfd_context->reference_surface[frame_idx].surface_id != VA_INVALID_ID &&
807                         va_pic->picture_id == gen7_mfd_context->reference_surface[frame_idx].surface_id) {
808                         assert(frame_idx == gen7_mfd_context->reference_surface[frame_idx].frame_store_id);
809                         break;
810                     }
811                 }
812
813                 assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
814                 
815                 refs[j].non_exist = 0;
816                 refs[j].long_term = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
817                 refs[j].field_picture = !!(va_pic->flags & 
818                                            (VA_PICTURE_H264_TOP_FIELD | 
819                                             VA_PICTURE_H264_BOTTOM_FIELD));
820                 refs[j].frame_store_index = frame_idx;
821                 refs[j].bottom_idc = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
822             }
823
824             va_pic++;
825         }
826         
827         intel_batchbuffer_data(batch, refs, sizeof(refs));
828         ADVANCE_BCS_BATCH(batch);
829     }
830 }
831
832 static void
833 gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
834                                 VAPictureParameterBufferH264 *pic_param,
835                                 VASliceParameterBufferH264 *slice_param,
836                                 struct gen7_mfd_context *gen7_mfd_context)
837 {
838     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
839     int i, j, num_weight_offset_table = 0;
840     short weightoffsets[32 * 6];
841
842     if ((slice_param->slice_type == SLICE_TYPE_P ||
843          slice_param->slice_type == SLICE_TYPE_SP) &&
844         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
845         num_weight_offset_table = 1;
846     }
847     
848     if ((slice_param->slice_type == SLICE_TYPE_B) &&
849         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
850         num_weight_offset_table = 2;
851     }
852
853     for (i = 0; i < num_weight_offset_table; i++) {
854         BEGIN_BCS_BATCH(batch, 98);
855         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
856         OUT_BCS_BATCH(batch, i);
857
858         if (i == 0) {
859             for (j = 0; j < 32; j++) {
860                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
861                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
862                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
863                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
864                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
865                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
866             }
867         } else {
868             for (j = 0; j < 32; j++) {
869                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
870                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
871                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
872                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
873                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
874                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
875             }
876         }
877
878         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
879         ADVANCE_BCS_BATCH(batch);
880     }
881 }
882
883 static int
884 gen7_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
885 {
886     int out_slice_data_bit_offset;
887     int slice_header_size = in_slice_data_bit_offset / 8;
888     int i, j;
889
890     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
891         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
892             i++, j += 2;
893         }
894     }
895
896     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
897
898     if (mode_flag == ENTROPY_CABAC)
899         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
900
901     return out_slice_data_bit_offset;
902 }
903
904 static void
905 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
906                         VAPictureParameterBufferH264 *pic_param,
907                         VASliceParameterBufferH264 *slice_param,
908                         dri_bo *slice_data_bo,
909                         VASliceParameterBufferH264 *next_slice_param,
910                         struct gen7_mfd_context *gen7_mfd_context)
911 {
912     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
913     int slice_data_bit_offset;
914     uint8_t *slice_data = NULL;
915
916     dri_bo_map(slice_data_bo, 0);
917     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
918     slice_data_bit_offset = gen7_mfd_avc_get_slice_bit_offset(slice_data,
919                                                               pic_param->pic_fields.bits.entropy_coding_mode_flag,
920                                                               slice_param->slice_data_bit_offset);
921     dri_bo_unmap(slice_data_bo);
922
923     /* the input bitsteam format on GEN7 differs from GEN6 */
924     BEGIN_BCS_BATCH(batch, 6);
925     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
926     OUT_BCS_BATCH(batch, 
927                   (slice_param->slice_data_size));
928     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
929     OUT_BCS_BATCH(batch,
930                   (0 << 31) |
931                   (0 << 14) |
932                   (0 << 12) |
933                   (0 << 10) |
934                   (0 << 8));
935     OUT_BCS_BATCH(batch,
936                   ((slice_data_bit_offset >> 3) << 16) |
937                   (0 << 5)  |
938                   (0 << 4)  |
939                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
940                   (slice_data_bit_offset & 0x7));
941     OUT_BCS_BATCH(batch, 0);
942     ADVANCE_BCS_BATCH(batch);
943 }
944
945 static void
946 gen7_mfd_avc_decode_init(VADriverContextP ctx,
947                          struct decode_state *decode_state,
948                          struct gen7_mfd_context *gen7_mfd_context)
949 {
950     VAPictureParameterBufferH264 *pic_param;
951     VASliceParameterBufferH264 *slice_param;
952     VAPictureH264 *va_pic;
953     struct i965_driver_data *i965 = i965_driver_data(ctx);
954     struct object_surface *obj_surface;
955     dri_bo *bo;
956     int i, j, enable_avc_ildb = 0;
957
958     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
959         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
960         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
961
962         assert(decode_state->slice_params[j]->num_elements == 1);
963         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
964             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
965             assert((slice_param->slice_type == SLICE_TYPE_I) ||
966                    (slice_param->slice_type == SLICE_TYPE_SI) ||
967                    (slice_param->slice_type == SLICE_TYPE_P) ||
968                    (slice_param->slice_type == SLICE_TYPE_SP) ||
969                    (slice_param->slice_type == SLICE_TYPE_B));
970
971             if (slice_param->disable_deblocking_filter_idc != 1) {
972                 enable_avc_ildb = 1;
973                 break;
974             }
975
976             slice_param++;
977         }
978     }
979
980     assert(decode_state->pic_param && decode_state->pic_param->buffer);
981     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
982     gen7_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
983
984     /* Current decoded picture */
985     va_pic = &pic_param->CurrPic;
986     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
987     obj_surface = SURFACE(va_pic->picture_id);
988     assert(obj_surface);
989     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
990     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
991     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
992     i965_check_alloc_surface_bo(ctx, obj_surface, 1);
993
994     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
995     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
996     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
997     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
998
999     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1000     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1001     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1002     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1003
1004     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1005     bo = dri_bo_alloc(i965->intel.bufmgr,
1006                       "intra row store",
1007                       128 * 64,
1008                       0x1000);
1009     assert(bo);
1010     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1011     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1012
1013     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1014     bo = dri_bo_alloc(i965->intel.bufmgr,
1015                       "deblocking filter row store",
1016                       30720, /* 4 * 120 * 64 */
1017                       0x1000);
1018     assert(bo);
1019     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1020     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1021
1022     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1023     bo = dri_bo_alloc(i965->intel.bufmgr,
1024                       "bsd mpc row store",
1025                       11520, /* 1.5 * 120 * 64 */
1026                       0x1000);
1027     assert(bo);
1028     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1029     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1030
1031     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
1032     bo = dri_bo_alloc(i965->intel.bufmgr,
1033                       "mpr row store",
1034                       7680, /* 1. 0 * 120 * 64 */
1035                       0x1000);
1036     assert(bo);
1037     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1038     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1039
1040     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1041 }
1042
1043 static void
1044 gen7_mfd_avc_decode_picture(VADriverContextP ctx,
1045                             struct decode_state *decode_state,
1046                             struct gen7_mfd_context *gen7_mfd_context)
1047 {
1048     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1049     VAPictureParameterBufferH264 *pic_param;
1050     VASliceParameterBufferH264 *slice_param, *next_slice_param;
1051     dri_bo *slice_data_bo;
1052     int i, j;
1053
1054     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1055     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1056     gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
1057
1058     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1059     intel_batchbuffer_emit_mi_flush(batch);
1060     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1061     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1062     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1063     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1064     gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
1065     gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
1066
1067     for (j = 0; j < decode_state->num_slice_params; j++) {
1068         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1069         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1070         slice_data_bo = decode_state->slice_datas[j]->bo;
1071
1072         if (j == decode_state->num_slice_params - 1)
1073             next_slice_param = NULL;
1074         else
1075             next_slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1076
1077         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
1078         assert(decode_state->slice_params[j]->num_elements == 1);
1079
1080         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1081             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1082             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1083                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1084                    (slice_param->slice_type == SLICE_TYPE_P) ||
1085                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1086                    (slice_param->slice_type == SLICE_TYPE_B));
1087
1088             if (i < decode_state->slice_params[j]->num_elements - 1)
1089                 next_slice_param = slice_param + 1;
1090
1091             gen7_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
1092             gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1093             gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1094             gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1095             gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1096             slice_param++;
1097         }
1098     }
1099
1100     intel_batchbuffer_end_atomic(batch);
1101     intel_batchbuffer_flush(batch);
1102 }
1103
1104 static void
1105 gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
1106                            struct decode_state *decode_state,
1107                            struct gen7_mfd_context *gen7_mfd_context)
1108 {
1109     VAPictureParameterBufferMPEG2 *pic_param;
1110     struct i965_driver_data *i965 = i965_driver_data(ctx);
1111     struct object_surface *obj_surface;
1112     int i;
1113     dri_bo *bo;
1114
1115     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1116     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1117
1118     /* reference picture */
1119     obj_surface = SURFACE(pic_param->forward_reference_picture);
1120
1121     if (obj_surface && obj_surface->bo)
1122         gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1123     else
1124         gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1125
1126     obj_surface = SURFACE(pic_param->backward_reference_picture);
1127
1128     if (obj_surface && obj_surface->bo)
1129         gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1130     else
1131         gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1132
1133     /* must do so !!! */
1134     for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
1135         gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
1136
1137     /* Current decoded picture */
1138     obj_surface = SURFACE(decode_state->current_render_target);
1139     assert(obj_surface);
1140     i965_check_alloc_surface_bo(ctx, obj_surface, 1);
1141
1142     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1143     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1144     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1145     gen7_mfd_context->pre_deblocking_output.valid = 1;
1146
1147     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1148     bo = dri_bo_alloc(i965->intel.bufmgr,
1149                       "bsd mpc row store",
1150                       11520, /* 1.5 * 120 * 64 */
1151                       0x1000);
1152     assert(bo);
1153     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1154     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1155
1156     gen7_mfd_context->post_deblocking_output.valid = 0;
1157     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1158     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1159     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1160     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1161 }
1162
1163 static void
1164 gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
1165                          struct decode_state *decode_state,
1166                          struct gen7_mfd_context *gen7_mfd_context)
1167 {
1168     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1169     VAPictureParameterBufferMPEG2 *pic_param;
1170
1171     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1172     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1173
1174     BEGIN_BCS_BATCH(batch, 13);
1175     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1176     OUT_BCS_BATCH(batch,
1177                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1178                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1179                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1180                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1181                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1182                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1183                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1184                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1185                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1186                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1187                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1188                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1189     OUT_BCS_BATCH(batch,
1190                   pic_param->picture_coding_type << 9);
1191     OUT_BCS_BATCH(batch,
1192                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1193                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1194     OUT_BCS_BATCH(batch, 0);
1195     OUT_BCS_BATCH(batch, 0);
1196     OUT_BCS_BATCH(batch, 0);
1197     OUT_BCS_BATCH(batch, 0);
1198     OUT_BCS_BATCH(batch, 0);
1199     OUT_BCS_BATCH(batch, 0);
1200     OUT_BCS_BATCH(batch, 0);
1201     OUT_BCS_BATCH(batch, 0);
1202     OUT_BCS_BATCH(batch, 0);
1203     ADVANCE_BCS_BATCH(batch);
1204 }
1205
1206 static void
1207 gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
1208                         struct decode_state *decode_state,
1209                         struct gen7_mfd_context *gen7_mfd_context)
1210 {
1211     VAIQMatrixBufferMPEG2 *iq_matrix;
1212     int i;
1213
1214     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
1215         return;
1216
1217     iq_matrix = (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1218
1219     for (i = 0; i < 2; i++) {
1220         int k, m;
1221         unsigned char *qm = NULL;
1222         unsigned char qmx[64];
1223         int qm_type;
1224
1225         if (i == 0) {
1226             if (iq_matrix->load_intra_quantiser_matrix) {
1227                 qm = iq_matrix->intra_quantiser_matrix;
1228                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1229             }
1230         } else {
1231             if (iq_matrix->load_non_intra_quantiser_matrix) {
1232                 qm = iq_matrix->non_intra_quantiser_matrix;
1233                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1234             }
1235         }
1236
1237         if (!qm)
1238             continue;
1239
1240         /* Upload quantisation matrix in raster order. The mplayer vaapi
1241          * patch passes quantisation matrix in zig-zag order to va library.
1242          */
1243         for (k = 0; k < 64; k++) {
1244             m = zigzag_direct[k];
1245             qmx[m] = qm[k];
1246         }
1247
1248         gen7_mfd_qm_state(ctx, qm_type, qmx, 64, gen7_mfd_context);
1249     }
1250 }
1251
1252 static void
1253 gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1254                           VAPictureParameterBufferMPEG2 *pic_param,
1255                           VASliceParameterBufferMPEG2 *slice_param,
1256                           VASliceParameterBufferMPEG2 *next_slice_param,
1257                           struct gen7_mfd_context *gen7_mfd_context)
1258 {
1259     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1260     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1261     unsigned int height_in_mbs = ALIGN(pic_param->vertical_size, 16) / 16;
1262     int mb_count;
1263
1264     if (next_slice_param == NULL)
1265         mb_count = width_in_mbs * height_in_mbs - 
1266             (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position);
1267     else
1268         mb_count = (next_slice_param->slice_vertical_position * width_in_mbs + next_slice_param->slice_horizontal_position) - 
1269             (slice_param->slice_vertical_position * width_in_mbs + slice_param->slice_horizontal_position);
1270
1271     BEGIN_BCS_BATCH(batch, 5);
1272     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1273     OUT_BCS_BATCH(batch, 
1274                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1275     OUT_BCS_BATCH(batch, 
1276                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1277     OUT_BCS_BATCH(batch,
1278                   slice_param->slice_horizontal_position << 24 |
1279                   slice_param->slice_vertical_position << 16 |
1280                   mb_count << 8 |
1281                   (next_slice_param == NULL) << 5 |
1282                   (next_slice_param == NULL) << 3 |
1283                   (slice_param->macroblock_offset & 0x7));
1284     OUT_BCS_BATCH(batch,
1285                   slice_param->quantiser_scale_code << 24);
1286     ADVANCE_BCS_BATCH(batch);
1287 }
1288
1289 static void
1290 gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1291                               struct decode_state *decode_state,
1292                               struct gen7_mfd_context *gen7_mfd_context)
1293 {
1294     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1295     VAPictureParameterBufferMPEG2 *pic_param;
1296     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param;
1297     dri_bo *slice_data_bo;
1298     int i, j;
1299
1300     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1301     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1302
1303     gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1304     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1305     intel_batchbuffer_emit_mi_flush(batch);
1306     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1307     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1308     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1309     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1310     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1311     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1312
1313     assert(decode_state->num_slice_params == 1);
1314     for (j = 0; j < decode_state->num_slice_params; j++) {
1315         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1316         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1317         slice_data_bo = decode_state->slice_datas[j]->bo;
1318         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1319
1320         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1321             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1322
1323             if (i < decode_state->slice_params[j]->num_elements - 1)
1324                 next_slice_param = slice_param + 1;
1325             else
1326                 next_slice_param = NULL;
1327
1328             gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1329             slice_param++;
1330         }
1331     }
1332
1333     intel_batchbuffer_end_atomic(batch);
1334     intel_batchbuffer_flush(batch);
1335 }
1336
1337 static const int va_to_gen7_vc1_pic_type[5] = {
1338     GEN7_VC1_I_PICTURE,
1339     GEN7_VC1_P_PICTURE,
1340     GEN7_VC1_B_PICTURE,
1341     GEN7_VC1_BI_PICTURE,
1342     GEN7_VC1_P_PICTURE,
1343 };
1344
1345 static const int va_to_gen7_vc1_mv[4] = {
1346     1, /* 1-MV */
1347     2, /* 1-MV half-pel */
1348     3, /* 1-MV half-pef bilinear */
1349     0, /* Mixed MV */
1350 };
1351
1352 static const int b_picture_scale_factor[21] = {
1353     128, 85,  170, 64,  192,
1354     51,  102, 153, 204, 43,
1355     215, 37,  74,  111, 148,
1356     185, 222, 32,  96,  160, 
1357     224,
1358 };
1359
1360 static const int va_to_gen7_vc1_condover[3] = {
1361     0,
1362     2,
1363     3
1364 };
1365
1366 static const int va_to_gen7_vc1_profile[4] = {
1367     GEN7_VC1_SIMPLE_PROFILE,
1368     GEN7_VC1_MAIN_PROFILE,
1369     GEN7_VC1_RESERVED_PROFILE,
1370     GEN7_VC1_ADVANCED_PROFILE
1371 };
1372
1373 static const int va_to_gen7_vc1_ttfrm[8] = {
1374     0,  /* 8x8 */
1375     1,  /* 8x4 bottom */
1376     1,  /* 8x4 top */
1377     1,  /* 8x4 */
1378     2,  /* 4x8 bottom */
1379     2,  /* 4x8 top */
1380     2,  /* 4x8 */
1381     3,  /* 4x4 */
1382 };
1383
1384 static void 
1385 gen7_mfd_free_vc1_surface(void **data)
1386 {
1387     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1388
1389     if (!gen7_vc1_surface)
1390         return;
1391
1392     dri_bo_unreference(gen7_vc1_surface->dmv);
1393     free(gen7_vc1_surface);
1394     *data = NULL;
1395 }
1396
1397 static void
1398 gen7_mfd_init_vc1_surface(VADriverContextP ctx, 
1399                           VAPictureParameterBufferVC1 *pic_param,
1400                           struct object_surface *obj_surface)
1401 {
1402     struct i965_driver_data *i965 = i965_driver_data(ctx);
1403     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1404
1405     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
1406
1407     if (!gen7_vc1_surface) {
1408         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1409         assert((obj_surface->size & 0x3f) == 0);
1410         obj_surface->private_data = gen7_vc1_surface;
1411     }
1412
1413     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1414
1415     if (gen7_vc1_surface->dmv == NULL) {
1416         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1417                                              "direct mv w/r buffer",
1418                                              557056,    /* 64 * 128 * 64 */
1419                                              0x1000);
1420     }
1421 }
1422
1423 static void
1424 gen7_mfd_vc1_decode_init(VADriverContextP ctx,
1425                          struct decode_state *decode_state,
1426                          struct gen7_mfd_context *gen7_mfd_context)
1427 {
1428     VAPictureParameterBufferVC1 *pic_param;
1429     struct i965_driver_data *i965 = i965_driver_data(ctx);
1430     struct object_surface *obj_surface;
1431     int i;
1432     dri_bo *bo;
1433
1434     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1435     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1436
1437     /* reference picture */
1438     obj_surface = SURFACE(pic_param->forward_reference_picture);
1439
1440     if (obj_surface && obj_surface->bo)
1441         gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1442     else
1443         gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1444
1445     obj_surface = SURFACE(pic_param->backward_reference_picture);
1446
1447     if (obj_surface && obj_surface->bo)
1448         gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1449     else
1450         gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1451
1452     /* must do so !!! */
1453     for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
1454         gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
1455
1456     /* Current decoded picture */
1457     obj_surface = SURFACE(decode_state->current_render_target);
1458     assert(obj_surface);
1459     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1460     i965_check_alloc_surface_bo(ctx, obj_surface, 1);
1461
1462     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1463     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1464     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1465     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1466
1467     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1468     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1469     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1470     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1471
1472     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1473     bo = dri_bo_alloc(i965->intel.bufmgr,
1474                       "intra row store",
1475                       128 * 64,
1476                       0x1000);
1477     assert(bo);
1478     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1479     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1480
1481     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1482     bo = dri_bo_alloc(i965->intel.bufmgr,
1483                       "deblocking filter row store",
1484                       46080, /* 6 * 120 * 64 */
1485                       0x1000);
1486     assert(bo);
1487     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1488     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1489
1490     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1491     bo = dri_bo_alloc(i965->intel.bufmgr,
1492                       "bsd mpc row store",
1493                       11520, /* 1.5 * 120 * 64 */
1494                       0x1000);
1495     assert(bo);
1496     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1497     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1498
1499     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1500
1501     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1502     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1503     
1504     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1505         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1506         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1507         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1508         int src_w, src_h;
1509         uint8_t *src = NULL, *dst = NULL;
1510
1511         assert(decode_state->bit_plane->buffer);
1512         src = decode_state->bit_plane->buffer;
1513
1514         bo = dri_bo_alloc(i965->intel.bufmgr,
1515                           "VC-1 Bitplane",
1516                           bitplane_width * bitplane_width,
1517                           0x1000);
1518         assert(bo);
1519         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1520
1521         dri_bo_map(bo, True);
1522         assert(bo->virtual);
1523         dst = bo->virtual;
1524
1525         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1526             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1527                 int src_index, dst_index;
1528                 int src_shift;
1529                 uint8_t src_value;
1530
1531                 src_index = (src_h * width_in_mbs + src_w) / 2;
1532                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1533                 src_value = ((src[src_index] >> src_shift) & 0xf);
1534
1535                 dst_index = src_w / 2;
1536                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1537             }
1538
1539             if (src_w & 1)
1540                 dst[src_w / 2] >>= 4;
1541
1542             dst += bitplane_width;
1543         }
1544
1545         dri_bo_unmap(bo);
1546     } else
1547         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1548 }
1549
1550 static void
1551 gen7_mfd_vc1_pic_state(VADriverContextP ctx,
1552                        struct decode_state *decode_state,
1553                        struct gen7_mfd_context *gen7_mfd_context)
1554 {
1555     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1556     VAPictureParameterBufferVC1 *pic_param;
1557     struct i965_driver_data *i965 = i965_driver_data(ctx);
1558     struct object_surface *obj_surface;
1559     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1560     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1561     int unified_mv_mode;
1562     int ref_field_pic_polarity = 0;
1563     int scale_factor = 0;
1564     int trans_ac_y = 0;
1565     int dmv_surface_valid = 0;
1566     int brfd = 0;
1567     int fcm = 0;
1568     int picture_type;
1569     int profile;
1570     int overlap;
1571     int interpolation_mode = 0;
1572
1573     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1574     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1575
1576     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1577     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1578     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1579     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1580     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1581     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1582     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1583     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1584
1585     if (dquant == 0) {
1586         alt_pquant_config = 0;
1587         alt_pquant_edge_mask = 0;
1588     } else if (dquant == 2) {
1589         alt_pquant_config = 1;
1590         alt_pquant_edge_mask = 0xf;
1591     } else {
1592         assert(dquant == 1);
1593         if (dquantfrm == 0) {
1594             alt_pquant_config = 0;
1595             alt_pquant_edge_mask = 0;
1596             alt_pq = 0;
1597         } else {
1598             assert(dquantfrm == 1);
1599             alt_pquant_config = 1;
1600
1601             switch (dqprofile) {
1602             case 3:
1603                 if (dqbilevel == 0) {
1604                     alt_pquant_config = 2;
1605                     alt_pquant_edge_mask = 0;
1606                 } else {
1607                     assert(dqbilevel == 1);
1608                     alt_pquant_config = 3;
1609                     alt_pquant_edge_mask = 0;
1610                 }
1611                 break;
1612                 
1613             case 0:
1614                 alt_pquant_edge_mask = 0xf;
1615                 break;
1616
1617             case 1:
1618                 if (dqdbedge == 3)
1619                     alt_pquant_edge_mask = 0x9;
1620                 else
1621                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1622
1623                 break;
1624
1625             case 2:
1626                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1627                 break;
1628
1629             default:
1630                 assert(0);
1631             }
1632         }
1633     }
1634
1635     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1636         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1637         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1638     } else {
1639         assert(pic_param->mv_fields.bits.mv_mode < 4);
1640         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1641     }
1642
1643     if (pic_param->sequence_fields.bits.interlace == 1 &&
1644         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1645         /* FIXME: calculate reference field picture polarity */
1646         assert(0);
1647         ref_field_pic_polarity = 0;
1648     }
1649
1650     if (pic_param->b_picture_fraction < 21)
1651         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1652
1653     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1654     
1655     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1656         picture_type == GEN7_VC1_I_PICTURE)
1657         picture_type = GEN7_VC1_BI_PICTURE;
1658
1659     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1660         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1661     else
1662         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1663
1664
1665     if (picture_type == GEN7_VC1_B_PICTURE) {
1666         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1667
1668         obj_surface = SURFACE(pic_param->backward_reference_picture);
1669         assert(obj_surface);
1670         gen7_vc1_surface = obj_surface->private_data;
1671
1672         if (!gen7_vc1_surface || 
1673             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1674              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1675             dmv_surface_valid = 0;
1676         else
1677             dmv_surface_valid = 1;
1678     }
1679
1680     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1681
1682     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1683         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1684     else {
1685         if (pic_param->picture_fields.bits.top_field_first)
1686             fcm = 2;
1687         else
1688             fcm = 3;
1689     }
1690
1691     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1692         brfd = pic_param->reference_fields.bits.reference_distance;
1693         brfd = (scale_factor * brfd) >> 8;
1694         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1695
1696         if (brfd < 0)
1697             brfd = 0;
1698     }
1699
1700     overlap = pic_param->sequence_fields.bits.overlap;
1701     if (profile != GEN7_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
1702         overlap = 0;
1703
1704     assert(pic_param->conditional_overlap_flag < 3);
1705     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1706
1707     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1708         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1709          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1710         interpolation_mode = 8; /* Half-pel bilinear */
1711     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1712              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1713               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1714         interpolation_mode = 0; /* Half-pel bicubic */
1715     else
1716         interpolation_mode = 1; /* Quarter-pel bicubic */
1717
1718     BEGIN_BCS_BATCH(batch, 6);
1719     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1720     OUT_BCS_BATCH(batch,
1721                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1722                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1723     OUT_BCS_BATCH(batch,
1724                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1725                   dmv_surface_valid << 15 |
1726                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1727                   pic_param->rounding_control << 13 |
1728                   pic_param->sequence_fields.bits.syncmarker << 12 |
1729                   interpolation_mode << 8 |
1730                   0 << 7 | /* FIXME: scale up or down ??? */
1731                   pic_param->range_reduction_frame << 6 |
1732                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1733                   overlap << 4 |
1734                   !pic_param->picture_fields.bits.is_first_field << 3 |
1735                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1736     OUT_BCS_BATCH(batch,
1737                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1738                   picture_type << 26 |
1739                   fcm << 24 |
1740                   alt_pq << 16 |
1741                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1742                   scale_factor << 0);
1743     OUT_BCS_BATCH(batch,
1744                   unified_mv_mode << 28 |
1745                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1746                   pic_param->fast_uvmc_flag << 26 |
1747                   ref_field_pic_polarity << 25 |
1748                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1749                   pic_param->reference_fields.bits.reference_distance << 20 |
1750                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1751                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1752                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1753                   alt_pquant_edge_mask << 4 |
1754                   alt_pquant_config << 2 |
1755                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1756                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1757     OUT_BCS_BATCH(batch,
1758                   !!pic_param->bitplane_present.value << 31 |
1759                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1760                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1761                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1762                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1763                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1764                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1765                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1766                   pic_param->mv_fields.bits.mv_table << 20 |
1767                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1768                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1769                   va_to_gen7_vc1_ttfrm[pic_param->transform_fields.bits.frame_level_transform_type] << 12 |                  
1770                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1771                   pic_param->mb_mode_table << 8 |
1772                   trans_ac_y << 6 |
1773                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1774                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1775                   pic_param->cbp_table << 0);
1776     ADVANCE_BCS_BATCH(batch);
1777 }
1778
1779 static void
1780 gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1781                              struct decode_state *decode_state,
1782                              struct gen7_mfd_context *gen7_mfd_context)
1783 {
1784     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1785     VAPictureParameterBufferVC1 *pic_param;
1786     int intensitycomp_single;
1787
1788     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1789     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1790
1791     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1792     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1793     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1794
1795     BEGIN_BCS_BATCH(batch, 6);
1796     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1797     OUT_BCS_BATCH(batch,
1798                   0 << 14 | /* FIXME: double ??? */
1799                   0 << 12 |
1800                   intensitycomp_single << 10 |
1801                   intensitycomp_single << 8 |
1802                   0 << 4 | /* FIXME: interlace mode */
1803                   0);
1804     OUT_BCS_BATCH(batch,
1805                   pic_param->luma_shift << 16 |
1806                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1807     OUT_BCS_BATCH(batch, 0);
1808     OUT_BCS_BATCH(batch, 0);
1809     OUT_BCS_BATCH(batch, 0);
1810     ADVANCE_BCS_BATCH(batch);
1811 }
1812
1813
1814 static void
1815 gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
1816                               struct decode_state *decode_state,
1817                               struct gen7_mfd_context *gen7_mfd_context)
1818 {
1819     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1820     VAPictureParameterBufferVC1 *pic_param;
1821     struct i965_driver_data *i965 = i965_driver_data(ctx);
1822     struct object_surface *obj_surface;
1823     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1824
1825     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1826     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1827
1828     obj_surface = SURFACE(decode_state->current_render_target);
1829
1830     if (obj_surface && obj_surface->private_data) {
1831         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1832     }
1833
1834     obj_surface = SURFACE(pic_param->backward_reference_picture);
1835
1836     if (obj_surface && obj_surface->private_data) {
1837         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1838     }
1839
1840     BEGIN_BCS_BATCH(batch, 3);
1841     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1842
1843     if (dmv_write_buffer)
1844         OUT_BCS_RELOC(batch, dmv_write_buffer,
1845                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1846                       0);
1847     else
1848         OUT_BCS_BATCH(batch, 0);
1849
1850     if (dmv_read_buffer)
1851         OUT_BCS_RELOC(batch, dmv_read_buffer,
1852                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1853                       0);
1854     else
1855         OUT_BCS_BATCH(batch, 0);
1856                   
1857     ADVANCE_BCS_BATCH(batch);
1858 }
1859
/*
 * Convert the macroblock bit offset supplied by the application into a
 * bit offset within the raw slice data.
 *
 * buf:                      start of the slice data
 * in_slice_data_bit_offset: bit offset of the first macroblock as given
 *                           in the slice parameters
 * profile:                  VA-API VC-1 profile value; only 3 is adjusted
 *
 * For profile 3 the whole bytes of the offset are walked and every
 * 00 00 03 0x sequence found advances the raw position by one extra byte.
 * For any other profile the offset is returned unchanged.  The scan reads
 * up to three bytes past the current position.
 */
static int
gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes;
    int counted = 0;    /* header bytes accounted for so far */
    int raw_pos = 0;    /* matching position in the raw buffer */

    if (profile != 3)
        return in_slice_data_bit_offset;

    header_bytes = in_slice_data_bit_offset / 8;

    while (counted < header_bytes) {
        int escape_here = buf[raw_pos] == 0 &&
                          buf[raw_pos + 1] == 0 &&
                          buf[raw_pos + 2] == 3 &&
                          buf[raw_pos + 3] < 4;

        if (escape_here) {
            /* two header bytes (00 00) occupy three raw bytes (00 00 03) */
            counted += 2;
            raw_pos += 3;
        } else {
            counted += 1;
            raw_pos += 1;
        }
    }

    return 8 * raw_pos + in_slice_data_bit_offset % 8;
}
1881
1882 static void
1883 gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
1884                         VAPictureParameterBufferVC1 *pic_param,
1885                         VASliceParameterBufferVC1 *slice_param,
1886                         VASliceParameterBufferVC1 *next_slice_param,
1887                         dri_bo *slice_data_bo,
1888                         struct gen7_mfd_context *gen7_mfd_context)
1889 {
1890     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1891     int next_slice_start_vert_pos;
1892     int macroblock_offset;
1893     uint8_t *slice_data = NULL;
1894
1895     dri_bo_map(slice_data_bo, 0);
1896     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1897     macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1898                                                                slice_param->macroblock_offset,
1899                                                                pic_param->sequence_fields.bits.profile);
1900     dri_bo_unmap(slice_data_bo);
1901
1902     if (next_slice_param)
1903         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1904     else
1905         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1906
1907     BEGIN_BCS_BATCH(batch, 5);
1908     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1909     OUT_BCS_BATCH(batch, 
1910                   slice_param->slice_data_size - (macroblock_offset >> 3));
1911     OUT_BCS_BATCH(batch, 
1912                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1913     OUT_BCS_BATCH(batch,
1914                   slice_param->slice_vertical_position << 16 |
1915                   next_slice_start_vert_pos << 0);
1916     OUT_BCS_BATCH(batch,
1917                   (macroblock_offset & 0x7));
1918     ADVANCE_BCS_BATCH(batch);
1919 }
1920
1921 static void
1922 gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
1923                             struct decode_state *decode_state,
1924                             struct gen7_mfd_context *gen7_mfd_context)
1925 {
1926     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1927     VAPictureParameterBufferVC1 *pic_param;
1928     VASliceParameterBufferVC1 *slice_param, *next_slice_param;
1929     dri_bo *slice_data_bo;
1930     int i, j;
1931
1932     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1933     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1934
1935     gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1936     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1937     intel_batchbuffer_emit_mi_flush(batch);
1938     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1939     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1940     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1941     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1942     gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1943     gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1944     gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1945
1946     assert(decode_state->num_slice_params == 1);
1947     for (j = 0; j < decode_state->num_slice_params; j++) {
1948         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1949         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1950         slice_data_bo = decode_state->slice_datas[j]->bo;
1951         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1952
1953         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1954             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1955
1956             if (i < decode_state->slice_params[j]->num_elements - 1)
1957                 next_slice_param = slice_param + 1;
1958             else
1959                 next_slice_param = NULL;
1960
1961             gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1962             slice_param++;
1963         }
1964     }
1965
1966     intel_batchbuffer_end_atomic(batch);
1967     intel_batchbuffer_flush(batch);
1968 }
1969
1970 static void 
1971 gen7_mfd_decode_picture(VADriverContextP ctx, 
1972                         VAProfile profile, 
1973                         union codec_state *codec_state,
1974                         struct hw_context *hw_context)
1975
1976 {
1977     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
1978     struct decode_state *decode_state = &codec_state->dec;
1979
1980     assert(gen7_mfd_context);
1981
1982     switch (profile) {
1983     case VAProfileMPEG2Simple:
1984     case VAProfileMPEG2Main:
1985         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
1986         break;
1987         
1988     case VAProfileH264Baseline:
1989     case VAProfileH264Main:
1990     case VAProfileH264High:
1991         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
1992         break;
1993
1994     case VAProfileVC1Simple:
1995     case VAProfileVC1Main:
1996     case VAProfileVC1Advanced:
1997         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
1998         break;
1999
2000     default:
2001         assert(0);
2002         break;
2003     }
2004 }
2005
2006 static void
2007 gen7_mfd_context_destroy(void *hw_context)
2008 {
2009     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2010
2011     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2012     gen7_mfd_context->post_deblocking_output.bo = NULL;
2013
2014     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2015     gen7_mfd_context->pre_deblocking_output.bo = NULL;
2016
2017     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2018     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2019
2020     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2021     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2022
2023     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2024     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2025
2026     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2027     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2028
2029     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
2030     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2031
2032     intel_batchbuffer_free(gen7_mfd_context->base.batch);
2033     free(gen7_mfd_context);
2034 }
2035
2036 struct hw_context *
2037 gen7_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
2038 {
2039     struct intel_driver_data *intel = intel_driver_data(ctx);
2040     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
2041     int i;
2042
2043     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
2044     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
2045     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2046
2047     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
2048         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2049         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
2050     }
2051
2052     return (struct hw_context *)gen7_mfd_context;
2053 }