Avoid depending on va_backend.h for some files
[platform/upstream/libva-intel-driver.git] / src / gen7_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33
34 #include "intel_batchbuffer.h"
35 #include "intel_driver.h"
36
37 #include "i965_defines.h"
38 #include "i965_drv_video.h"
39
40 #include "gen7_mfd.h"
41
/* Size of one direct-MV read/write buffer: 0x88000 == 557056 bytes per frame. */
#define DMV_SIZE        0x88000

/*
 * Classic 8x8 zig-zag scan pattern: entry k is the raster position
 * (row * 8 + column) of the k-th element in scan order.
 * The table is not referenced in this part of the file.
 */
static const uint32_t zigzag_direct[64] = {
     0,  1,  8, 16,  9,  2,  3, 10,     /* scan positions  0..7  */
    17, 24, 32, 25, 18, 11,  4,  5,     /* scan positions  8..15 */
    12, 19, 26, 33, 40, 48, 41, 34,     /* scan positions 16..23 */
    27, 20, 13,  6,  7, 14, 21, 28,     /* scan positions 24..31 */
    35, 42, 49, 56, 57, 50, 43, 36,     /* scan positions 32..39 */
    29, 22, 15, 23, 30, 37, 44, 51,     /* scan positions 40..47 */
    58, 59, 52, 45, 38, 31, 39, 46,     /* scan positions 48..55 */
    53, 60, 61, 54, 47, 55, 62, 63      /* scan positions 56..63 */
};
54
55 static void
56 gen7_mfd_avc_frame_store_index(VADriverContextP ctx,
57                                VAPictureParameterBufferH264 *pic_param,
58                                struct gen7_mfd_context *gen7_mfd_context)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     int i, j;
62
63     assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
64
65     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
66         int found = 0;
67
68         if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
69             continue;
70
71         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
72             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
73             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
74                 continue;
75
76             if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
77                 found = 1;
78                 break;
79             }
80         }
81
82         if (!found) {
83             struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
84             obj_surface->flags &= ~SURFACE_REFERENCED;
85
86             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
87                 dri_bo_unreference(obj_surface->bo);
88                 obj_surface->bo = NULL;
89                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
90             }
91
92             if (obj_surface->free_private_data)
93                 obj_surface->free_private_data(&obj_surface->private_data);
94
95             gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
96             gen7_mfd_context->reference_surface[i].frame_store_id = -1;
97         }
98     }
99
100     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
101         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
102         int found = 0;
103
104         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
105             continue;
106
107         for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
108             if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
109                 continue;
110             
111             if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
112                 found = 1;
113                 break;
114             }
115         }
116
117         if (!found) {
118             int frame_idx;
119             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
120             
121             assert(obj_surface);
122             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
123
124             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
125                 for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
126                     if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
127                         continue;
128
129                     if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
130                         break;
131                 }
132
133                 if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
134                     break;
135             }
136
137             assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
138
139             for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
140                 if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
141                     gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
142                     gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
143                     break;
144                 }
145             }
146         }
147     }
148
149     /* sort */
150     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
151         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
152             gen7_mfd_context->reference_surface[i].frame_store_id == i)
153             continue;
154
155         for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
156             if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
157                 gen7_mfd_context->reference_surface[j].frame_store_id == i) {
158                 VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
159                 int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
160
161                 gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
162                 gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
163                 gen7_mfd_context->reference_surface[j].surface_id = id;
164                 gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
165                 break;
166             }
167         }
168     }
169 }
170
171 static void 
172 gen7_mfd_free_avc_surface(void **data)
173 {
174     struct gen7_avc_surface *gen7_avc_surface = *data;
175
176     if (!gen7_avc_surface)
177         return;
178
179     dri_bo_unreference(gen7_avc_surface->dmv_top);
180     gen7_avc_surface->dmv_top = NULL;
181     dri_bo_unreference(gen7_avc_surface->dmv_bottom);
182     gen7_avc_surface->dmv_bottom = NULL;
183
184     free(gen7_avc_surface);
185     *data = NULL;
186 }
187
188 static void
189 gen7_mfd_init_avc_surface(VADriverContextP ctx, 
190                           VAPictureParameterBufferH264 *pic_param,
191                           struct object_surface *obj_surface)
192 {
193     struct i965_driver_data *i965 = i965_driver_data(ctx);
194     struct gen7_avc_surface *gen7_avc_surface = obj_surface->private_data;
195
196     obj_surface->free_private_data = gen7_mfd_free_avc_surface;
197
198     if (!gen7_avc_surface) {
199         gen7_avc_surface = calloc(sizeof(struct gen7_avc_surface), 1);
200         assert((obj_surface->size & 0x3f) == 0);
201         obj_surface->private_data = gen7_avc_surface;
202     }
203
204     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
205                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
206
207     if (gen7_avc_surface->dmv_top == NULL) {
208         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
209                                                  "direct mv w/r buffer",
210                                                  DMV_SIZE,
211                                                  0x1000);
212     }
213
214     if (gen7_avc_surface->dmv_bottom_flag &&
215         gen7_avc_surface->dmv_bottom == NULL) {
216         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
217                                                     "direct mv w/r buffer",
218                                                     DMV_SIZE,
219                                                     0x1000);
220     }
221 }
222
223 static void
224 gen7_mfd_pipe_mode_select(VADriverContextP ctx,
225                           struct decode_state *decode_state,
226                           int standard_select,
227                           struct gen7_mfd_context *gen7_mfd_context)
228 {
229     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
230
231     assert(standard_select == MFX_FORMAT_MPEG2 ||
232            standard_select == MFX_FORMAT_AVC ||
233            standard_select == MFX_FORMAT_VC1 ||
234            standard_select == MFX_FORMAT_JPEG);
235
236     BEGIN_BCS_BATCH(batch, 5); /* FIXME: 5 ??? */
237     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
238     OUT_BCS_BATCH(batch,
239                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
240                   (MFD_MODE_VLD << 15) | /* VLD mode */
241                   (0 << 10) | /* disable Stream-Out */
242                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
243                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
244                   (0 << 5)  | /* not in stitch mode */
245                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
246                   (standard_select << 0));
247     OUT_BCS_BATCH(batch,
248                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
249                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
250                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
251                   (0 << 1)  |
252                   (0 << 0));
253     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
254     OUT_BCS_BATCH(batch, 0); /* reserved */
255     ADVANCE_BCS_BATCH(batch);
256 }
257
258 static void
259 gen7_mfd_surface_state(VADriverContextP ctx,
260                        struct decode_state *decode_state,
261                        int standard_select,
262                        struct gen7_mfd_context *gen7_mfd_context)
263 {
264     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
265     struct i965_driver_data *i965 = i965_driver_data(ctx);
266     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
267     assert(obj_surface);
268     
269     BEGIN_BCS_BATCH(batch, 6);
270     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
271     OUT_BCS_BATCH(batch, 0);
272     OUT_BCS_BATCH(batch,
273                   ((obj_surface->orig_height - 1) << 18) |
274                   ((obj_surface->orig_width - 1) << 4));
275     OUT_BCS_BATCH(batch,
276                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
277                   (1 << 27) | /* FIXME: set to 0 for JPEG */
278                   (0 << 22) | /* surface object control state, FIXME??? */
279                   ((obj_surface->width - 1) << 3) | /* pitch */
280                   (0 << 2)  | /* must be 0 for interleave U/V */
281                   (1 << 1)  | /* must be tiled */
282                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
283     OUT_BCS_BATCH(batch,
284                   (0 << 16) | /* FIXME: fix it for JPEG */
285                   (obj_surface->height)); /* FIXME: fix it for JPEG */
286     OUT_BCS_BATCH(batch, 0); /* FIXME: fix it for JPEG */
287     ADVANCE_BCS_BATCH(batch);
288 }
289
290 static void
291 gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
292                              struct decode_state *decode_state,
293                              int standard_select,
294                              struct gen7_mfd_context *gen7_mfd_context)
295 {
296     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
297     struct i965_driver_data *i965 = i965_driver_data(ctx);
298     int i;
299
300     BEGIN_BCS_BATCH(batch, 24);
301     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
302     if (gen7_mfd_context->pre_deblocking_output.valid)
303         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
304                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
305                       0);
306     else
307         OUT_BCS_BATCH(batch, 0);
308
309     if (gen7_mfd_context->post_deblocking_output.valid)
310         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
311                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
312                       0);
313     else
314         OUT_BCS_BATCH(batch, 0);
315
316     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
317     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
318
319     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
320         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
321                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
322                       0);
323     else
324         OUT_BCS_BATCH(batch, 0);
325
326     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
327         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
328                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
329                       0);
330     else
331         OUT_BCS_BATCH(batch, 0);
332
333     /* DW 7..22 */
334     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
335         struct object_surface *obj_surface;
336
337         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
338             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
339             assert(obj_surface && obj_surface->bo);
340
341             OUT_BCS_RELOC(batch, obj_surface->bo,
342                           I915_GEM_DOMAIN_INSTRUCTION, 0,
343                           0);
344         } else {
345             OUT_BCS_BATCH(batch, 0);
346         }
347     }
348
349     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
350     ADVANCE_BCS_BATCH(batch);
351 }
352
353 static void
354 gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
355                                  dri_bo *slice_data_bo,
356                                  int standard_select,
357                                  struct gen7_mfd_context *gen7_mfd_context)
358 {
359     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
360
361     BEGIN_BCS_BATCH(batch, 11);
362     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
363     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
364     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
365     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
366     OUT_BCS_BATCH(batch, 0);
367     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
368     OUT_BCS_BATCH(batch, 0);
369     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
370     OUT_BCS_BATCH(batch, 0);
371     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
372     OUT_BCS_BATCH(batch, 0);
373     ADVANCE_BCS_BATCH(batch);
374 }
375
376 static void
377 gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
378                                  struct decode_state *decode_state,
379                                  int standard_select,
380                                  struct gen7_mfd_context *gen7_mfd_context)
381 {
382     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
383
384     BEGIN_BCS_BATCH(batch, 4);
385     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
386
387     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
388         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
389                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
390                       0);
391     else
392         OUT_BCS_BATCH(batch, 0);
393
394     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
395         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
396                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
397                       0);
398     else
399         OUT_BCS_BATCH(batch, 0);
400
401     if (gen7_mfd_context->bitplane_read_buffer.valid)
402         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
403                       I915_GEM_DOMAIN_INSTRUCTION, 0,
404                       0);
405     else
406         OUT_BCS_BATCH(batch, 0);
407
408     ADVANCE_BCS_BATCH(batch);
409 }
410
411 static void
412 gen7_mfd_aes_state(VADriverContextP ctx,
413                    struct decode_state *decode_state,
414                    int standard_select)
415 {
416     /* FIXME */
417 }
418
419 static void
420 gen7_mfd_qm_state(VADriverContextP ctx,
421                   int qm_type,
422                   unsigned char *qm,
423                   int qm_length,
424                   struct gen7_mfd_context *gen7_mfd_context)
425 {
426     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
427     unsigned int qm_buffer[16];
428
429     assert(qm_length <= 16 * 4);
430     memcpy(qm_buffer, qm, qm_length);
431
432     BEGIN_BCS_BATCH(batch, 18);
433     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
434     OUT_BCS_BATCH(batch, qm_type << 0);
435     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
436     ADVANCE_BCS_BATCH(batch);
437 }
438 static void
439 gen7_mfd_wait(VADriverContextP ctx,
440               struct decode_state *decode_state,
441               int standard_select,
442               struct gen7_mfd_context *gen7_mfd_context)
443 {
444     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
445
446     BEGIN_BCS_BATCH(batch, 1);
447     OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
448     ADVANCE_BCS_BATCH(batch);
449 }
450
451 static void
452 gen7_mfd_avc_img_state(VADriverContextP ctx,
453                        struct decode_state *decode_state,
454                        struct gen7_mfd_context *gen7_mfd_context)
455 {
456     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
457     int qm_present_flag;
458     int img_struct;
459     int mbaff_frame_flag;
460     unsigned int width_in_mbs, height_in_mbs;
461     VAPictureParameterBufferH264 *pic_param;
462
463     assert(decode_state->pic_param && decode_state->pic_param->buffer);
464     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
465     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
466
467     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
468         qm_present_flag = 1;
469     else
470         qm_present_flag = 0; /* built-in QM matrices */
471
472     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
473         img_struct = 1;
474     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
475         img_struct = 3;
476     else
477         img_struct = 0;
478
479     if ((img_struct & 0x1) == 0x1) {
480         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
481     } else {
482         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
483     }
484
485     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
486         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
487         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
488     } else {
489         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
490     }
491
492     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
493                         !pic_param->pic_fields.bits.field_pic_flag);
494
495     width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
496     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
497
498     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
499     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
500            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
501     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
502
503     BEGIN_BCS_BATCH(batch, 16);
504     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
505     OUT_BCS_BATCH(batch, 
506                   width_in_mbs * height_in_mbs);
507     OUT_BCS_BATCH(batch, 
508                   ((height_in_mbs - 1) << 16) | 
509                   ((width_in_mbs - 1) << 0));
510     OUT_BCS_BATCH(batch, 
511                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
512                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
513                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
514                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
515                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
516                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
517                   (img_struct << 8));
518     OUT_BCS_BATCH(batch,
519                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
520                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
521                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
522                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
523                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
524                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
525                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
526                   (mbaff_frame_flag << 1) |
527                   (pic_param->pic_fields.bits.field_pic_flag << 0));
528     OUT_BCS_BATCH(batch, 0);
529     OUT_BCS_BATCH(batch, 0);
530     OUT_BCS_BATCH(batch, 0);
531     OUT_BCS_BATCH(batch, 0);
532     OUT_BCS_BATCH(batch, 0);
533     OUT_BCS_BATCH(batch, 0);
534     OUT_BCS_BATCH(batch, 0);
535     OUT_BCS_BATCH(batch, 0);
536     OUT_BCS_BATCH(batch, 0);
537     OUT_BCS_BATCH(batch, 0);
538     OUT_BCS_BATCH(batch, 0);
539     ADVANCE_BCS_BATCH(batch);
540 }
541
542 static void
543 gen7_mfd_avc_qm_state(VADriverContextP ctx,
544                       struct decode_state *decode_state,
545                       struct gen7_mfd_context *gen7_mfd_context)
546 {
547     VAIQMatrixBufferH264 *iq_matrix;
548     VAPictureParameterBufferH264 *pic_param;
549
550     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
551         return;
552
553     iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
554
555     assert(decode_state->pic_param && decode_state->pic_param->buffer);
556     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
557
558     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
559     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
560
561     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
562         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
563         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
564     }
565 }
566
567 static void
568 gen7_mfd_avc_directmode_state(VADriverContextP ctx,
569                               VAPictureParameterBufferH264 *pic_param,
570                               VASliceParameterBufferH264 *slice_param,
571                               struct gen7_mfd_context *gen7_mfd_context)
572 {
573     struct i965_driver_data *i965 = i965_driver_data(ctx);
574     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
575     struct object_surface *obj_surface;
576     struct gen7_avc_surface *gen7_avc_surface;
577     VAPictureH264 *va_pic;
578     int i, j;
579
580     BEGIN_BCS_BATCH(batch, 69);
581     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
582
583     /* reference surfaces 0..15 */
584     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
585         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
586             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
587             assert(obj_surface);
588             gen7_avc_surface = obj_surface->private_data;
589
590             if (gen7_avc_surface == NULL) {
591                 OUT_BCS_BATCH(batch, 0);
592                 OUT_BCS_BATCH(batch, 0);
593             } else {
594                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
595                               I915_GEM_DOMAIN_INSTRUCTION, 0,
596                               0);
597
598                 if (gen7_avc_surface->dmv_bottom_flag == 1)
599                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
600                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
601                                   0);
602                 else
603                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
604                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
605                                   0);
606             }
607         } else {
608             OUT_BCS_BATCH(batch, 0);
609             OUT_BCS_BATCH(batch, 0);
610         }
611     }
612
613     /* the current decoding frame/field */
614     va_pic = &pic_param->CurrPic;
615     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
616     obj_surface = SURFACE(va_pic->picture_id);
617     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
618     gen7_avc_surface = obj_surface->private_data;
619
620     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
621                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
622                   0);
623
624     if (gen7_avc_surface->dmv_bottom_flag == 1)
625         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
626                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
627                       0);
628     else
629         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
630                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
631                       0);
632
633     /* POC List */
634     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
635         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
636             int found = 0;
637             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
638                 va_pic = &pic_param->ReferenceFrames[j];
639                 
640                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
641                     continue;
642
643                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
644                     found = 1;
645                     break;
646                 }
647             }
648
649             assert(found == 1);
650             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
651             
652             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
653             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
654         } else {
655             OUT_BCS_BATCH(batch, 0);
656             OUT_BCS_BATCH(batch, 0);
657         }
658     }
659
660     va_pic = &pic_param->CurrPic;
661     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
662     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
663
664     ADVANCE_BCS_BATCH(batch);
665 }
666
667 static void
668 gen7_mfd_avc_slice_state(VADriverContextP ctx,
669                          VAPictureParameterBufferH264 *pic_param,
670                          VASliceParameterBufferH264 *slice_param,
671                          VASliceParameterBufferH264 *next_slice_param,
672                          struct gen7_mfd_context *gen7_mfd_context)
673 {
674     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
675     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
676     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
677     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
678     int num_ref_idx_l0, num_ref_idx_l1;
679     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
680                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
681     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
682     int slice_type;
683
684     if (slice_param->slice_type == SLICE_TYPE_I ||
685         slice_param->slice_type == SLICE_TYPE_SI) {
686         slice_type = SLICE_TYPE_I;
687     } else if (slice_param->slice_type == SLICE_TYPE_P ||
688                slice_param->slice_type == SLICE_TYPE_SP) {
689         slice_type = SLICE_TYPE_P;
690     } else { 
691         assert(slice_param->slice_type == SLICE_TYPE_B);
692         slice_type = SLICE_TYPE_B;
693     }
694
695     if (slice_type == SLICE_TYPE_I) {
696         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
697         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
698         num_ref_idx_l0 = 0;
699         num_ref_idx_l1 = 0;
700     } else if (slice_type == SLICE_TYPE_P) {
701         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
702         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
703         num_ref_idx_l1 = 0;
704     } else {
705         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
706         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
707     }
708
709     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
710     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
711     slice_ver_pos = first_mb_in_slice / width_in_mbs;
712
713     if (next_slice_param) {
714         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
715         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
716         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
717     } else {
718         next_slice_hor_pos = 0;
719         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
720     }
721
722     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
723     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
724     OUT_BCS_BATCH(batch, slice_type);
725     OUT_BCS_BATCH(batch, 
726                   (num_ref_idx_l1 << 24) |
727                   (num_ref_idx_l0 << 16) |
728                   (slice_param->chroma_log2_weight_denom << 8) |
729                   (slice_param->luma_log2_weight_denom << 0));
730     OUT_BCS_BATCH(batch, 
731                   (slice_param->direct_spatial_mv_pred_flag << 29) |
732                   (slice_param->disable_deblocking_filter_idc << 27) |
733                   (slice_param->cabac_init_idc << 24) |
734                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
735                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
736                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
737     OUT_BCS_BATCH(batch, 
738                   (slice_ver_pos << 24) |
739                   (slice_hor_pos << 16) | 
740                   (first_mb_in_slice << 0));
741     OUT_BCS_BATCH(batch,
742                   (next_slice_ver_pos << 16) |
743                   (next_slice_hor_pos << 0));
744     OUT_BCS_BATCH(batch, 
745                   (next_slice_param == NULL) << 19); /* last slice flag */
746     OUT_BCS_BATCH(batch, 0);
747     OUT_BCS_BATCH(batch, 0);
748     OUT_BCS_BATCH(batch, 0);
749     OUT_BCS_BATCH(batch, 0);
750     ADVANCE_BCS_BATCH(batch);
751 }
752
753 static void
754 gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
755                            VAPictureParameterBufferH264 *pic_param,
756                            VASliceParameterBufferH264 *slice_param,
757                            struct gen7_mfd_context *gen7_mfd_context)
758 {
759     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
760     int i, j, num_ref_list;
761     struct {
762         unsigned char bottom_idc:1;
763         unsigned char frame_store_index:4;
764         unsigned char field_picture:1;
765         unsigned char long_term:1;
766         unsigned char non_exist:1;
767     } refs[32];
768
769     if (slice_param->slice_type == SLICE_TYPE_I ||
770         slice_param->slice_type == SLICE_TYPE_SI)
771         return;
772
773     if (slice_param->slice_type == SLICE_TYPE_P ||
774         slice_param->slice_type == SLICE_TYPE_SP) {
775         num_ref_list = 1;
776     } else {
777         num_ref_list = 2;
778     }
779
780     for (i = 0; i < num_ref_list; i++) {
781         VAPictureH264 *va_pic;
782
783         if (i == 0) {
784             va_pic = slice_param->RefPicList0;
785         } else {
786             va_pic = slice_param->RefPicList1;
787         }
788
789         BEGIN_BCS_BATCH(batch, 10);
790         OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | (10 - 2));
791         OUT_BCS_BATCH(batch, i);
792
793         for (j = 0; j < 32; j++) {
794             if (va_pic->flags & VA_PICTURE_H264_INVALID) {
795                 refs[j].non_exist = 1;
796                 refs[j].long_term = 1;
797                 refs[j].field_picture = 1;
798                 refs[j].frame_store_index = 0xf;
799                 refs[j].bottom_idc = 1;
800             } else {
801                 int frame_idx;
802                 
803                 for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
804                     if (gen7_mfd_context->reference_surface[frame_idx].surface_id != VA_INVALID_ID &&
805                         va_pic->picture_id == gen7_mfd_context->reference_surface[frame_idx].surface_id) {
806                         assert(frame_idx == gen7_mfd_context->reference_surface[frame_idx].frame_store_id);
807                         break;
808                     }
809                 }
810
811                 assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
812                 
813                 refs[j].non_exist = 0;
814                 refs[j].long_term = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
815                 refs[j].field_picture = !!(va_pic->flags & 
816                                            (VA_PICTURE_H264_TOP_FIELD | 
817                                             VA_PICTURE_H264_BOTTOM_FIELD));
818                 refs[j].frame_store_index = frame_idx;
819                 refs[j].bottom_idc = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
820             }
821
822             va_pic++;
823         }
824         
825         intel_batchbuffer_data(batch, refs, sizeof(refs));
826         ADVANCE_BCS_BATCH(batch);
827     }
828 }
829
830 static void
831 gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
832                                 VAPictureParameterBufferH264 *pic_param,
833                                 VASliceParameterBufferH264 *slice_param,
834                                 struct gen7_mfd_context *gen7_mfd_context)
835 {
836     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
837     int i, j, num_weight_offset_table = 0;
838     short weightoffsets[32 * 6];
839
840     if ((slice_param->slice_type == SLICE_TYPE_P ||
841          slice_param->slice_type == SLICE_TYPE_SP) &&
842         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
843         num_weight_offset_table = 1;
844     }
845     
846     if ((slice_param->slice_type == SLICE_TYPE_B) &&
847         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
848         num_weight_offset_table = 2;
849     }
850
851     for (i = 0; i < num_weight_offset_table; i++) {
852         BEGIN_BCS_BATCH(batch, 98);
853         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
854         OUT_BCS_BATCH(batch, i);
855
856         if (i == 0) {
857             for (j = 0; j < 32; j++) {
858                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
859                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
860                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
861                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
862                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
863                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
864             }
865         } else {
866             for (j = 0; j < 32; j++) {
867                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
868                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
869                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
870                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
871                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
872                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
873             }
874         }
875
876         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
877         ADVANCE_BCS_BATCH(batch);
878     }
879 }
880
881 static int
882 gen7_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
883 {
884     int out_slice_data_bit_offset;
885     int slice_header_size = in_slice_data_bit_offset / 8;
886     int i, j;
887
888     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
889         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
890             i++, j += 2;
891         }
892     }
893
894     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
895
896     if (mode_flag == ENTROPY_CABAC)
897         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
898
899     return out_slice_data_bit_offset;
900 }
901
902 static void
903 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
904                         VAPictureParameterBufferH264 *pic_param,
905                         VASliceParameterBufferH264 *slice_param,
906                         dri_bo *slice_data_bo,
907                         VASliceParameterBufferH264 *next_slice_param,
908                         struct gen7_mfd_context *gen7_mfd_context)
909 {
910     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
911     int slice_data_bit_offset;
912     uint8_t *slice_data = NULL;
913
914     dri_bo_map(slice_data_bo, 0);
915     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
916     slice_data_bit_offset = gen7_mfd_avc_get_slice_bit_offset(slice_data,
917                                                               pic_param->pic_fields.bits.entropy_coding_mode_flag,
918                                                               slice_param->slice_data_bit_offset);
919     dri_bo_unmap(slice_data_bo);
920
921     /* the input bitsteam format on GEN7 differs from GEN6 */
922     BEGIN_BCS_BATCH(batch, 6);
923     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
924     OUT_BCS_BATCH(batch, 
925                   (slice_param->slice_data_size));
926     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
927     OUT_BCS_BATCH(batch,
928                   (0 << 31) |
929                   (0 << 14) |
930                   (0 << 12) |
931                   (0 << 10) |
932                   (0 << 8));
933     OUT_BCS_BATCH(batch,
934                   ((slice_data_bit_offset >> 3) << 16) |
935                   (0 << 5)  |
936                   (0 << 4)  |
937                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
938                   (slice_data_bit_offset & 0x7));
939     OUT_BCS_BATCH(batch, 0);
940     ADVANCE_BCS_BATCH(batch);
941 }
942
943 static void
944 gen7_mfd_avc_decode_init(VADriverContextP ctx,
945                          struct decode_state *decode_state,
946                          struct gen7_mfd_context *gen7_mfd_context)
947 {
948     VAPictureParameterBufferH264 *pic_param;
949     VASliceParameterBufferH264 *slice_param;
950     VAPictureH264 *va_pic;
951     struct i965_driver_data *i965 = i965_driver_data(ctx);
952     struct object_surface *obj_surface;
953     dri_bo *bo;
954     int i, j, enable_avc_ildb = 0;
955
956     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
957         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
958         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
959
960         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
961             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
962             assert((slice_param->slice_type == SLICE_TYPE_I) ||
963                    (slice_param->slice_type == SLICE_TYPE_SI) ||
964                    (slice_param->slice_type == SLICE_TYPE_P) ||
965                    (slice_param->slice_type == SLICE_TYPE_SP) ||
966                    (slice_param->slice_type == SLICE_TYPE_B));
967
968             if (slice_param->disable_deblocking_filter_idc != 1) {
969                 enable_avc_ildb = 1;
970                 break;
971             }
972
973             slice_param++;
974         }
975     }
976
977     assert(decode_state->pic_param && decode_state->pic_param->buffer);
978     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
979     gen7_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
980
981     /* Current decoded picture */
982     va_pic = &pic_param->CurrPic;
983     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
984     obj_surface = SURFACE(va_pic->picture_id);
985     assert(obj_surface);
986     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
987     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
988     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
989     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
990
991     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
992     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
993     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
994     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
995
996     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
997     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
998     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
999     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1000
1001     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1002     bo = dri_bo_alloc(i965->intel.bufmgr,
1003                       "intra row store",
1004                       128 * 64,
1005                       0x1000);
1006     assert(bo);
1007     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1008     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1009
1010     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1011     bo = dri_bo_alloc(i965->intel.bufmgr,
1012                       "deblocking filter row store",
1013                       30720, /* 4 * 120 * 64 */
1014                       0x1000);
1015     assert(bo);
1016     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1017     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1018
1019     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1020     bo = dri_bo_alloc(i965->intel.bufmgr,
1021                       "bsd mpc row store",
1022                       11520, /* 1.5 * 120 * 64 */
1023                       0x1000);
1024     assert(bo);
1025     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1026     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1027
1028     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
1029     bo = dri_bo_alloc(i965->intel.bufmgr,
1030                       "mpr row store",
1031                       7680, /* 1. 0 * 120 * 64 */
1032                       0x1000);
1033     assert(bo);
1034     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1035     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1036
1037     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1038 }
1039
1040 static void
1041 gen7_mfd_avc_decode_picture(VADriverContextP ctx,
1042                             struct decode_state *decode_state,
1043                             struct gen7_mfd_context *gen7_mfd_context)
1044 {
1045     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1046     VAPictureParameterBufferH264 *pic_param;
1047     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
1048     dri_bo *slice_data_bo;
1049     int i, j;
1050
1051     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1052     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1053     gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
1054
1055     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1056     intel_batchbuffer_emit_mi_flush(batch);
1057     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1058     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1059     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1060     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1061     gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
1062     gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
1063
1064     for (j = 0; j < decode_state->num_slice_params; j++) {
1065         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1066         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1067         slice_data_bo = decode_state->slice_datas[j]->bo;
1068         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
1069
1070         if (j == decode_state->num_slice_params - 1)
1071             next_slice_group_param = NULL;
1072         else
1073             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1074
1075         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1076             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1077             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1078                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1079                    (slice_param->slice_type == SLICE_TYPE_P) ||
1080                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1081                    (slice_param->slice_type == SLICE_TYPE_B));
1082
1083             if (i < decode_state->slice_params[j]->num_elements - 1)
1084                 next_slice_param = slice_param + 1;
1085             else
1086                 next_slice_param = next_slice_group_param;
1087
1088             gen7_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
1089             gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1090             gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1091             gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1092             gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1093             slice_param++;
1094         }
1095     }
1096
1097     intel_batchbuffer_end_atomic(batch);
1098     intel_batchbuffer_flush(batch);
1099 }
1100
1101 static void
1102 gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
1103                            struct decode_state *decode_state,
1104                            struct gen7_mfd_context *gen7_mfd_context)
1105 {
1106     VAPictureParameterBufferMPEG2 *pic_param;
1107     struct i965_driver_data *i965 = i965_driver_data(ctx);
1108     struct object_surface *obj_surface;
1109     int i;
1110     dri_bo *bo;
1111
1112     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1113     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1114
1115     /* reference picture */
1116     obj_surface = SURFACE(pic_param->forward_reference_picture);
1117
1118     if (obj_surface && obj_surface->bo)
1119         gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1120     else
1121         gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1122
1123     obj_surface = SURFACE(pic_param->backward_reference_picture);
1124
1125     if (obj_surface && obj_surface->bo)
1126         gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1127     else
1128         gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1129
1130     /* must do so !!! */
1131     for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
1132         gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
1133
1134     /* Current decoded picture */
1135     obj_surface = SURFACE(decode_state->current_render_target);
1136     assert(obj_surface);
1137     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1138
1139     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1140     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1141     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1142     gen7_mfd_context->pre_deblocking_output.valid = 1;
1143
1144     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1145     bo = dri_bo_alloc(i965->intel.bufmgr,
1146                       "bsd mpc row store",
1147                       11520, /* 1.5 * 120 * 64 */
1148                       0x1000);
1149     assert(bo);
1150     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1151     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1152
1153     gen7_mfd_context->post_deblocking_output.valid = 0;
1154     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1155     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1156     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1157     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1158 }
1159
1160 static void
1161 gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
1162                          struct decode_state *decode_state,
1163                          struct gen7_mfd_context *gen7_mfd_context)
1164 {
1165     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1166     VAPictureParameterBufferMPEG2 *pic_param;
1167
1168     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1169     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1170
1171     BEGIN_BCS_BATCH(batch, 13);
1172     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1173     OUT_BCS_BATCH(batch,
1174                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1175                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1176                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1177                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1178                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1179                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1180                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1181                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1182                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1183                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1184                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1185                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1186     OUT_BCS_BATCH(batch,
1187                   pic_param->picture_coding_type << 9);
1188     OUT_BCS_BATCH(batch,
1189                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1190                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1191     OUT_BCS_BATCH(batch, 0);
1192     OUT_BCS_BATCH(batch, 0);
1193     OUT_BCS_BATCH(batch, 0);
1194     OUT_BCS_BATCH(batch, 0);
1195     OUT_BCS_BATCH(batch, 0);
1196     OUT_BCS_BATCH(batch, 0);
1197     OUT_BCS_BATCH(batch, 0);
1198     OUT_BCS_BATCH(batch, 0);
1199     OUT_BCS_BATCH(batch, 0);
1200     ADVANCE_BCS_BATCH(batch);
1201 }
1202
1203 static void
1204 gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
1205                         struct decode_state *decode_state,
1206                         struct gen7_mfd_context *gen7_mfd_context)
1207 {
1208     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1209     int i, j;
1210
1211     /* Update internal QM state */
1212     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1213         VAIQMatrixBufferMPEG2 * const iq_matrix =
1214             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1215
1216         gen_iq_matrix->load_intra_quantiser_matrix =
1217             iq_matrix->load_intra_quantiser_matrix;
1218         if (iq_matrix->load_intra_quantiser_matrix) {
1219             for (j = 0; j < 64; j++)
1220                 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1221                     iq_matrix->intra_quantiser_matrix[j];
1222         }
1223
1224         gen_iq_matrix->load_non_intra_quantiser_matrix =
1225             iq_matrix->load_non_intra_quantiser_matrix;
1226         if (iq_matrix->load_non_intra_quantiser_matrix) {
1227             for (j = 0; j < 64; j++)
1228                 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1229                     iq_matrix->non_intra_quantiser_matrix[j];
1230         }
1231     }
1232
1233     /* Commit QM state to HW */
1234     for (i = 0; i < 2; i++) {
1235         unsigned char *qm = NULL;
1236         int qm_type;
1237
1238         if (i == 0) {
1239             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1240                 qm = gen_iq_matrix->intra_quantiser_matrix;
1241                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1242             }
1243         } else {
1244             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1245                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1246                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1247             }
1248         }
1249
1250         if (!qm)
1251             continue;
1252
1253         gen7_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1254     }
1255 }
1256
1257 static void
1258 gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1259                           VAPictureParameterBufferMPEG2 *pic_param,
1260                           VASliceParameterBufferMPEG2 *slice_param,
1261                           VASliceParameterBufferMPEG2 *next_slice_param,
1262                           struct gen7_mfd_context *gen7_mfd_context)
1263 {
1264     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1265     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1266     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic = 0;
1267
1268     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1269         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1270         is_field_pic = 1;
1271
1272     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic);
1273     hpos0 = slice_param->slice_horizontal_position;
1274
1275     if (next_slice_param == NULL) {
1276         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1277         hpos1 = 0;
1278     } else {
1279         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic);
1280         hpos1 = next_slice_param->slice_horizontal_position;
1281     }
1282
1283     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1284
1285     BEGIN_BCS_BATCH(batch, 5);
1286     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1287     OUT_BCS_BATCH(batch, 
1288                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1289     OUT_BCS_BATCH(batch, 
1290                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1291     OUT_BCS_BATCH(batch,
1292                   hpos0 << 24 |
1293                   vpos0 << 16 |
1294                   mb_count << 8 |
1295                   (next_slice_param == NULL) << 5 |
1296                   (next_slice_param == NULL) << 3 |
1297                   (slice_param->macroblock_offset & 0x7));
1298     OUT_BCS_BATCH(batch,
1299                   slice_param->quantiser_scale_code << 24);
1300     ADVANCE_BCS_BATCH(batch);
1301 }
1302
1303 static void
1304 gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1305                               struct decode_state *decode_state,
1306                               struct gen7_mfd_context *gen7_mfd_context)
1307 {
1308     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1309     VAPictureParameterBufferMPEG2 *pic_param;
1310     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1311     dri_bo *slice_data_bo;
1312     int i, j;
1313
1314     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1315     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1316
1317     gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1318     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1319     intel_batchbuffer_emit_mi_flush(batch);
1320     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1321     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1322     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1323     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1324     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1325     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1326
1327     for (j = 0; j < decode_state->num_slice_params; j++) {
1328         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1329         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1330         slice_data_bo = decode_state->slice_datas[j]->bo;
1331         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1332
1333         if (j == decode_state->num_slice_params - 1)
1334             next_slice_group_param = NULL;
1335         else
1336             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1337
1338         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1339             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1340
1341             if (i < decode_state->slice_params[j]->num_elements - 1)
1342                 next_slice_param = slice_param + 1;
1343             else
1344                 next_slice_param = next_slice_group_param;
1345
1346             gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1347             slice_param++;
1348         }
1349     }
1350
1351     intel_batchbuffer_end_atomic(batch);
1352     intel_batchbuffer_flush(batch);
1353 }
1354
1355 static const int va_to_gen7_vc1_pic_type[5] = {
1356     GEN7_VC1_I_PICTURE,
1357     GEN7_VC1_P_PICTURE,
1358     GEN7_VC1_B_PICTURE,
1359     GEN7_VC1_BI_PICTURE,
1360     GEN7_VC1_P_PICTURE,
1361 };
1362
/*
 * Maps a motion vector mode index (0..3) to the value programmed into the
 * hardware.
 */
static const int va_to_gen7_vc1_mv[4] = {
    [0] = 1,    /* 1-MV */
    [1] = 2,    /* 1-MV half-pel */
    [2] = 3,    /* 1-MV half-pel bilinear */
    [3] = 0,    /* Mixed MV */
};
1369
/*
 * B-picture scale factors, 21 entries.  Each value is a fraction expressed
 * in 1/256 units (e.g. 128 = 1/2, 85 ~ 1/3, 170 ~ 2/3).
 * NOTE(review): presumably indexed by the VC-1 B-fraction code - confirm
 * against the caller.
 */
static const int b_picture_scale_factor[21] = {
    128,                          /* x/2 */
    85,  170,                     /* x/3 */
    64,  192,                     /* x/4 */
    51,  102, 153, 204,           /* x/5 */
    43,  215,                     /* x/6 */
    37,  74,  111, 148, 185, 222, /* x/7 */
    32,  96,  160, 224,           /* x/8 */
};
1377
/*
 * Maps a conditional overlap index (0..2) to the value programmed into the
 * hardware; note that hardware value 1 is never produced.
 */
static const int va_to_gen7_vc1_condover[3] = {
    [0] = 0,
    [1] = 2,
    [2] = 3,
};
1383
1384 static const int va_to_gen7_vc1_profile[4] = {
1385     GEN7_VC1_SIMPLE_PROFILE,
1386     GEN7_VC1_MAIN_PROFILE,
1387     GEN7_VC1_RESERVED_PROFILE,
1388     GEN7_VC1_ADVANCED_PROFILE
1389 };
1390
1391 static void 
1392 gen7_mfd_free_vc1_surface(void **data)
1393 {
1394     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1395
1396     if (!gen7_vc1_surface)
1397         return;
1398
1399     dri_bo_unreference(gen7_vc1_surface->dmv);
1400     free(gen7_vc1_surface);
1401     *data = NULL;
1402 }
1403
1404 static void
1405 gen7_mfd_init_vc1_surface(VADriverContextP ctx, 
1406                           VAPictureParameterBufferVC1 *pic_param,
1407                           struct object_surface *obj_surface)
1408 {
1409     struct i965_driver_data *i965 = i965_driver_data(ctx);
1410     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1411
1412     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
1413
1414     if (!gen7_vc1_surface) {
1415         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1416         assert((obj_surface->size & 0x3f) == 0);
1417         obj_surface->private_data = gen7_vc1_surface;
1418     }
1419
1420     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1421
1422     if (gen7_vc1_surface->dmv == NULL) {
1423         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1424                                              "direct mv w/r buffer",
1425                                              557056,    /* 64 * 128 * 64 */
1426                                              0x1000);
1427     }
1428 }
1429
1430 static void
1431 gen7_mfd_vc1_decode_init(VADriverContextP ctx,
1432                          struct decode_state *decode_state,
1433                          struct gen7_mfd_context *gen7_mfd_context)
1434 {
1435     VAPictureParameterBufferVC1 *pic_param;
1436     struct i965_driver_data *i965 = i965_driver_data(ctx);
1437     struct object_surface *obj_surface;
1438     int i;
1439     dri_bo *bo;
1440
1441     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1442     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1443
1444     /* reference picture */
1445     obj_surface = SURFACE(pic_param->forward_reference_picture);
1446
1447     if (obj_surface && obj_surface->bo)
1448         gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1449     else
1450         gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1451
1452     obj_surface = SURFACE(pic_param->backward_reference_picture);
1453
1454     if (obj_surface && obj_surface->bo)
1455         gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1456     else
1457         gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1458
1459     /* must do so !!! */
1460     for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
1461         gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
1462
1463     /* Current decoded picture */
1464     obj_surface = SURFACE(decode_state->current_render_target);
1465     assert(obj_surface);
1466     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1467     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
1468
1469     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1470     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1471     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1472     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1473
1474     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1475     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1476     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1477     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1478
1479     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1480     bo = dri_bo_alloc(i965->intel.bufmgr,
1481                       "intra row store",
1482                       128 * 64,
1483                       0x1000);
1484     assert(bo);
1485     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1486     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1487
1488     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1489     bo = dri_bo_alloc(i965->intel.bufmgr,
1490                       "deblocking filter row store",
1491                       46080, /* 6 * 120 * 64 */
1492                       0x1000);
1493     assert(bo);
1494     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1495     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1496
1497     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1498     bo = dri_bo_alloc(i965->intel.bufmgr,
1499                       "bsd mpc row store",
1500                       11520, /* 1.5 * 120 * 64 */
1501                       0x1000);
1502     assert(bo);
1503     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1504     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1505
1506     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1507
1508     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1509     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1510     
1511     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1512         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1513         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1514         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1515         int src_w, src_h;
1516         uint8_t *src = NULL, *dst = NULL;
1517
1518         assert(decode_state->bit_plane->buffer);
1519         src = decode_state->bit_plane->buffer;
1520
1521         bo = dri_bo_alloc(i965->intel.bufmgr,
1522                           "VC-1 Bitplane",
1523                           bitplane_width * bitplane_width,
1524                           0x1000);
1525         assert(bo);
1526         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1527
1528         dri_bo_map(bo, True);
1529         assert(bo->virtual);
1530         dst = bo->virtual;
1531
1532         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1533             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1534                 int src_index, dst_index;
1535                 int src_shift;
1536                 uint8_t src_value;
1537
1538                 src_index = (src_h * width_in_mbs + src_w) / 2;
1539                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1540                 src_value = ((src[src_index] >> src_shift) & 0xf);
1541
1542                 dst_index = src_w / 2;
1543                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1544             }
1545
1546             if (src_w & 1)
1547                 dst[src_w / 2] >>= 4;
1548
1549             dst += bitplane_width;
1550         }
1551
1552         dri_bo_unmap(bo);
1553     } else
1554         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1555 }
1556
1557 static void
1558 gen7_mfd_vc1_pic_state(VADriverContextP ctx,
1559                        struct decode_state *decode_state,
1560                        struct gen7_mfd_context *gen7_mfd_context)
1561 {
1562     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1563     VAPictureParameterBufferVC1 *pic_param;
1564     struct i965_driver_data *i965 = i965_driver_data(ctx);
1565     struct object_surface *obj_surface;
1566     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1567     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1568     int unified_mv_mode;
1569     int ref_field_pic_polarity = 0;
1570     int scale_factor = 0;
1571     int trans_ac_y = 0;
1572     int dmv_surface_valid = 0;
1573     int brfd = 0;
1574     int fcm = 0;
1575     int picture_type;
1576     int profile;
1577     int overlap;
1578     int interpolation_mode = 0;
1579
1580     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1581     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1582
1583     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1584     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1585     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1586     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1587     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1588     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1589     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1590     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1591
1592     if (dquant == 0) {
1593         alt_pquant_config = 0;
1594         alt_pquant_edge_mask = 0;
1595     } else if (dquant == 2) {
1596         alt_pquant_config = 1;
1597         alt_pquant_edge_mask = 0xf;
1598     } else {
1599         assert(dquant == 1);
1600         if (dquantfrm == 0) {
1601             alt_pquant_config = 0;
1602             alt_pquant_edge_mask = 0;
1603             alt_pq = 0;
1604         } else {
1605             assert(dquantfrm == 1);
1606             alt_pquant_config = 1;
1607
1608             switch (dqprofile) {
1609             case 3:
1610                 if (dqbilevel == 0) {
1611                     alt_pquant_config = 2;
1612                     alt_pquant_edge_mask = 0;
1613                 } else {
1614                     assert(dqbilevel == 1);
1615                     alt_pquant_config = 3;
1616                     alt_pquant_edge_mask = 0;
1617                 }
1618                 break;
1619                 
1620             case 0:
1621                 alt_pquant_edge_mask = 0xf;
1622                 break;
1623
1624             case 1:
1625                 if (dqdbedge == 3)
1626                     alt_pquant_edge_mask = 0x9;
1627                 else
1628                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1629
1630                 break;
1631
1632             case 2:
1633                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1634                 break;
1635
1636             default:
1637                 assert(0);
1638             }
1639         }
1640     }
1641
1642     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1643         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1644         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1645     } else {
1646         assert(pic_param->mv_fields.bits.mv_mode < 4);
1647         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1648     }
1649
1650     if (pic_param->sequence_fields.bits.interlace == 1 &&
1651         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1652         /* FIXME: calculate reference field picture polarity */
1653         assert(0);
1654         ref_field_pic_polarity = 0;
1655     }
1656
1657     if (pic_param->b_picture_fraction < 21)
1658         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1659
1660     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1661     
1662     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1663         picture_type == GEN7_VC1_I_PICTURE)
1664         picture_type = GEN7_VC1_BI_PICTURE;
1665
1666     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1667         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1668     else
1669         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1670
1671
1672     if (picture_type == GEN7_VC1_B_PICTURE) {
1673         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1674
1675         obj_surface = SURFACE(pic_param->backward_reference_picture);
1676         assert(obj_surface);
1677         gen7_vc1_surface = obj_surface->private_data;
1678
1679         if (!gen7_vc1_surface || 
1680             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1681              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1682             dmv_surface_valid = 0;
1683         else
1684             dmv_surface_valid = 1;
1685     }
1686
1687     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1688
1689     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1690         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1691     else {
1692         if (pic_param->picture_fields.bits.top_field_first)
1693             fcm = 2;
1694         else
1695             fcm = 3;
1696     }
1697
1698     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1699         brfd = pic_param->reference_fields.bits.reference_distance;
1700         brfd = (scale_factor * brfd) >> 8;
1701         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1702
1703         if (brfd < 0)
1704             brfd = 0;
1705     }
1706
1707     overlap = pic_param->sequence_fields.bits.overlap;
1708     if (profile != GEN7_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
1709         overlap = 0;
1710
1711     assert(pic_param->conditional_overlap_flag < 3);
1712     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1713
1714     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1715         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1716          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1717         interpolation_mode = 8; /* Half-pel bilinear */
1718     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1719              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1720               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1721         interpolation_mode = 0; /* Half-pel bicubic */
1722     else
1723         interpolation_mode = 1; /* Quarter-pel bicubic */
1724
1725     BEGIN_BCS_BATCH(batch, 6);
1726     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1727     OUT_BCS_BATCH(batch,
1728                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1729                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1730     OUT_BCS_BATCH(batch,
1731                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1732                   dmv_surface_valid << 15 |
1733                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1734                   pic_param->rounding_control << 13 |
1735                   pic_param->sequence_fields.bits.syncmarker << 12 |
1736                   interpolation_mode << 8 |
1737                   0 << 7 | /* FIXME: scale up or down ??? */
1738                   pic_param->range_reduction_frame << 6 |
1739                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1740                   overlap << 4 |
1741                   !pic_param->picture_fields.bits.is_first_field << 3 |
1742                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1743     OUT_BCS_BATCH(batch,
1744                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1745                   picture_type << 26 |
1746                   fcm << 24 |
1747                   alt_pq << 16 |
1748                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1749                   scale_factor << 0);
1750     OUT_BCS_BATCH(batch,
1751                   unified_mv_mode << 28 |
1752                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1753                   pic_param->fast_uvmc_flag << 26 |
1754                   ref_field_pic_polarity << 25 |
1755                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1756                   pic_param->reference_fields.bits.reference_distance << 20 |
1757                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1758                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1759                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1760                   alt_pquant_edge_mask << 4 |
1761                   alt_pquant_config << 2 |
1762                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1763                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1764     OUT_BCS_BATCH(batch,
1765                   !!pic_param->bitplane_present.value << 31 |
1766                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1767                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1768                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1769                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1770                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1771                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1772                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1773                   pic_param->mv_fields.bits.mv_table << 20 |
1774                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1775                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1776                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1777                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1778                   pic_param->mb_mode_table << 8 |
1779                   trans_ac_y << 6 |
1780                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1781                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1782                   pic_param->cbp_table << 0);
1783     ADVANCE_BCS_BATCH(batch);
1784 }
1785
1786 static void
1787 gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1788                              struct decode_state *decode_state,
1789                              struct gen7_mfd_context *gen7_mfd_context)
1790 {
1791     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1792     VAPictureParameterBufferVC1 *pic_param;
1793     int intensitycomp_single;
1794
1795     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1796     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1797
1798     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1799     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1800     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1801
1802     BEGIN_BCS_BATCH(batch, 6);
1803     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1804     OUT_BCS_BATCH(batch,
1805                   0 << 14 | /* FIXME: double ??? */
1806                   0 << 12 |
1807                   intensitycomp_single << 10 |
1808                   intensitycomp_single << 8 |
1809                   0 << 4 | /* FIXME: interlace mode */
1810                   0);
1811     OUT_BCS_BATCH(batch,
1812                   pic_param->luma_shift << 16 |
1813                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1814     OUT_BCS_BATCH(batch, 0);
1815     OUT_BCS_BATCH(batch, 0);
1816     OUT_BCS_BATCH(batch, 0);
1817     ADVANCE_BCS_BATCH(batch);
1818 }
1819
1820
1821 static void
1822 gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
1823                               struct decode_state *decode_state,
1824                               struct gen7_mfd_context *gen7_mfd_context)
1825 {
1826     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1827     VAPictureParameterBufferVC1 *pic_param;
1828     struct i965_driver_data *i965 = i965_driver_data(ctx);
1829     struct object_surface *obj_surface;
1830     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1831
1832     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1833     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1834
1835     obj_surface = SURFACE(decode_state->current_render_target);
1836
1837     if (obj_surface && obj_surface->private_data) {
1838         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1839     }
1840
1841     obj_surface = SURFACE(pic_param->backward_reference_picture);
1842
1843     if (obj_surface && obj_surface->private_data) {
1844         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1845     }
1846
1847     BEGIN_BCS_BATCH(batch, 3);
1848     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1849
1850     if (dmv_write_buffer)
1851         OUT_BCS_RELOC(batch, dmv_write_buffer,
1852                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1853                       0);
1854     else
1855         OUT_BCS_BATCH(batch, 0);
1856
1857     if (dmv_read_buffer)
1858         OUT_BCS_RELOC(batch, dmv_read_buffer,
1859                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1860                       0);
1861     else
1862         OUT_BCS_BATCH(batch, 0);
1863                   
1864     ADVANCE_BCS_BATCH(batch);
1865 }
1866
1867 static int
1868 gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1869 {
1870     int out_slice_data_bit_offset;
1871     int slice_header_size = in_slice_data_bit_offset / 8;
1872     int i, j;
1873
1874     if (profile != 3)
1875         out_slice_data_bit_offset = in_slice_data_bit_offset;
1876     else {
1877         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1878             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1879                 i++, j += 2;
1880             }
1881         }
1882
1883         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1884     }
1885
1886     return out_slice_data_bit_offset;
1887 }
1888
1889 static void
1890 gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
1891                         VAPictureParameterBufferVC1 *pic_param,
1892                         VASliceParameterBufferVC1 *slice_param,
1893                         VASliceParameterBufferVC1 *next_slice_param,
1894                         dri_bo *slice_data_bo,
1895                         struct gen7_mfd_context *gen7_mfd_context)
1896 {
1897     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1898     int next_slice_start_vert_pos;
1899     int macroblock_offset;
1900     uint8_t *slice_data = NULL;
1901
1902     dri_bo_map(slice_data_bo, 0);
1903     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1904     macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1905                                                                slice_param->macroblock_offset,
1906                                                                pic_param->sequence_fields.bits.profile);
1907     dri_bo_unmap(slice_data_bo);
1908
1909     if (next_slice_param)
1910         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1911     else
1912         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1913
1914     BEGIN_BCS_BATCH(batch, 5);
1915     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1916     OUT_BCS_BATCH(batch, 
1917                   slice_param->slice_data_size - (macroblock_offset >> 3));
1918     OUT_BCS_BATCH(batch, 
1919                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1920     OUT_BCS_BATCH(batch,
1921                   slice_param->slice_vertical_position << 16 |
1922                   next_slice_start_vert_pos << 0);
1923     OUT_BCS_BATCH(batch,
1924                   (macroblock_offset & 0x7));
1925     ADVANCE_BCS_BATCH(batch);
1926 }
1927
1928 static void
1929 gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
1930                             struct decode_state *decode_state,
1931                             struct gen7_mfd_context *gen7_mfd_context)
1932 {
1933     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1934     VAPictureParameterBufferVC1 *pic_param;
1935     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1936     dri_bo *slice_data_bo;
1937     int i, j;
1938
1939     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1940     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1941
1942     gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1943     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1944     intel_batchbuffer_emit_mi_flush(batch);
1945     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1946     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1947     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1948     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1949     gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1950     gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1951     gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1952
1953     for (j = 0; j < decode_state->num_slice_params; j++) {
1954         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1955         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1956         slice_data_bo = decode_state->slice_datas[j]->bo;
1957         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1958
1959         if (j == decode_state->num_slice_params - 1)
1960             next_slice_group_param = NULL;
1961         else
1962             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1963
1964         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1965             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1966
1967             if (i < decode_state->slice_params[j]->num_elements - 1)
1968                 next_slice_param = slice_param + 1;
1969             else
1970                 next_slice_param = next_slice_group_param;
1971
1972             gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1973             slice_param++;
1974         }
1975     }
1976
1977     intel_batchbuffer_end_atomic(batch);
1978     intel_batchbuffer_flush(batch);
1979 }
1980
1981 static void 
1982 gen7_mfd_decode_picture(VADriverContextP ctx, 
1983                         VAProfile profile, 
1984                         union codec_state *codec_state,
1985                         struct hw_context *hw_context)
1986
1987 {
1988     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
1989     struct decode_state *decode_state = &codec_state->dec;
1990
1991     assert(gen7_mfd_context);
1992
1993     switch (profile) {
1994     case VAProfileMPEG2Simple:
1995     case VAProfileMPEG2Main:
1996         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
1997         break;
1998         
1999     case VAProfileH264Baseline:
2000     case VAProfileH264Main:
2001     case VAProfileH264High:
2002         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
2003         break;
2004
2005     case VAProfileVC1Simple:
2006     case VAProfileVC1Main:
2007     case VAProfileVC1Advanced:
2008         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
2009         break;
2010
2011     default:
2012         assert(0);
2013         break;
2014     }
2015 }
2016
2017 static void
2018 gen7_mfd_context_destroy(void *hw_context)
2019 {
2020     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2021
2022     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2023     gen7_mfd_context->post_deblocking_output.bo = NULL;
2024
2025     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2026     gen7_mfd_context->pre_deblocking_output.bo = NULL;
2027
2028     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2029     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2030
2031     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2032     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2033
2034     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2035     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2036
2037     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2038     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2039
2040     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
2041     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2042
2043     intel_batchbuffer_free(gen7_mfd_context->base.batch);
2044     free(gen7_mfd_context);
2045 }
2046
2047 struct hw_context *
2048 gen7_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
2049 {
2050     struct intel_driver_data *intel = intel_driver_data(ctx);
2051     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
2052     int i;
2053
2054     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
2055     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
2056     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2057
2058     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
2059         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2060         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
2061     }
2062
2063     return (struct hw_context *)gen7_mfd_context;
2064 }