1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #ifndef HAVE_GEN_AVC_SURFACE
31 #define HAVE_GEN_AVC_SURFACE 1
32 #endif
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38 #include <va/va_dec_jpeg.h>
39
40 #include "intel_batchbuffer.h"
41 #include "intel_driver.h"
42
43 #include "i965_defines.h"
44 #include "i965_drv_video.h"
45 #include "i965_decoder_utils.h"
46
47 #include "gen7_mfd.h"
48 #include "intel_media.h"
49
50 #define B0_STEP_REV             2
51 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
52
53 static const uint32_t zigzag_direct[64] = {
54     0,   1,  8, 16,  9,  2,  3, 10,
55     17, 24, 32, 25, 18, 11,  4,  5,
56     12, 19, 26, 33, 40, 48, 41, 34,
57     27, 20, 13,  6,  7, 14, 21, 28,
58     35, 42, 49, 56, 57, 50, 43, 36,
59     29, 22, 15, 23, 30, 37, 44, 51,
60     58, 59, 52, 45, 38, 31, 39, 46,
61     53, 60, 61, 54, 47, 55, 62, 63
62 };
63
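/*
 * Bring the driver-side frame store in sync with pic_param->ReferenceFrames:
 * drop entries that are no longer referenced, give newly seen reference
 * pictures the lowest free frame-store slot, then reorder the array so that
 * frame_store_id matches the array index where possible.
 */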
64 static void
65 gen75_mfd_avc_frame_store_index(VADriverContextP ctx,
66                                VAPictureParameterBufferH264 *pic_param,
67                                struct gen7_mfd_context *gen7_mfd_context)
68 {
69     struct i965_driver_data *i965 = i965_driver_data(ctx);
70     int i, j;
71
72     assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
73
74     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
75         int found = 0;
76
77         if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
78             continue;
79
80         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
81             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
82             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
83                 continue;
84
85             if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
86                 found = 1;
87                 break;
88             }
89         }
90
91         if (!found) {
92             struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
93             obj_surface->flags &= ~SURFACE_REFERENCED;
94
95             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
96                 dri_bo_unreference(obj_surface->bo);
97                 obj_surface->bo = NULL;
98                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
99             }
100
101             if (obj_surface->free_private_data)
102                 obj_surface->free_private_data(&obj_surface->private_data);
103
104             gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
105             gen7_mfd_context->reference_surface[i].frame_store_id = -1;
106         }
107     }
108
109     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
110         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
111         int found = 0;
112
113         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
114             continue;
115
116         for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
117             if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
118                 continue;
119             
120             if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
121                 found = 1;
122                 break;
123             }
124         }
125
126         if (!found) {
127             int frame_idx;
128             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
129             
130             assert(obj_surface);
131             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
132
133             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
134                 for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
135                     if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
136                         continue;
137
138                     if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
139                         break;
140                 }
141
142                 if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
143                     break;
144             }
145
146             assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
147
148             for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
149                 if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
150                     gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
151                     gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
152                     break;
153                 }
154             }
155         }
156     }
157
158     /* sort */
159     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
160         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
161             gen7_mfd_context->reference_surface[i].frame_store_id == i)
162             continue;
163
164         for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
165             if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
166                 gen7_mfd_context->reference_surface[j].frame_store_id == i) {
167                 VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
168                 int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
169
170                 gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
171                 gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
172                 gen7_mfd_context->reference_surface[j].surface_id = id;
173                 gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
174                 break;
175             }
176         }
177     }
178 }
179
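/*
 * Attach a GenAvcSurface to the surface (if not already present) and allocate
 * the direct-MV write/read buffers: one for the top field/frame and, when
 * field pictures are decoded without direct_8x8_inference, a second one for
 * the bottom field.
 */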
180 static void
181 gen75_mfd_init_avc_surface(VADriverContextP ctx, 
182                           VAPictureParameterBufferH264 *pic_param,
183                           struct object_surface *obj_surface)
184 {
185     struct i965_driver_data *i965 = i965_driver_data(ctx);
186     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
187     int width_in_mbs, height_in_mbs;
188
189     obj_surface->free_private_data = gen_free_avc_surface;
190     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
191     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
192
193     if (!gen7_avc_surface) {
194         gen7_avc_surface = calloc(1, sizeof(GenAvcSurface));
195         assert((obj_surface->size & 0x3f) == 0);
196         obj_surface->private_data = gen7_avc_surface;
197     }
198
199     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
200                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
201
202     if (gen7_avc_surface->dmv_top == NULL) {
203         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
204                                                  "direct mv w/r buffer",
205                                                  width_in_mbs * height_in_mbs * 128,
206                                                  0x1000);
207         assert(gen7_avc_surface->dmv_top);
208     }
209
210     if (gen7_avc_surface->dmv_bottom_flag &&
211         gen7_avc_surface->dmv_bottom == NULL) {
212         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
213                                                     "direct mv w/r buffer",
214                                                     width_in_mbs * height_in_mbs * 128,                                                    
215                                                     0x1000);
216         assert(gen7_avc_surface->dmv_bottom);
217     }
218 }
219
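/*
 * MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode for the
 * selected codec, using the long (slice-level) command format, and route the
 * output to the pre- or post-deblocking buffer depending on which one is
 * valid for this picture.
 */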
220 static void
221 gen75_mfd_pipe_mode_select(VADriverContextP ctx,
222                           struct decode_state *decode_state,
223                           int standard_select,
224                           struct gen7_mfd_context *gen7_mfd_context)
225 {
226     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
227
228     assert(standard_select == MFX_FORMAT_MPEG2 ||
229            standard_select == MFX_FORMAT_AVC ||
230            standard_select == MFX_FORMAT_VC1 ||
231            standard_select == MFX_FORMAT_JPEG);
232
233     BEGIN_BCS_BATCH(batch, 5);
234     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
235     OUT_BCS_BATCH(batch,
236                   (MFX_LONG_MODE << 17) | /* Currently only the long format is supported */
237                   (MFD_MODE_VLD << 15) | /* VLD mode */
238                   (0 << 10) | /* disable Stream-Out */
239                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
240                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
241                   (0 << 5)  | /* not in stitch mode */
242                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
243                   (standard_select << 0));
244     OUT_BCS_BATCH(batch,
245                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
246                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
247                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
248                   (0 << 1)  |
249                   (0 << 0));
250     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
251     OUT_BCS_BATCH(batch, 0); /* reserved */
252     ADVANCE_BCS_BATCH(batch);
253 }
254
255 static void
256 gen75_mfd_surface_state(VADriverContextP ctx,
257                        struct decode_state *decode_state,
258                        int standard_select,
259                        struct gen7_mfd_context *gen7_mfd_context)
260 {
261     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
262     struct i965_driver_data *i965 = i965_driver_data(ctx);
263     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
264     unsigned int y_cb_offset;
265     unsigned int y_cr_offset;
266
267     assert(obj_surface);
268
269     y_cb_offset = obj_surface->y_cb_offset;
270     y_cr_offset = obj_surface->y_cr_offset;
271
272     BEGIN_BCS_BATCH(batch, 6);
273     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
274     OUT_BCS_BATCH(batch, 0);
275     OUT_BCS_BATCH(batch,
276                   ((obj_surface->orig_height - 1) << 18) |
277                   ((obj_surface->orig_width - 1) << 4));
278     OUT_BCS_BATCH(batch,
279                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
280                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
281                   (0 << 22) | /* surface object control state, ignored */
282                   ((obj_surface->width - 1) << 3) | /* pitch */
283                   (0 << 2)  | /* must be 0 */
284                   (1 << 1)  | /* must be tiled */
285                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
286     OUT_BCS_BATCH(batch,
287                   (0 << 16) | /* X offset for U(Cb), must be 0 */
288                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
289     OUT_BCS_BATCH(batch,
290                   (0 << 16) | /* X offset for V(Cr), must be 0 */
291                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
292     ADVANCE_BCS_BATCH(batch);
293 }
294
295 static void
296 gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
297                              struct decode_state *decode_state,
298                              int standard_select,
299                              struct gen7_mfd_context *gen7_mfd_context)
300 {
301     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
302     struct i965_driver_data *i965 = i965_driver_data(ctx);
303     int i;
304
305     BEGIN_BCS_BATCH(batch, 61);
306     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
307         /* Pre-deblock 1-3 */
308     if (gen7_mfd_context->pre_deblocking_output.valid)
309         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
310                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
311                       0);
312     else
313         OUT_BCS_BATCH(batch, 0);
314
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     /* Post-deblocking 4-6 */
318     if (gen7_mfd_context->post_deblocking_output.valid)
319         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
320                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
321                       0);
322     else
323         OUT_BCS_BATCH(batch, 0);
324
325     OUT_BCS_BATCH(batch, 0);
326     OUT_BCS_BATCH(batch, 0);
327
328     /* uncompressed-video & stream out 7-12 */
329     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
330     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
331     OUT_BCS_BATCH(batch, 0);
332     OUT_BCS_BATCH(batch, 0);
333     OUT_BCS_BATCH(batch, 0);
334     OUT_BCS_BATCH(batch, 0);
335
336     /* intra row-store scratch 13-15 */
337     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
338         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
339                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
340                       0);
341     else
342         OUT_BCS_BATCH(batch, 0);
343
344     OUT_BCS_BATCH(batch, 0);
345     OUT_BCS_BATCH(batch, 0);
346     /* deblocking-filter-row-store 16-18 */
347     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
348         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
349                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
350                       0);
351     else
352         OUT_BCS_BATCH(batch, 0);
353     OUT_BCS_BATCH(batch, 0);
354     OUT_BCS_BATCH(batch, 0);
355
356     /* DW 19..50 */
357     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
358         struct object_surface *obj_surface;
359
360         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
361             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
362             assert(obj_surface && obj_surface->bo);
363
364             OUT_BCS_RELOC(batch, obj_surface->bo,
365                           I915_GEM_DOMAIN_INSTRUCTION, 0,
366                           0);
367         } else {
368             OUT_BCS_BATCH(batch, 0);
369         }
370         OUT_BCS_BATCH(batch, 0);
371     }
372     /* reference property 51 */
373     OUT_BCS_BATCH(batch, 0);
374
375     /* Macroblock status & ILDB 52-57 */
376     OUT_BCS_BATCH(batch, 0);
377     OUT_BCS_BATCH(batch, 0);
378     OUT_BCS_BATCH(batch, 0);
379     OUT_BCS_BATCH(batch, 0);
380     OUT_BCS_BATCH(batch, 0);
381     OUT_BCS_BATCH(batch, 0);
382
383     /* the second Macroblock status 58-60 */
384     OUT_BCS_BATCH(batch, 0);
385     OUT_BCS_BATCH(batch, 0);
386     OUT_BCS_BATCH(batch, 0);
387     ADVANCE_BCS_BATCH(batch);
388 }
389
390 static void
391 gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
392                              struct decode_state *decode_state,
393                              int standard_select,
394                              struct gen7_mfd_context *gen7_mfd_context)
395 {
396     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
397     struct i965_driver_data *i965 = i965_driver_data(ctx);
398     int i;
399
400     if (IS_STEPPING_BPLUS(i965)) {
401         gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
402                                             standard_select, gen7_mfd_context);
403         return;
404     }
405     BEGIN_BCS_BATCH(batch, 25);
406     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
407     if (gen7_mfd_context->pre_deblocking_output.valid)
408         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
409                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
410                       0);
411     else
412         OUT_BCS_BATCH(batch, 0);
413
414     if (gen7_mfd_context->post_deblocking_output.valid)
415         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
416                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
417                       0);
418     else
419         OUT_BCS_BATCH(batch, 0);
420
421     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
422     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
423
424     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
425         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
426                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
427                       0);
428     else
429         OUT_BCS_BATCH(batch, 0);
430
431     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
432         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
433                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
434                       0);
435     else
436         OUT_BCS_BATCH(batch, 0);
437
438     /* DW 7..22 */
439     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
440         struct object_surface *obj_surface;
441
442         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
443             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
444             assert(obj_surface && obj_surface->bo);
445
446             OUT_BCS_RELOC(batch, obj_surface->bo,
447                           I915_GEM_DOMAIN_INSTRUCTION, 0,
448                           0);
449         } else {
450             OUT_BCS_BATCH(batch, 0);
451         }
452     }
453
454     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
455     OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
456     ADVANCE_BCS_BATCH(batch);
457 }
458
459 static void
460 gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
461                                  dri_bo *slice_data_bo,
462                                  int standard_select,
463                                  struct gen7_mfd_context *gen7_mfd_context)
464 {
465     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
466
467     BEGIN_BCS_BATCH(batch, 26);
468     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
469         /* MFX In BS 1-5 */
470     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
471     OUT_BCS_BATCH(batch, 0);
472     OUT_BCS_BATCH(batch, 0);
473         /* Upper bound 4-5 */   
474     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
475     OUT_BCS_BATCH(batch, 0);
476
477         /* MFX indirect MV 6-10 */
478     OUT_BCS_BATCH(batch, 0);
479     OUT_BCS_BATCH(batch, 0);
480     OUT_BCS_BATCH(batch, 0);
481     OUT_BCS_BATCH(batch, 0);
482     OUT_BCS_BATCH(batch, 0);
483         
484         /* MFX IT_COFF 11-15 */
485     OUT_BCS_BATCH(batch, 0);
486     OUT_BCS_BATCH(batch, 0);
487     OUT_BCS_BATCH(batch, 0);
488     OUT_BCS_BATCH(batch, 0);
489     OUT_BCS_BATCH(batch, 0);
490
491         /* MFX IT_DBLK 16-20 */
492     OUT_BCS_BATCH(batch, 0);
493     OUT_BCS_BATCH(batch, 0);
494     OUT_BCS_BATCH(batch, 0);
495     OUT_BCS_BATCH(batch, 0);
496     OUT_BCS_BATCH(batch, 0);
497
498         /* MFX PAK_BSE object for encoder 21-25 */
499     OUT_BCS_BATCH(batch, 0);
500     OUT_BCS_BATCH(batch, 0);
501     OUT_BCS_BATCH(batch, 0);
502     OUT_BCS_BATCH(batch, 0);
503     OUT_BCS_BATCH(batch, 0);
504
505     ADVANCE_BCS_BATCH(batch);
506 }
507
508 static void
509 gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
510                                  dri_bo *slice_data_bo,
511                                  int standard_select,
512                                  struct gen7_mfd_context *gen7_mfd_context)
513 {
514     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
515
516     struct i965_driver_data *i965 = i965_driver_data(ctx);
517
518     if (IS_STEPPING_BPLUS(i965)) {
519         gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
520                                                 standard_select, gen7_mfd_context);
521         return;
522     }
523     BEGIN_BCS_BATCH(batch, 11);
524     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
525     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
526     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
527     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
528     OUT_BCS_BATCH(batch, 0);
529     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
530     OUT_BCS_BATCH(batch, 0);
531     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
532     OUT_BCS_BATCH(batch, 0);
533     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
534     OUT_BCS_BATCH(batch, 0);
535     ADVANCE_BCS_BATCH(batch);
536 }
537
538 static void
539 gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
540                                  struct decode_state *decode_state,
541                                  int standard_select,
542                                  struct gen7_mfd_context *gen7_mfd_context)
543 {
544     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
545
546     BEGIN_BCS_BATCH(batch, 10);
547     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
548
549     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
550         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
551                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
552                       0);
553     else
554         OUT_BCS_BATCH(batch, 0);
555
556     OUT_BCS_BATCH(batch, 0);
557     OUT_BCS_BATCH(batch, 0);
558         /* MPR Row Store Scratch buffer 4-6 */
559     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
560         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
561                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
562                       0);
563     else
564         OUT_BCS_BATCH(batch, 0);
565     OUT_BCS_BATCH(batch, 0);
566     OUT_BCS_BATCH(batch, 0);
567
568         /* Bitplane 7-9 */ 
569     if (gen7_mfd_context->bitplane_read_buffer.valid)
570         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
571                       I915_GEM_DOMAIN_INSTRUCTION, 0,
572                       0);
573     else
574         OUT_BCS_BATCH(batch, 0);
575     OUT_BCS_BATCH(batch, 0);
576     OUT_BCS_BATCH(batch, 0);
577     ADVANCE_BCS_BATCH(batch);
578 }
579
580 static void
581 gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
582                                  struct decode_state *decode_state,
583                                  int standard_select,
584                                  struct gen7_mfd_context *gen7_mfd_context)
585 {
586     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
587     struct i965_driver_data *i965 = i965_driver_data(ctx);
588
589     if (IS_STEPPING_BPLUS(i965)) {
590         gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
591                                                 standard_select, gen7_mfd_context);
592         return;
593     }
594
595     BEGIN_BCS_BATCH(batch, 4);
596     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
597
598     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
599         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
600                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
601                       0);
602     else
603         OUT_BCS_BATCH(batch, 0);
604                 
605     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
606         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
607                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
608                       0);
609     else
610         OUT_BCS_BATCH(batch, 0);
611
612     if (gen7_mfd_context->bitplane_read_buffer.valid)
613         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
614                       I915_GEM_DOMAIN_INSTRUCTION, 0,
615                       0);
616     else
617         OUT_BCS_BATCH(batch, 0);
618
619     ADVANCE_BCS_BATCH(batch);
620 }
621
622 #if 0
623 static void
624 gen7_mfd_aes_state(VADriverContextP ctx,
625                    struct decode_state *decode_state,
626                    int standard_select)
627 {
628     /* FIXME */
629 }
630 #endif
631
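/* Load a single quantizer matrix (up to 16 dwords) via MFX_QM_STATE. */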
632 static void
633 gen75_mfd_qm_state(VADriverContextP ctx,
634                   int qm_type,
635                   unsigned char *qm,
636                   int qm_length,
637                   struct gen7_mfd_context *gen7_mfd_context)
638 {
639     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
640     unsigned int qm_buffer[16];
641
642     assert(qm_length <= 16 * 4);
643     memcpy(qm_buffer, qm, qm_length);
644
645     BEGIN_BCS_BATCH(batch, 18);
646     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
647     OUT_BCS_BATCH(batch, qm_type << 0);
648     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
649     ADVANCE_BCS_BATCH(batch);
650 }
651
652 #if 0
653 static void
654 gen7_mfd_wait(VADriverContextP ctx,
655               struct decode_state *decode_state,
656               int standard_select,
657               struct gen7_mfd_context *gen7_mfd_context)
658 {
659     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
660
661     BEGIN_BCS_BATCH(batch, 1);
662     OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
663     ADVANCE_BCS_BATCH(batch);
664 }
665 #endif
666
667 static void
668 gen75_mfd_avc_img_state(VADriverContextP ctx,
669                        struct decode_state *decode_state,
670                        struct gen7_mfd_context *gen7_mfd_context)
671 {
672     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
673     int img_struct;
674     int mbaff_frame_flag;
675     unsigned int width_in_mbs, height_in_mbs;
676     VAPictureParameterBufferH264 *pic_param;
677
678     assert(decode_state->pic_param && decode_state->pic_param->buffer);
679     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
680     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
681
682     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
683         img_struct = 1;
684     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
685         img_struct = 3;
686     else
687         img_struct = 0;
688
689     if ((img_struct & 0x1) == 0x1) {
690         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
691     } else {
692         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
693     }
694
695     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
696         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
697         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
698     } else {
699         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
700     }
701
702     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
703                         !pic_param->pic_fields.bits.field_pic_flag);
704
705     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
706     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
707
708     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
709     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
710            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
711     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
712
713     BEGIN_BCS_BATCH(batch, 17);
714     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
715     OUT_BCS_BATCH(batch, 
716                   width_in_mbs * height_in_mbs);
717     OUT_BCS_BATCH(batch, 
718                   ((height_in_mbs - 1) << 16) | 
719                   ((width_in_mbs - 1) << 0));
720     OUT_BCS_BATCH(batch, 
721                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
722                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
723                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
724                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
725                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
726                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
727                   (img_struct << 8));
728     OUT_BCS_BATCH(batch,
729                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
730                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
731                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
732                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
733                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
734                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
735                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
736                   (mbaff_frame_flag << 1) |
737                   (pic_param->pic_fields.bits.field_pic_flag << 0));
738     OUT_BCS_BATCH(batch, 0);
739     OUT_BCS_BATCH(batch, 0);
740     OUT_BCS_BATCH(batch, 0);
741     OUT_BCS_BATCH(batch, 0);
742     OUT_BCS_BATCH(batch, 0);
743     OUT_BCS_BATCH(batch, 0);
744     OUT_BCS_BATCH(batch, 0);
745     OUT_BCS_BATCH(batch, 0);
746     OUT_BCS_BATCH(batch, 0);
747     OUT_BCS_BATCH(batch, 0);
748     OUT_BCS_BATCH(batch, 0);
749     OUT_BCS_BATCH(batch, 0);
750     ADVANCE_BCS_BATCH(batch);
751 }
752
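/*
 * Program the AVC scaling lists: use the application-supplied IQ matrix when
 * present, otherwise fall back to the default (flat) matrices; the 8x8 lists
 * are only sent when transform_8x8_mode_flag is set.
 */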
753 static void
754 gen75_mfd_avc_qm_state(VADriverContextP ctx,
755                       struct decode_state *decode_state,
756                       struct gen7_mfd_context *gen7_mfd_context)
757 {
758     VAIQMatrixBufferH264 *iq_matrix;
759     VAPictureParameterBufferH264 *pic_param;
760
761     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
762         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
763     else
764         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
765
766     assert(decode_state->pic_param && decode_state->pic_param->buffer);
767     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
768
769     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
770     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
771
772     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
773         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
774         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
775     }
776 }
777
778 static void
779 gen75_mfd_avc_picid_state(VADriverContextP ctx,
780                       struct decode_state *decode_state,
781                       struct gen7_mfd_context *gen7_mfd_context)
782 {
783     struct i965_driver_data *i965 = i965_driver_data(ctx);
784     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
785
786     BEGIN_BCS_BATCH(batch, 10);
787     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
788     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
789     OUT_BCS_BATCH(batch, 0);
790     OUT_BCS_BATCH(batch, 0);
791     OUT_BCS_BATCH(batch, 0);
792     OUT_BCS_BATCH(batch, 0);
793     OUT_BCS_BATCH(batch, 0);
794     OUT_BCS_BATCH(batch, 0);
795     OUT_BCS_BATCH(batch, 0);
796     OUT_BCS_BATCH(batch, 0);
797     ADVANCE_BCS_BATCH(batch);
798 }
799
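/*
 * MFX_AVC_DIRECTMODE_STATE (B+ stepping layout): direct-MV buffers for every
 * reference surface and for the current picture, followed by the POC list of
 * the references and of the current frame/field.
 */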
800 static void
801 gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
802                               VAPictureParameterBufferH264 *pic_param,
803                               VASliceParameterBufferH264 *slice_param,
804                               struct gen7_mfd_context *gen7_mfd_context)
805 {
806     struct i965_driver_data *i965 = i965_driver_data(ctx);
807     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
808     struct object_surface *obj_surface;
809     GenAvcSurface *gen7_avc_surface;
810     VAPictureH264 *va_pic;
811     int i, j;
812
813     BEGIN_BCS_BATCH(batch, 71);
814     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
815
816     /* reference surfaces 0..15 */
817     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
818         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
819             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
820             assert(obj_surface);
821             gen7_avc_surface = obj_surface->private_data;
822
823             if (gen7_avc_surface == NULL) {
824                 OUT_BCS_BATCH(batch, 0);
825                 OUT_BCS_BATCH(batch, 0);
826             } else {
827                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
828                               I915_GEM_DOMAIN_INSTRUCTION, 0,
829                               0);
830                 OUT_BCS_BATCH(batch, 0);
831             }
832         } else {
833             OUT_BCS_BATCH(batch, 0);
834             OUT_BCS_BATCH(batch, 0);
835         }
836     }
837     OUT_BCS_BATCH(batch, 0);
838
839     /* the current decoding frame/field */
840     va_pic = &pic_param->CurrPic;
841     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
842     obj_surface = SURFACE(va_pic->picture_id);
843     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
844     gen7_avc_surface = obj_surface->private_data;
845
846     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
847                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
848                   0);
849
850     OUT_BCS_BATCH(batch, 0);
851     OUT_BCS_BATCH(batch, 0);
852
853     /* POC List */
854     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
855         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
856             int found = 0;
857             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
858                 va_pic = &pic_param->ReferenceFrames[j];
859                 
860                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
861                     continue;
862
863                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
864                     found = 1;
865                     break;
866                 }
867             }
868
869             assert(found == 1);
870             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
871             
872             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
873             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
874         } else {
875             OUT_BCS_BATCH(batch, 0);
876             OUT_BCS_BATCH(batch, 0);
877         }
878     }
879
880     va_pic = &pic_param->CurrPic;
881     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
882     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
883
884     ADVANCE_BCS_BATCH(batch);
885 }
886
887 static void
888 gen75_mfd_avc_directmode_state(VADriverContextP ctx,
889                               VAPictureParameterBufferH264 *pic_param,
890                               VASliceParameterBufferH264 *slice_param,
891                               struct gen7_mfd_context *gen7_mfd_context)
892 {
893     struct i965_driver_data *i965 = i965_driver_data(ctx);
894     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
895     struct object_surface *obj_surface;
896     GenAvcSurface *gen7_avc_surface;
897     VAPictureH264 *va_pic;
898     int i, j;
899
900     if (IS_STEPPING_BPLUS(i965)) {
901         gen75_mfd_avc_directmode_state_bplus(ctx, pic_param, slice_param,
902                                              gen7_mfd_context);
903
904         return;
905     }
906
907     BEGIN_BCS_BATCH(batch, 69);
908     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
909
910     /* reference surfaces 0..15 */
911     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
912         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
913             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
914             assert(obj_surface);
915             gen7_avc_surface = obj_surface->private_data;
916
917             if (gen7_avc_surface == NULL) {
918                 OUT_BCS_BATCH(batch, 0);
919                 OUT_BCS_BATCH(batch, 0);
920             } else {
921                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
922                               I915_GEM_DOMAIN_INSTRUCTION, 0,
923                               0);
924
925                 if (gen7_avc_surface->dmv_bottom_flag == 1)
926                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
927                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
928                                   0);
929                 else
930                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
931                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
932                                   0);
933             }
934         } else {
935             OUT_BCS_BATCH(batch, 0);
936             OUT_BCS_BATCH(batch, 0);
937         }
938     }
939
940     /* the current decoding frame/field */
941     va_pic = &pic_param->CurrPic;
942     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
943     obj_surface = SURFACE(va_pic->picture_id);
944     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
945     gen7_avc_surface = obj_surface->private_data;
946
947     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
948                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
949                   0);
950
951     if (gen7_avc_surface->dmv_bottom_flag == 1)
952         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
953                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
954                       0);
955     else
956         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
957                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
958                       0);
959
960     /* POC List */
961     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
962         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
963             int found = 0;
964             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
965                 va_pic = &pic_param->ReferenceFrames[j];
966                 
967                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
968                     continue;
969
970                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
971                     found = 1;
972                     break;
973                 }
974             }
975
976             assert(found == 1);
977             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
978             
979             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
980             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
981         } else {
982             OUT_BCS_BATCH(batch, 0);
983             OUT_BCS_BATCH(batch, 0);
984         }
985     }
986
987     va_pic = &pic_param->CurrPic;
988     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
989     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
990
991     ADVANCE_BCS_BATCH(batch);
992 }
993
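/*
 * MFX_AVC_SLICE_STATE: slice type, active reference counts, QP/deblocking
 * parameters and the macroblock positions of this slice and of the next one
 * (or the end of the picture when this is the last slice).
 */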
994 static void
995 gen75_mfd_avc_slice_state(VADriverContextP ctx,
996                          VAPictureParameterBufferH264 *pic_param,
997                          VASliceParameterBufferH264 *slice_param,
998                          VASliceParameterBufferH264 *next_slice_param,
999                          struct gen7_mfd_context *gen7_mfd_context)
1000 {
1001     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1002     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
1003     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
1004     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
1005     int num_ref_idx_l0, num_ref_idx_l1;
1006     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
1007                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
1008     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
1009     int slice_type;
1010
1011     if (slice_param->slice_type == SLICE_TYPE_I ||
1012         slice_param->slice_type == SLICE_TYPE_SI) {
1013         slice_type = SLICE_TYPE_I;
1014     } else if (slice_param->slice_type == SLICE_TYPE_P ||
1015                slice_param->slice_type == SLICE_TYPE_SP) {
1016         slice_type = SLICE_TYPE_P;
1017     } else { 
1018         assert(slice_param->slice_type == SLICE_TYPE_B);
1019         slice_type = SLICE_TYPE_B;
1020     }
1021
1022     if (slice_type == SLICE_TYPE_I) {
1023         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
1024         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
1025         num_ref_idx_l0 = 0;
1026         num_ref_idx_l1 = 0;
1027     } else if (slice_type == SLICE_TYPE_P) {
1028         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
1029         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
1030         num_ref_idx_l1 = 0;
1031     } else {
1032         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
1033         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
1034     }
1035
1036     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
1037     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
1038     slice_ver_pos = first_mb_in_slice / width_in_mbs;
1039
1040     if (next_slice_param) {
1041         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
1042         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
1043         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
1044     } else {
1045         next_slice_hor_pos = 0;
1046         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
1047     }
1048
1049     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
1050     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
1051     OUT_BCS_BATCH(batch, slice_type);
1052     OUT_BCS_BATCH(batch, 
1053                   (num_ref_idx_l1 << 24) |
1054                   (num_ref_idx_l0 << 16) |
1055                   (slice_param->chroma_log2_weight_denom << 8) |
1056                   (slice_param->luma_log2_weight_denom << 0));
1057     OUT_BCS_BATCH(batch, 
1058                   (slice_param->direct_spatial_mv_pred_flag << 29) |
1059                   (slice_param->disable_deblocking_filter_idc << 27) |
1060                   (slice_param->cabac_init_idc << 24) |
1061                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
1062                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
1063                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
1064     OUT_BCS_BATCH(batch, 
1065                   (slice_ver_pos << 24) |
1066                   (slice_hor_pos << 16) | 
1067                   (first_mb_in_slice << 0));
1068     OUT_BCS_BATCH(batch,
1069                   (next_slice_ver_pos << 16) |
1070                   (next_slice_hor_pos << 0));
1071     OUT_BCS_BATCH(batch, 
1072                   (next_slice_param == NULL) << 19); /* last slice flag */
1073     OUT_BCS_BATCH(batch, 0);
1074     OUT_BCS_BATCH(batch, 0);
1075     OUT_BCS_BATCH(batch, 0);
1076     OUT_BCS_BATCH(batch, 0);
1077     ADVANCE_BCS_BATCH(batch);
1078 }
1079
1080 static inline void
1081 gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
1082                            VAPictureParameterBufferH264 *pic_param,
1083                            VASliceParameterBufferH264 *slice_param,
1084                            struct gen7_mfd_context *gen7_mfd_context)
1085 {
1086     gen6_send_avc_ref_idx_state(
1087         gen7_mfd_context->base.batch,
1088         slice_param,
1089         gen7_mfd_context->reference_surface
1090     );
1091 }
1092
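/*
 * Emit MFX_AVC_WEIGHTOFFSET_STATE tables for explicit weighted prediction:
 * the L0 table for weighted P/SP slices, and both L0 and L1 tables for B
 * slices with weighted_bipred_idc == 1.
 */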
1093 static void
1094 gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
1095                                 VAPictureParameterBufferH264 *pic_param,
1096                                 VASliceParameterBufferH264 *slice_param,
1097                                 struct gen7_mfd_context *gen7_mfd_context)
1098 {
1099     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1100     int i, j, num_weight_offset_table = 0;
1101     short weightoffsets[32 * 6];
1102
1103     if ((slice_param->slice_type == SLICE_TYPE_P ||
1104          slice_param->slice_type == SLICE_TYPE_SP) &&
1105         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
1106         num_weight_offset_table = 1;
1107     }
1108     
1109     if ((slice_param->slice_type == SLICE_TYPE_B) &&
1110         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
1111         num_weight_offset_table = 2;
1112     }
1113
1114     for (i = 0; i < num_weight_offset_table; i++) {
1115         BEGIN_BCS_BATCH(batch, 98);
1116         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
1117         OUT_BCS_BATCH(batch, i);
1118
1119         if (i == 0) {
1120             for (j = 0; j < 32; j++) {
1121                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
1122                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
1123                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
1124                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
1125                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
1126                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
1127             }
1128         } else {
1129             for (j = 0; j < 32; j++) {
1130                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
1131                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
1132                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
1133                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
1134                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
1135                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
1136             }
1137         }
1138
1139         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
1140         ADVANCE_BCS_BATCH(batch);
1141     }
1142 }
1143
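/*
 * Translate the slice_data_bit_offset reported against the RBSP into an
 * offset into the raw bitstream by accounting for the 0x000003
 * emulation-prevention bytes in the slice header; CABAC slices are also
 * rounded up to a byte boundary.
 */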
1144 static int
1145 gen75_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
1146 {
1147     int out_slice_data_bit_offset;
1148     int slice_header_size = in_slice_data_bit_offset / 8;
1149     int i, j;
1150
1151     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1152         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
1153             i++, j += 2;
1154         }
1155     }
1156
1157     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1158
1159     if (mode_flag == ENTROPY_CABAC)
1160         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
1161
1162     return out_slice_data_bit_offset;
1163 }
1164
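/*
 * MFD_AVC_BSD_OBJECT: point the BSD unit at one slice inside the indirect
 * bitstream buffer, using the corrected bit offset computed above, and flag
 * the last slice of the picture.
 */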
1165 static void
1166 gen75_mfd_avc_bsd_object(VADriverContextP ctx,
1167                         VAPictureParameterBufferH264 *pic_param,
1168                         VASliceParameterBufferH264 *slice_param,
1169                         dri_bo *slice_data_bo,
1170                         VASliceParameterBufferH264 *next_slice_param,
1171                         struct gen7_mfd_context *gen7_mfd_context)
1172 {
1173     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1174     int slice_data_bit_offset;
1175     uint8_t *slice_data = NULL;
1176
1177     dri_bo_map(slice_data_bo, 0);
1178     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1179     slice_data_bit_offset = gen75_mfd_avc_get_slice_bit_offset(slice_data,
1180                                                               pic_param->pic_fields.bits.entropy_coding_mode_flag,
1181                                                               slice_param->slice_data_bit_offset);
1182     dri_bo_unmap(slice_data_bo);
1183
1184     /* the input bitstream format on GEN7 differs from GEN6 */
1185     BEGIN_BCS_BATCH(batch, 6);
1186     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
1187     OUT_BCS_BATCH(batch, 
1188                   (slice_param->slice_data_size));
1189     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
1190     OUT_BCS_BATCH(batch,
1191                   (0 << 31) |
1192                   (0 << 14) |
1193                   (0 << 12) |
1194                   (0 << 10) |
1195                   (0 << 8));
1196     OUT_BCS_BATCH(batch,
1197                   ((slice_data_bit_offset >> 3) << 16) |
1198                   (0 << 5)  |
1199                   (0 << 4)  |
1200                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
1201                   (slice_data_bit_offset & 0x7));
1202     OUT_BCS_BATCH(batch, 0);
1203     ADVANCE_BCS_BATCH(batch);
1204 }
1205
1206 static inline void
1207 gen75_mfd_avc_context_init(
1208     VADriverContextP         ctx,
1209     struct gen7_mfd_context *gen7_mfd_context
1210 )
1211 {
1212     /* Initialize flat scaling lists */
1213     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
1214 }
1215
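/*
 * Per-picture setup: scan the slices to decide whether in-loop deblocking is
 * needed (enable_avc_ildb), refresh the frame store, make sure the render
 * target has a backing BO and an AVC private surface, and (re)allocate the
 * row-store scratch buffers sized from the picture width in macroblocks.
 */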
1216 static void
1217 gen75_mfd_avc_decode_init(VADriverContextP ctx,
1218                          struct decode_state *decode_state,
1219                          struct gen7_mfd_context *gen7_mfd_context)
1220 {
1221     VAPictureParameterBufferH264 *pic_param;
1222     VASliceParameterBufferH264 *slice_param;
1223     VAPictureH264 *va_pic;
1224     struct i965_driver_data *i965 = i965_driver_data(ctx);
1225     struct object_surface *obj_surface;
1226     dri_bo *bo;
1227     int i, j, enable_avc_ildb = 0;
1228     unsigned int width_in_mbs, height_in_mbs;
1229
1230     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
1231         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1232         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1233
1234         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1235             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1236             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1237                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1238                    (slice_param->slice_type == SLICE_TYPE_P) ||
1239                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1240                    (slice_param->slice_type == SLICE_TYPE_B));
1241
1242             if (slice_param->disable_deblocking_filter_idc != 1) {
1243                 enable_avc_ildb = 1;
1244                 break;
1245             }
1246
1247             slice_param++;
1248         }
1249     }
1250
1251     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1252     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1253     gen75_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
1254     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
1255     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
1256     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
1257     assert(height_in_mbs > 0 && height_in_mbs <= 256);
1258
1259     /* Current decoded picture */
1260     va_pic = &pic_param->CurrPic;
1261     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
1262     obj_surface = SURFACE(va_pic->picture_id);
1263     assert(obj_surface);
1264     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
1265     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
1266     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1267     gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
1268
1269     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1270     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1271     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1272     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
1273
1274     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1275     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1276     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1277     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1278
1279     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1280     bo = dri_bo_alloc(i965->intel.bufmgr,
1281                       "intra row store",
1282                       width_in_mbs * 64,
1283                       0x1000);
1284     assert(bo);
1285     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1286     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1287
1288     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1289     bo = dri_bo_alloc(i965->intel.bufmgr,
1290                       "deblocking filter row store",
1291                       width_in_mbs * 64 * 4,
1292                       0x1000);
1293     assert(bo);
1294     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1295     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1296
1297     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1298     bo = dri_bo_alloc(i965->intel.bufmgr,
1299                       "bsd mpc row store",
1300                       width_in_mbs * 64 * 2,
1301                       0x1000);
1302     assert(bo);
1303     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1304     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1305
1306     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
1307     bo = dri_bo_alloc(i965->intel.bufmgr,
1308                       "mpr row store",
1309                       width_in_mbs * 64 * 2,
1310                       0x1000);
1311     assert(bo);
1312     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1313     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1314
1315     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1316 }
1317
1318 static void
1319 gen75_mfd_avc_decode_picture(VADriverContextP ctx,
1320                             struct decode_state *decode_state,
1321                             struct gen7_mfd_context *gen7_mfd_context)
1322 {
1323     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1324     VAPictureParameterBufferH264 *pic_param;
1325     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
1326     dri_bo *slice_data_bo;
1327     int i, j;
1328
1329     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1330     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1331     gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
1332
1333     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1334     intel_batchbuffer_emit_mi_flush(batch);
1335     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1336     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1337     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1338     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1339     gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
1340     gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
1341     gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
1342
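         /*
          * Emit per-slice state.  next_slice_param points at the slice that
          * follows (or NULL for the last slice of the picture) so the slice
          * state and BSD object know where the current slice ends.
          */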
1343     for (j = 0; j < decode_state->num_slice_params; j++) {
1344         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1345         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1346         slice_data_bo = decode_state->slice_datas[j]->bo;
1347         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
1348
1349         if (j == decode_state->num_slice_params - 1)
1350             next_slice_group_param = NULL;
1351         else
1352             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1353
1354         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1355             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1356             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1357                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1358                    (slice_param->slice_type == SLICE_TYPE_P) ||
1359                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1360                    (slice_param->slice_type == SLICE_TYPE_B));
1361
1362             if (i < decode_state->slice_params[j]->num_elements - 1)
1363                 next_slice_param = slice_param + 1;
1364             else
1365                 next_slice_param = next_slice_group_param;
1366
1367             gen75_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
1368             gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1369             gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1370             gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1371             gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1372             slice_param++;
1373         }
1374     }
1375
1376     intel_batchbuffer_end_atomic(batch);
1377     intel_batchbuffer_flush(batch);
1378 }
1379
1380 static void
1381 gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
1382                            struct decode_state *decode_state,
1383                            struct gen7_mfd_context *gen7_mfd_context)
1384 {
1385     VAPictureParameterBufferMPEG2 *pic_param;
1386     struct i965_driver_data *i965 = i965_driver_data(ctx);
1387     struct object_surface *obj_surface;
1388     dri_bo *bo;
1389     unsigned int width_in_mbs;
1390
1391     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1392     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1393     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1394
1395     mpeg2_set_reference_surfaces(
1396         ctx,
1397         gen7_mfd_context->reference_surface,
1398         decode_state,
1399         pic_param
1400     );
1401
1402     /* Current decoded picture */
1403     obj_surface = SURFACE(decode_state->current_render_target);
1404     assert(obj_surface);
1405     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1406
1407     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1408     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1409     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1410     gen7_mfd_context->pre_deblocking_output.valid = 1;
1411
1412     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1413     bo = dri_bo_alloc(i965->intel.bufmgr,
1414                       "bsd mpc row store",
1415                       width_in_mbs * 96,
1416                       0x1000);
1417     assert(bo);
1418     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1419     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1420
1421     gen7_mfd_context->post_deblocking_output.valid = 0;
1422     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1423     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1424     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1425     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1426 }
1427
1428 static void
1429 gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
1430                          struct decode_state *decode_state,
1431                          struct gen7_mfd_context *gen7_mfd_context)
1432 {
1433     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1434     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1435     VAPictureParameterBufferMPEG2 *pic_param;
1436     unsigned int slice_concealment_disable_bit = 0;
1437
1438     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1439     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1440
1441     slice_concealment_disable_bit = 1;
1442
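         /*
          * MFX_MPEG2_PIC_STATE: DW1 packs the four f_code fields together with
          * the picture coding extension flags, DW2 holds the picture coding
          * type, and DW3 carries the frame size in macroblocks plus the slice
          * concealment control.
          */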
1443     BEGIN_BCS_BATCH(batch, 13);
1444     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1445     OUT_BCS_BATCH(batch,
1446                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1447                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1448                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1449                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1450                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1451                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1452                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1453                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1454                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1455                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1456                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1457                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1458     OUT_BCS_BATCH(batch,
1459                   pic_param->picture_coding_type << 9);
1460     OUT_BCS_BATCH(batch,
1461                   (slice_concealment_disable_bit << 31) |
1462                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1463                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1464     OUT_BCS_BATCH(batch, 0);
1465     OUT_BCS_BATCH(batch, 0);
1466     OUT_BCS_BATCH(batch, 0);
1467     OUT_BCS_BATCH(batch, 0);
1468     OUT_BCS_BATCH(batch, 0);
1469     OUT_BCS_BATCH(batch, 0);
1470     OUT_BCS_BATCH(batch, 0);
1471     OUT_BCS_BATCH(batch, 0);
1472     OUT_BCS_BATCH(batch, 0);
1473     ADVANCE_BCS_BATCH(batch);
1474 }
1475
1476 static void
1477 gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
1478                         struct decode_state *decode_state,
1479                         struct gen7_mfd_context *gen7_mfd_context)
1480 {
1481     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1482     int i, j;
1483
1484     /* Update internal QM state */
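         /*
          * The load_*_quantiser_matrix flags indicate whether new tables were
          * supplied; cache them across frames and convert from the zig-zag
          * order of the VA buffer to raster order via zigzag_direct[] before
          * handing them to the hardware.
          */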
1485     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1486         VAIQMatrixBufferMPEG2 * const iq_matrix =
1487             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1488
1489         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1490             iq_matrix->load_intra_quantiser_matrix) {
1491             gen_iq_matrix->load_intra_quantiser_matrix =
1492                 iq_matrix->load_intra_quantiser_matrix;
1493             if (iq_matrix->load_intra_quantiser_matrix) {
1494                 for (j = 0; j < 64; j++)
1495                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1496                         iq_matrix->intra_quantiser_matrix[j];
1497             }
1498         }
1499
1500         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1501             iq_matrix->load_non_intra_quantiser_matrix) {
1502             gen_iq_matrix->load_non_intra_quantiser_matrix =
1503                 iq_matrix->load_non_intra_quantiser_matrix;
1504             if (iq_matrix->load_non_intra_quantiser_matrix) {
1505                 for (j = 0; j < 64; j++)
1506                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1507                         iq_matrix->non_intra_quantiser_matrix[j];
1508             }
1509         }
1510     }
1511
1512     /* Commit QM state to HW */
1513     for (i = 0; i < 2; i++) {
1514         unsigned char *qm = NULL;
1515         int qm_type;
1516
1517         if (i == 0) {
1518             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1519                 qm = gen_iq_matrix->intra_quantiser_matrix;
1520                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1521             }
1522         } else {
1523             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1524                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1525                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1526             }
1527         }
1528
1529         if (!qm)
1530             continue;
1531
1532         gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1533     }
1534 }
1535
1536 static void
1537 gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1538                           VAPictureParameterBufferMPEG2 *pic_param,
1539                           VASliceParameterBufferMPEG2 *slice_param,
1540                           VASliceParameterBufferMPEG2 *next_slice_param,
1541                           struct gen7_mfd_context *gen7_mfd_context)
1542 {
1543     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1544     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1545     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1546     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1547
1548     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1549         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1550         is_field_pic = 1;
1551     is_field_pic_wa = is_field_pic &&
1552         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1553
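         /*
          * wa_mpeg2_slice_vertical_position is positive when the slice vertical
          * positions appear to be given in frame rather than field units for a
          * field picture; in that case halve the position to match the
          * field-sized picture the hardware decodes.
          */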
1554     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1555     hpos0 = slice_param->slice_horizontal_position;
1556
1557     if (next_slice_param == NULL) {
1558         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1559         hpos1 = 0;
1560     } else {
1561         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1562         hpos1 = next_slice_param->slice_horizontal_position;
1563     }
1564
1565     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1566
1567     BEGIN_BCS_BATCH(batch, 5);
1568     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1569     OUT_BCS_BATCH(batch, 
1570                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1571     OUT_BCS_BATCH(batch, 
1572                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1573     OUT_BCS_BATCH(batch,
1574                   hpos0 << 24 |
1575                   vpos0 << 16 |
1576                   mb_count << 8 |
1577                   (next_slice_param == NULL) << 5 |
1578                   (next_slice_param == NULL) << 3 |
1579                   (slice_param->macroblock_offset & 0x7));
1580     OUT_BCS_BATCH(batch,
1581                   (slice_param->quantiser_scale_code << 24) |
1582                   (vpos1 << 8 | hpos1));
1583     ADVANCE_BCS_BATCH(batch);
1584 }
1585
1586 static void
1587 gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1588                               struct decode_state *decode_state,
1589                               struct gen7_mfd_context *gen7_mfd_context)
1590 {
1591     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1592     VAPictureParameterBufferMPEG2 *pic_param;
1593     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1594     dri_bo *slice_data_bo;
1595     int i, j;
1596
1597     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1598     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1599
1600     gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1601     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1602     intel_batchbuffer_emit_mi_flush(batch);
1603     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1604     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1605     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1606     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1607     gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1608     gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1609
1610     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1611         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1612             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1613
1614     for (j = 0; j < decode_state->num_slice_params; j++) {
1615         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1616         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1617         slice_data_bo = decode_state->slice_datas[j]->bo;
1618         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1619
1620         if (j == decode_state->num_slice_params - 1)
1621             next_slice_group_param = NULL;
1622         else
1623             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1624
1625         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1626             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1627
1628             if (i < decode_state->slice_params[j]->num_elements - 1)
1629                 next_slice_param = slice_param + 1;
1630             else
1631                 next_slice_param = next_slice_group_param;
1632
1633             gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1634             slice_param++;
1635         }
1636     }
1637
1638     intel_batchbuffer_end_atomic(batch);
1639     intel_batchbuffer_flush(batch);
1640 }
1641
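     /*
      * Map the VA-API VC-1 picture_type (0 = I, 1 = P, 2 = B, 3 = BI,
      * 4 = skipped) onto the GEN7 picture type codes; skipped frames are
      * decoded as P pictures.
      */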
1642 static const int va_to_gen7_vc1_pic_type[5] = {
1643     GEN7_VC1_I_PICTURE,
1644     GEN7_VC1_P_PICTURE,
1645     GEN7_VC1_B_PICTURE,
1646     GEN7_VC1_BI_PICTURE,
1647     GEN7_VC1_P_PICTURE,
1648 };
1649
1650 static const int va_to_gen7_vc1_mv[4] = {
1651     1, /* 1-MV */
1652     2, /* 1-MV half-pel */
1653     3, /* 1-MV half-pel bilinear */
1654     0, /* Mixed MV */
1655 };
1656
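     /*
      * B-fraction scale factors in 1/256 units (e.g. 1/2 -> 128, 1/3 -> 85,
      * 2/3 -> 170), indexed by b_picture_fraction.  They are used to scale the
      * reference distance for B pictures and are programmed into
      * MFD_VC1_LONG_PIC_STATE.
      */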
1657 static const int b_picture_scale_factor[21] = {
1658     128, 85,  170, 64,  192,
1659     51,  102, 153, 204, 43,
1660     215, 37,  74,  111, 148,
1661     185, 222, 32,  96,  160, 
1662     224,
1663 };
1664
1665 static const int va_to_gen7_vc1_condover[3] = {
1666     0,
1667     2,
1668     3
1669 };
1670
1671 static const int va_to_gen7_vc1_profile[4] = {
1672     GEN7_VC1_SIMPLE_PROFILE,
1673     GEN7_VC1_MAIN_PROFILE,
1674     GEN7_VC1_RESERVED_PROFILE,
1675     GEN7_VC1_ADVANCED_PROFILE
1676 };
1677
1678 static void 
1679 gen75_mfd_free_vc1_surface(void **data)
1680 {
1681     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1682
1683     if (!gen7_vc1_surface)
1684         return;
1685
1686     dri_bo_unreference(gen7_vc1_surface->dmv);
1687     free(gen7_vc1_surface);
1688     *data = NULL;
1689 }
1690
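     /*
      * Lazily attach a gen7_vc1_surface block to the render surface.  It keeps
      * the picture type of the frame and a direct-mode motion-vector buffer
      * (64 bytes per macroblock) that is read back via MFX_VC1_DIRECTMODE_STATE
      * when this surface later serves as the backward reference of a B picture.
      */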
1691 static void
1692 gen75_mfd_init_vc1_surface(VADriverContextP ctx, 
1693                           VAPictureParameterBufferVC1 *pic_param,
1694                           struct object_surface *obj_surface)
1695 {
1696     struct i965_driver_data *i965 = i965_driver_data(ctx);
1697     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1698     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1699     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1700
1701     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
1702
1703     if (!gen7_vc1_surface) {
1704         gen7_vc1_surface = calloc(1, sizeof(struct gen7_vc1_surface));
1705         assert((obj_surface->size & 0x3f) == 0);
1706         obj_surface->private_data = gen7_vc1_surface;
1707     }
1708
1709     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1710
1711     if (gen7_vc1_surface->dmv == NULL) {
1712         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1713                                              "direct mv w/r buffer",
1714                                              width_in_mbs * height_in_mbs * 64,
1715                                              0x1000);
1716     }
1717 }
1718
1719 static void
1720 gen75_mfd_vc1_decode_init(VADriverContextP ctx,
1721                          struct decode_state *decode_state,
1722                          struct gen7_mfd_context *gen7_mfd_context)
1723 {
1724     VAPictureParameterBufferVC1 *pic_param;
1725     struct i965_driver_data *i965 = i965_driver_data(ctx);
1726     struct object_surface *obj_surface;
1727     int i;
1728     dri_bo *bo;
1729     int width_in_mbs;
1730
1731     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1732     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1733     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1734
1735     /* reference picture */
1736     obj_surface = SURFACE(pic_param->forward_reference_picture);
1737
1738     if (obj_surface && obj_surface->bo)
1739         gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1740     else
1741         gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1742
1743     obj_surface = SURFACE(pic_param->backward_reference_picture);
1744
1745     if (obj_surface && obj_surface->bo)
1746         gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1747     else
1748         gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1749
1750     /* The remaining frame-store entries must point at valid surfaces, so alias them to the two reference pictures set up above. */
1751     for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
1752         gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
1753
1754     /* Current decoded picture */
1755     obj_surface = SURFACE(decode_state->current_render_target);
1756     assert(obj_surface);
1757     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1758     gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1759
1760     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1761     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1762     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1763     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1764
1765     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1766     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1767     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1768     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1769
1770     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1771     bo = dri_bo_alloc(i965->intel.bufmgr,
1772                       "intra row store",
1773                       width_in_mbs * 64,
1774                       0x1000);
1775     assert(bo);
1776     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1777     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1778
1779     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1780     bo = dri_bo_alloc(i965->intel.bufmgr,
1781                       "deblocking filter row store",
1782                       width_in_mbs * 6 * 64,
1783                       0x1000);
1784     assert(bo);
1785     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1786     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1787
1788     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1789     bo = dri_bo_alloc(i965->intel.bufmgr,
1790                       "bsd mpc row store",
1791                       width_in_mbs * 96,
1792                       0x1000);
1793     assert(bo);
1794     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1795     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1796
1797     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1798
1799     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1800     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1801     
1802     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1803         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1804         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1805         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1806         int src_w, src_h;
1807         uint8_t *src = NULL, *dst = NULL;
1808
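             /*
              * Repack the VC-1 bitplane.  VA-API packs two macroblocks per byte
              * continuously across the picture with the first macroblock of each
              * pair in the high nibble; the buffer consumed by the hardware is
              * repacked row by row, bitplane_width bytes per macroblock row,
              * with the even-column macroblock in the low nibble.
              */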
1809         assert(decode_state->bit_plane->buffer);
1810         src = decode_state->bit_plane->buffer;
1811
1812         bo = dri_bo_alloc(i965->intel.bufmgr,
1813                           "VC-1 Bitplane",
1814                           bitplane_width * height_in_mbs,
1815                           0x1000);
1816         assert(bo);
1817         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1818
1819         dri_bo_map(bo, True);
1820         assert(bo->virtual);
1821         dst = bo->virtual;
1822
1823         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1824             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1825                 int src_index, dst_index;
1826                 int src_shift;
1827                 uint8_t src_value;
1828
1829                 src_index = (src_h * width_in_mbs + src_w) / 2;
1830                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1831                 src_value = ((src[src_index] >> src_shift) & 0xf);
1832
1833                 dst_index = src_w / 2;
1834                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1835             }
1836
1837             if (src_w & 1)
1838                 dst[src_w / 2] >>= 4;
1839
1840             dst += bitplane_width;
1841         }
1842
1843         dri_bo_unmap(bo);
1844     } else
1845         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1846 }
1847
1848 static void
1849 gen75_mfd_vc1_pic_state(VADriverContextP ctx,
1850                        struct decode_state *decode_state,
1851                        struct gen7_mfd_context *gen7_mfd_context)
1852 {
1853     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1854     VAPictureParameterBufferVC1 *pic_param;
1855     struct i965_driver_data *i965 = i965_driver_data(ctx);
1856     struct object_surface *obj_surface;
1857     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1858     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1859     int unified_mv_mode;
1860     int ref_field_pic_polarity = 0;
1861     int scale_factor = 0;
1862     int trans_ac_y = 0;
1863     int dmv_surface_valid = 0;
1864     int brfd = 0;
1865     int fcm = 0;
1866     int picture_type;
1867     int profile;
1868     int overlap;
1869     int interpolation_mode = 0;
1870
1871     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1872     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1873
1874     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1875     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1876     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1877     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1878     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1879     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1880     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1881     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1882
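         /*
          * Derive the alternative PQUANT configuration from the VC-1 VOPDQUANT
          * syntax elements (DQUANT, DQUANTFRM, DQPROFILE, DQDBEDGE/DQSBEDGE and
          * DQBILEVEL): alt_pquant_config selects how ALTPQUANT is applied and
          * alt_pquant_edge_mask selects the picture edges it applies to.
          */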
1883     if (dquant == 0) {
1884         alt_pquant_config = 0;
1885         alt_pquant_edge_mask = 0;
1886     } else if (dquant == 2) {
1887         alt_pquant_config = 1;
1888         alt_pquant_edge_mask = 0xf;
1889     } else {
1890         assert(dquant == 1);
1891         if (dquantfrm == 0) {
1892             alt_pquant_config = 0;
1893             alt_pquant_edge_mask = 0;
1894             alt_pq = 0;
1895         } else {
1896             assert(dquantfrm == 1);
1897             alt_pquant_config = 1;
1898
1899             switch (dqprofile) {
1900             case 3:
1901                 if (dqbilevel == 0) {
1902                     alt_pquant_config = 2;
1903                     alt_pquant_edge_mask = 0;
1904                 } else {
1905                     assert(dqbilevel == 1);
1906                     alt_pquant_config = 3;
1907                     alt_pquant_edge_mask = 0;
1908                 }
1909                 break;
1910                 
1911             case 0:
1912                 alt_pquant_edge_mask = 0xf;
1913                 break;
1914
1915             case 1:
1916                 if (dqdbedge == 3)
1917                     alt_pquant_edge_mask = 0x9;
1918                 else
1919                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1920
1921                 break;
1922
1923             case 2:
1924                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1925                 break;
1926
1927             default:
1928                 assert(0);
1929             }
1930         }
1931     }
1932
1933     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1934         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1935         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1936     } else {
1937         assert(pic_param->mv_fields.bits.mv_mode < 4);
1938         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1939     }
1940
1941     if (pic_param->sequence_fields.bits.interlace == 1 &&
1942         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1943         /* FIXME: calculate reference field picture polarity */
1944         assert(0);
1945         ref_field_pic_polarity = 0;
1946     }
1947
1948     if (pic_param->b_picture_fraction < 21)
1949         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1950
1951     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1952     
1953     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1954         picture_type == GEN7_VC1_I_PICTURE)
1955         picture_type = GEN7_VC1_BI_PICTURE;
1956
1957     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1958         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1959     else
1960         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1961
1962
1963     if (picture_type == GEN7_VC1_B_PICTURE) {
1964         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1965
1966         obj_surface = SURFACE(pic_param->backward_reference_picture);
1967         assert(obj_surface);
1968         gen7_vc1_surface = obj_surface->private_data;
1969
1970         if (!gen7_vc1_surface || 
1971             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1972              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1973             dmv_surface_valid = 0;
1974         else
1975             dmv_surface_valid = 1;
1976     }
1977
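         /*
          * Frame coding mode for the hardware: 0 = progressive, 1 = interlaced
          * frame, 2/3 = interlaced field pictures with top/bottom field first.
          */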
1978     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1979
1980     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1981         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1982     else {
1983         if (pic_param->picture_fields.bits.top_field_first)
1984             fcm = 2;
1985         else
1986             fcm = 3;
1987     }
1988
1989     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1990         brfd = pic_param->reference_fields.bits.reference_distance;
1991         brfd = (scale_factor * brfd) >> 8;
1992         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1993
1994         if (brfd < 0)
1995             brfd = 0;
1996     }
1997
1998     overlap = pic_param->sequence_fields.bits.overlap;
1999     if (profile != GEN7_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
2000         overlap = 0;
2001
2002     assert(pic_param->conditional_overlap_flag < 3);
2003     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
2004
2005     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
2006         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
2007          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
2008         interpolation_mode = 9; /* Half-pel bilinear */
2009     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
2010              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
2011               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
2012         interpolation_mode = 1; /* Half-pel bicubic */
2013     else
2014         interpolation_mode = 0; /* Quarter-pel bicubic */
2015
2016     BEGIN_BCS_BATCH(batch, 6);
2017     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
2018     OUT_BCS_BATCH(batch,
2019                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
2020                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
2021     OUT_BCS_BATCH(batch,
2022                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
2023                   dmv_surface_valid << 15 |
2024                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
2025                   pic_param->rounding_control << 13 |
2026                   pic_param->sequence_fields.bits.syncmarker << 12 |
2027                   interpolation_mode << 8 |
2028                   0 << 7 | /* FIXME: scale up or down ??? */
2029                   pic_param->range_reduction_frame << 6 |
2030                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
2031                   overlap << 4 |
2032                   !pic_param->picture_fields.bits.is_first_field << 3 |
2033                   (pic_param->sequence_fields.bits.profile == 3) << 0);
2034     OUT_BCS_BATCH(batch,
2035                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
2036                   picture_type << 26 |
2037                   fcm << 24 |
2038                   alt_pq << 16 |
2039                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
2040                   scale_factor << 0);
2041     OUT_BCS_BATCH(batch,
2042                   unified_mv_mode << 28 |
2043                   pic_param->mv_fields.bits.four_mv_switch << 27 |
2044                   pic_param->fast_uvmc_flag << 26 |
2045                   ref_field_pic_polarity << 25 |
2046                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
2047                   pic_param->reference_fields.bits.reference_distance << 20 |
2048                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
2049                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
2050                   pic_param->mv_fields.bits.extended_mv_range << 8 |
2051                   alt_pquant_edge_mask << 4 |
2052                   alt_pquant_config << 2 |
2053                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
2054                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
2055     OUT_BCS_BATCH(batch,
2056                   !!pic_param->bitplane_present.value << 31 |
2057                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
2058                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
2059                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
2060                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
2061                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
2062                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
2063                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
2064                   pic_param->mv_fields.bits.mv_table << 20 |
2065                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
2066                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
2067                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
2068                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
2069                   pic_param->mb_mode_table << 8 |
2070                   trans_ac_y << 6 |
2071                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
2072                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
2073                   pic_param->cbp_table << 0);
2074     ADVANCE_BCS_BATCH(batch);
2075 }
2076
2077 static void
2078 gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
2079                              struct decode_state *decode_state,
2080                              struct gen7_mfd_context *gen7_mfd_context)
2081 {
2082     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2083     VAPictureParameterBufferVC1 *pic_param;
2084     int intensitycomp_single;
2085
2086     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2087     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2088
2091     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
2092
2093     BEGIN_BCS_BATCH(batch, 6);
2094     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
2095     OUT_BCS_BATCH(batch,
2096                   0 << 14 | /* FIXME: double ??? */
2097                   0 << 12 |
2098                   intensitycomp_single << 10 |
2099                   intensitycomp_single << 8 |
2100                   0 << 4 | /* FIXME: interlace mode */
2101                   0);
2102     OUT_BCS_BATCH(batch,
2103                   pic_param->luma_shift << 16 |
2104                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
2105     OUT_BCS_BATCH(batch, 0);
2106     OUT_BCS_BATCH(batch, 0);
2107     OUT_BCS_BATCH(batch, 0);
2108     ADVANCE_BCS_BATCH(batch);
2109 }
2110
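     /*
      * MFX_VC1_DIRECTMODE_STATE: the direct-mode MV buffer of the current frame
      * is the write target, and the DMV buffer saved with the backward reference
      * picture is read back for B-picture direct prediction.  The _bplus variant
      * emits the longer (7 dword) form used on B+ steppings.
      */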
2111 static void
2112 gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
2113                               struct decode_state *decode_state,
2114                               struct gen7_mfd_context *gen7_mfd_context)
2115 {
2116     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2117     VAPictureParameterBufferVC1 *pic_param;
2118     struct i965_driver_data *i965 = i965_driver_data(ctx);
2119     struct object_surface *obj_surface;
2120     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2121
2122     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2123     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2124
2125     obj_surface = SURFACE(decode_state->current_render_target);
2126
2127     if (obj_surface && obj_surface->private_data) {
2128         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2129     }
2130
2131     obj_surface = SURFACE(pic_param->backward_reference_picture);
2132
2133     if (obj_surface && obj_surface->private_data) {
2134         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2135     }
2136
2137     BEGIN_BCS_BATCH(batch, 7);
2138     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
2139
2140     if (dmv_write_buffer)
2141         OUT_BCS_RELOC(batch, dmv_write_buffer,
2142                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2143                       0);
2144     else
2145         OUT_BCS_BATCH(batch, 0);
2146
2147     OUT_BCS_BATCH(batch, 0);
2148     OUT_BCS_BATCH(batch, 0);
2149
2150     if (dmv_read_buffer)
2151         OUT_BCS_RELOC(batch, dmv_read_buffer,
2152                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2153                       0);
2154     else
2155         OUT_BCS_BATCH(batch, 0);
2156     OUT_BCS_BATCH(batch, 0);
2157     OUT_BCS_BATCH(batch, 0);
2158
2159     ADVANCE_BCS_BATCH(batch);
2160 }
2161
2162 static void
2163 gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
2164                               struct decode_state *decode_state,
2165                               struct gen7_mfd_context *gen7_mfd_context)
2166 {
2167     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2168     VAPictureParameterBufferVC1 *pic_param;
2169     struct i965_driver_data *i965 = i965_driver_data(ctx);
2170     struct object_surface *obj_surface;
2171     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2172
2173     if (IS_STEPPING_BPLUS(i965)) {
2174         gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
2175         return;
2176     }
2177     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2178     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2179
2180     obj_surface = SURFACE(decode_state->current_render_target);
2181
2182     if (obj_surface && obj_surface->private_data) {
2183         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2184     }
2185
2186     obj_surface = SURFACE(pic_param->backward_reference_picture);
2187
2188     if (obj_surface && obj_surface->private_data) {
2189         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2190     }
2191
2192     BEGIN_BCS_BATCH(batch, 3);
2193     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
2194
2195     if (dmv_write_buffer)
2196         OUT_BCS_RELOC(batch, dmv_write_buffer,
2197                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2198                       0);
2199     else
2200         OUT_BCS_BATCH(batch, 0);
2201
2202     if (dmv_read_buffer)
2203         OUT_BCS_RELOC(batch, dmv_read_buffer,
2204                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2205                       0);
2206     else
2207         OUT_BCS_BATCH(batch, 0);
2208                   
2209     ADVANCE_BCS_BATCH(batch);
2210 }
2211
2212 static int
2213 gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
2214 {
2215     int out_slice_data_bit_offset;
2216     int slice_header_size = in_slice_data_bit_offset / 8;
2217     int i, j;
2218
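         /*
          * For the Advanced profile (profile == 3) the slice data still contains
          * the 00 00 03 start-code emulation prevention pattern.  Walk the slice
          * header and account for each such byte so the returned macroblock bit
          * offset points into the raw buffer handed to the hardware.
          */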
2219     if (profile != 3)
2220         out_slice_data_bit_offset = in_slice_data_bit_offset;
2221     else {
2222         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
2223             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
2224                 i++, j += 2;
2225             }
2226         }
2227
2228         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
2229     }
2230
2231     return out_slice_data_bit_offset;
2232 }
2233
2234 static void
2235 gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
2236                         VAPictureParameterBufferVC1 *pic_param,
2237                         VASliceParameterBufferVC1 *slice_param,
2238                         VASliceParameterBufferVC1 *next_slice_param,
2239                         dri_bo *slice_data_bo,
2240                         struct gen7_mfd_context *gen7_mfd_context)
2241 {
2242     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2243     int next_slice_start_vert_pos;
2244     int macroblock_offset;
2245     uint8_t *slice_data = NULL;
2246
2247     dri_bo_map(slice_data_bo, 0);
2248     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
2249     macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data, 
2250                                                                slice_param->macroblock_offset,
2251                                                                pic_param->sequence_fields.bits.profile);
2252     dri_bo_unmap(slice_data_bo);
2253
2254     if (next_slice_param)
2255         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
2256     else
2257         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
2258
2259     BEGIN_BCS_BATCH(batch, 5);
2260     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
2261     OUT_BCS_BATCH(batch, 
2262                   slice_param->slice_data_size - (macroblock_offset >> 3));
2263     OUT_BCS_BATCH(batch, 
2264                   slice_param->slice_data_offset + (macroblock_offset >> 3));
2265     OUT_BCS_BATCH(batch,
2266                   slice_param->slice_vertical_position << 16 |
2267                   next_slice_start_vert_pos << 0);
2268     OUT_BCS_BATCH(batch,
2269                   (macroblock_offset & 0x7));
2270     ADVANCE_BCS_BATCH(batch);
2271 }
2272
2273 static void
2274 gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
2275                             struct decode_state *decode_state,
2276                             struct gen7_mfd_context *gen7_mfd_context)
2277 {
2278     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2279     VAPictureParameterBufferVC1 *pic_param;
2280     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
2281     dri_bo *slice_data_bo;
2282     int i, j;
2283
2284     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2285     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2286
2287     gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
2288     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2289     intel_batchbuffer_emit_mi_flush(batch);
2290     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2291     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2292     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2293     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2294     gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
2295     gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
2296     gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
2297
2298     for (j = 0; j < decode_state->num_slice_params; j++) {
2299         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2300         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2301         slice_data_bo = decode_state->slice_datas[j]->bo;
2302         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
2303
2304         if (j == decode_state->num_slice_params - 1)
2305             next_slice_group_param = NULL;
2306         else
2307             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2308
2309         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2310             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2311
2312             if (i < decode_state->slice_params[j]->num_elements - 1)
2313                 next_slice_param = slice_param + 1;
2314             else
2315                 next_slice_param = next_slice_group_param;
2316
2317             gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2318             slice_param++;
2319         }
2320     }
2321
2322     intel_batchbuffer_end_atomic(batch);
2323     intel_batchbuffer_flush(batch);
2324 }
2325
2326 static void
2327 gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
2328                           struct decode_state *decode_state,
2329                           struct gen7_mfd_context *gen7_mfd_context)
2330 {
2331     struct i965_driver_data *i965 = i965_driver_data(ctx);
2332     struct object_surface *obj_surface;
2333     VAPictureParameterBufferJPEGBaseline *pic_param;
2334     int subsampling = SUBSAMPLE_YUV420;
2335
2336     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2337
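         /*
          * Derive the chroma subsampling of the decoded surface from the
          * per-component sampling factors in the JPEG frame header.
          */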
2338     if (pic_param->num_components == 1)
2339         subsampling = SUBSAMPLE_YUV400;
2340     else if (pic_param->num_components == 3) {
2341         int h1 = pic_param->components[0].h_sampling_factor;
2342         int h2 = pic_param->components[1].h_sampling_factor;
2343         int h3 = pic_param->components[2].h_sampling_factor;
2344         int v1 = pic_param->components[0].v_sampling_factor;
2345         int v2 = pic_param->components[1].v_sampling_factor;
2346         int v3 = pic_param->components[2].v_sampling_factor;
2347
2348         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2349             v1 == 2 && v2 == 1 && v3 == 1)
2350             subsampling = SUBSAMPLE_YUV420;
2351         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2352                  v1 == 1 && v2 == 1 && v3 == 1)
2353             subsampling = SUBSAMPLE_YUV422H;
2354         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2355                  v1 == 1 && v2 == 1 && v3 == 1)
2356             subsampling = SUBSAMPLE_YUV444;
2357         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2358                  v1 == 1 && v2 == 1 && v3 == 1)
2359             subsampling = SUBSAMPLE_YUV411;
2360         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2361                  v1 == 2 && v2 == 1 && v3 == 1)
2362             subsampling = SUBSAMPLE_YUV422V;
2363         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2364                  v1 == 2 && v2 == 2 && v3 == 2)
2365             subsampling = SUBSAMPLE_YUV422H;
2366         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2367                  v1 == 2 && v2 == 1 && v3 == 1)
2368             subsampling = SUBSAMPLE_YUV422V;
2369         else
2370             assert(0);
2371     } else {
2372         assert(0);
2373     }
2374
2375     /* Current decoded picture */
2376     obj_surface = SURFACE(decode_state->current_render_target);
2377     assert(obj_surface);
2378     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
2379
2380     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2381     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2382     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2383     gen7_mfd_context->pre_deblocking_output.valid = 1;
2384
2385     gen7_mfd_context->post_deblocking_output.bo = NULL;
2386     gen7_mfd_context->post_deblocking_output.valid = 0;
2387
2388     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2389     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2390
2391     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2392     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2393
2394     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2395     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2396
2397     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2398     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2399
2400     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2401     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2402 }
2403
2404 static const int va_to_gen7_jpeg_rotation[4] = {
2405     GEN7_JPEG_ROTATION_0,
2406     GEN7_JPEG_ROTATION_90,
2407     GEN7_JPEG_ROTATION_180,
2408     GEN7_JPEG_ROTATION_270
2409 };
2410
2411 static void
2412 gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
2413                         struct decode_state *decode_state,
2414                         struct gen7_mfd_context *gen7_mfd_context)
2415 {
2416     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2417     VAPictureParameterBufferJPEGBaseline *pic_param;
2418     int chroma_type = GEN7_YUV420;
2419     int frame_width_in_blks;
2420     int frame_height_in_blks;
2421
2422     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2423     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2424
2425     if (pic_param->num_components == 1)
2426         chroma_type = GEN7_YUV400;
2427     else if (pic_param->num_components == 3) {
2428         int h1 = pic_param->components[0].h_sampling_factor;
2429         int h2 = pic_param->components[1].h_sampling_factor;
2430         int h3 = pic_param->components[2].h_sampling_factor;
2431         int v1 = pic_param->components[0].v_sampling_factor;
2432         int v2 = pic_param->components[1].v_sampling_factor;
2433         int v3 = pic_param->components[2].v_sampling_factor;
2434
2435         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2436             v1 == 2 && v2 == 1 && v3 == 1)
2437             chroma_type = GEN7_YUV420;
2438         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2439                  v1 == 1 && v2 == 1 && v3 == 1)
2440             chroma_type = GEN7_YUV422H_2Y;
2441         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2442                  v1 == 1 && v2 == 1 && v3 == 1)
2443             chroma_type = GEN7_YUV444;
2444         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2445                  v1 == 1 && v2 == 1 && v3 == 1)
2446             chroma_type = GEN7_YUV411;
2447         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2448                  v1 == 2 && v2 == 1 && v3 == 1)
2449             chroma_type = GEN7_YUV422V_2Y;
2450         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2451                  v1 == 2 && v2 == 2 && v3 == 2)
2452             chroma_type = GEN7_YUV422H_4Y;
2453         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2454                  v1 == 2 && v2 == 1 && v3 == 1)
2455             chroma_type = GEN7_YUV422V_4Y;
2456         else
2457             assert(0);
2458     }
2459
2460     if (chroma_type == GEN7_YUV400 ||
2461         chroma_type == GEN7_YUV444 ||
2462         chroma_type == GEN7_YUV422V_2Y) {
2463         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2464         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2465     } else if (chroma_type == GEN7_YUV411) {
2466         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2467         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2468     } else {
2469         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2470         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2471     }
2472
2473     BEGIN_BCS_BATCH(batch, 3);
2474     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2475     OUT_BCS_BATCH(batch,
2476                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2477                   (chroma_type << 0));
2478     OUT_BCS_BATCH(batch,
2479                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2480                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2481     ADVANCE_BCS_BATCH(batch);
2482 }
2483
2484 static const int va_to_gen7_jpeg_hufftable[2] = {
2485     MFX_HUFFTABLE_ID_Y,
2486     MFX_HUFFTABLE_ID_UV
2487 };
2488
2489 static void
2490 gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2491                                struct decode_state *decode_state,
2492                                struct gen7_mfd_context *gen7_mfd_context,
2493                                int num_tables)
2494 {
2495     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2496     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2497     int index;
2498
2499     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2500         return;
2501
2502     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2503
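         /*
          * MFX_JPEG_HUFF_TABLE_STATE is 53 dwords: the command header, the table
          * id, then the packed Huffman data (12 DC code-length counts, 12 DC
          * values, 16 AC code-length counts and 164 bytes of AC values).
          */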
2504     for (index = 0; index < num_tables; index++) {
2505         int id = va_to_gen7_jpeg_hufftable[index];
2506         if (!huffman_table->load_huffman_table[index])
2507             continue;
2508         BEGIN_BCS_BATCH(batch, 53);
2509         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2510         OUT_BCS_BATCH(batch, id);
2511         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2512         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2513         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2514         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2515         ADVANCE_BCS_BATCH(batch);
2516     }
2517 }
2518
2519 static const int va_to_gen7_jpeg_qm[5] = {
2520     -1,
2521     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2522     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2523     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2524     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2525 };
2526
2527 static void
2528 gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
2529                        struct decode_state *decode_state,
2530                        struct gen7_mfd_context *gen7_mfd_context)
2531 {
2532     VAPictureParameterBufferJPEGBaseline *pic_param;
2533     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2534     int index;
2535
2536     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2537         return;
2538
2539     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2540     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2541
2542     assert(pic_param->num_components <= 3);
2543
2544     for (index = 0; index < pic_param->num_components; index++) {
2545         int qm_type = va_to_gen7_jpeg_qm[pic_param->components[index].component_id - pic_param->components[0].component_id + 1];
2546         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2547         unsigned char raster_qm[64];
2548         int j;
2549
2550         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2551             continue;
2552
2553         for (j = 0; j < 64; j++)
2554             raster_qm[zigzag_direct[j]] = qm[j];
2555
2556         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2557     }
2558 }
2559
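/*
 * Emit the MFD_JPEG_BSD_OBJECT for one scan.  The scan component mask is
 * derived from each scan component's position relative to the first picture
 * component, and the scan is marked interleaved when it carries more than
 * one component.
 */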
2560 static void
2561 gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
2562                          VAPictureParameterBufferJPEGBaseline *pic_param,
2563                          VASliceParameterBufferJPEGBaseline *slice_param,
2564                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2565                          dri_bo *slice_data_bo,
2566                          struct gen7_mfd_context *gen7_mfd_context)
2567 {
2568     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2569     int scan_component_mask = 0;
2570     int i;
2571
2572     assert(slice_param->num_components > 0);
2573     assert(slice_param->num_components < 4);
2574     assert(slice_param->num_components <= pic_param->num_components);
2575
2576     for (i = 0; i < slice_param->num_components; i++) {
2577         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2578         case 1:
2579             scan_component_mask |= (1 << 0);
2580             break;
2581         case 2:
2582             scan_component_mask |= (1 << 1);
2583             break;
2584         case 3:
2585             scan_component_mask |= (1 << 2);
2586             break;
2587         default:
2588             assert(0);
2589             break;
2590         }
2591     }
2592
2593     BEGIN_BCS_BATCH(batch, 6);
2594     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2595     OUT_BCS_BATCH(batch, 
2596                   slice_param->slice_data_size);
2597     OUT_BCS_BATCH(batch, 
2598                   slice_param->slice_data_offset);
2599     OUT_BCS_BATCH(batch,
2600                   slice_param->slice_horizontal_position << 16 |
2601                   slice_param->slice_vertical_position << 0);
2602     OUT_BCS_BATCH(batch,
2603                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2604                   (scan_component_mask << 27) |                 /* scan components */
2605                   (0 << 26) |   /* disable interrupt allowed */
2606                   (slice_param->num_mcus << 0));                /* MCU count */
2607     OUT_BCS_BATCH(batch,
2608                   (slice_param->restart_interval << 0));    /* RestartInterval */
2609     ADVANCE_BCS_BATCH(batch);
2610 }
2611
2612 /* Workaround for JPEG decoding on Ivybridge */
2613
2614 VAStatus 
2615 i965_DestroySurfaces(VADriverContextP ctx,
2616                      VASurfaceID *surface_list,
2617                      int num_surfaces);
2618 VAStatus 
2619 i965_CreateSurfaces(VADriverContextP ctx,
2620                     int width,
2621                     int height,
2622                     int format,
2623                     int num_surfaces,
2624                     VASurfaceID *surfaces);
2625
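/*
 * Pre-encoded 16x16 AVC intra clip used by the workaround: it is decoded
 * through the AVC pipeline before every JPEG picture (see gen75_mfd_jpeg_wa()).
 */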
2626 static struct {
2627     int width;
2628     int height;
2629     unsigned char data[32];
2630     int data_size;
2631     int data_bit_offset;
2632     int qp;
2633 } gen7_jpeg_wa_clip = {
2634     16,
2635     16,
2636     {
2637         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2638         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2639     },
2640     14,
2641     40,
2642     28,
2643 };
2644
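/*
 * (Re)create the scratch NV12 surface for the workaround clip and upload its
 * pre-encoded bitstream into a 4KB slice-data buffer object.
 */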
2645 static void
2646 gen75_jpeg_wa_init(VADriverContextP ctx,
2647                   struct gen7_mfd_context *gen7_mfd_context)
2648 {
2649     struct i965_driver_data *i965 = i965_driver_data(ctx);
2650     VAStatus status;
2651     struct object_surface *obj_surface;
2652
2653     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2654         i965_DestroySurfaces(ctx,
2655                              &gen7_mfd_context->jpeg_wa_surface_id,
2656                              1);
2657
2658     status = i965_CreateSurfaces(ctx,
2659                                  gen7_jpeg_wa_clip.width,
2660                                  gen7_jpeg_wa_clip.height,
2661                                  VA_RT_FORMAT_YUV420,
2662                                  1,
2663                                  &gen7_mfd_context->jpeg_wa_surface_id);
2664     assert(status == VA_STATUS_SUCCESS);
2665
2666     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2667     assert(obj_surface);
2668     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2669
2670     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2671         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2672                                                                "JPEG WA data",
2673                                                                0x1000,
2674                                                                0x1000);
2675         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2676                        0,
2677                        gen7_jpeg_wa_clip.data_size,
2678                        gen7_jpeg_wa_clip.data);
2679     }
2680 }
2681
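/* The workaround clip is decoded in AVC VLD mode rather than JPEG mode. */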
2682 static void
2683 gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2684                               struct gen7_mfd_context *gen7_mfd_context)
2685 {
2686     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2687
2688     BEGIN_BCS_BATCH(batch, 5);
2689     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2690     OUT_BCS_BATCH(batch,
2691                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2692                   (MFD_MODE_VLD << 15) | /* VLD mode */
2693                   (0 << 10) | /* disable Stream-Out */
2694                   (0 << 9)  | /* Post Deblocking Output */
2695                   (1 << 8)  | /* Pre Deblocking Output */
2696                   (0 << 5)  | /* not in stitch mode */
2697                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2698                   (MFX_FORMAT_AVC << 0));
2699     OUT_BCS_BATCH(batch,
2700                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2701                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2702                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2703                   (0 << 1)  |
2704                   (0 << 0));
2705     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2706     OUT_BCS_BATCH(batch, 0); /* reserved */
2707     ADVANCE_BCS_BATCH(batch);
2708 }
2709
2710 static void
2711 gen75_jpeg_wa_surface_state(VADriverContextP ctx,
2712                            struct gen7_mfd_context *gen7_mfd_context)
2713 {
2714     struct i965_driver_data *i965 = i965_driver_data(ctx);
2715     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2716     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2717
2718     BEGIN_BCS_BATCH(batch, 6);
2719     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2720     OUT_BCS_BATCH(batch, 0);
2721     OUT_BCS_BATCH(batch,
2722                   ((obj_surface->orig_width - 1) << 18) |
2723                   ((obj_surface->orig_height - 1) << 4));
2724     OUT_BCS_BATCH(batch,
2725                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2726                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2727                   (0 << 22) | /* surface object control state, ignored */
2728                   ((obj_surface->width - 1) << 3) | /* pitch */
2729                   (0 << 2)  | /* must be 0 */
2730                   (1 << 1)  | /* must be tiled */
2731                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2732     OUT_BCS_BATCH(batch,
2733                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2734                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2735     OUT_BCS_BATCH(batch,
2736                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2737                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2738     ADVANCE_BCS_BATCH(batch);
2739 }
2740
2741 static void
2742 gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
2743                                  struct gen7_mfd_context *gen7_mfd_context)
2744 {
2745     struct i965_driver_data *i965 = i965_driver_data(ctx);
2746     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2747     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2748     dri_bo *intra_bo;
2749     int i;
2750
2751     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2752                             "intra row store",
2753                             128 * 64,
2754                             0x1000);
2755
2756     BEGIN_BCS_BATCH(batch, 61);
2757     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2758     OUT_BCS_RELOC(batch,
2759                   obj_surface->bo,
2760                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2761                   0);
2762     OUT_BCS_BATCH(batch, 0);
2763     OUT_BCS_BATCH(batch, 0);
2764
2765
2766     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2767     OUT_BCS_BATCH(batch, 0);
2768     OUT_BCS_BATCH(batch, 0);
2769
2770     /* uncompressed-video & stream out 7-12 */
2771     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2772     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2773     OUT_BCS_BATCH(batch, 0);
2774     OUT_BCS_BATCH(batch, 0);
2775     OUT_BCS_BATCH(batch, 0);
2776     OUT_BCS_BATCH(batch, 0);
2777
2778     /* the DW 13-15 is for intra row store scratch */
2779     OUT_BCS_RELOC(batch,
2780                   intra_bo,
2781                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2782                   0);
2783     OUT_BCS_BATCH(batch, 0);
2784     OUT_BCS_BATCH(batch, 0);
2785
2786     /* the DW 16-18 is for deblocking filter */
2787     OUT_BCS_BATCH(batch, 0);
2788     OUT_BCS_BATCH(batch, 0);
2789     OUT_BCS_BATCH(batch, 0);
2790
2791     /* DW 19..50 */
2792     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2793         OUT_BCS_BATCH(batch, 0);
2794         OUT_BCS_BATCH(batch, 0);
2795     }
2796     OUT_BCS_BATCH(batch, 0);
2797
2798     /* the DW 52-54 is for mb status address */
2799     OUT_BCS_BATCH(batch, 0);
2800     OUT_BCS_BATCH(batch, 0);
2801     OUT_BCS_BATCH(batch, 0);
2802     /* the DW 55-60 is for ILDB & second ILDB address */
2803     OUT_BCS_BATCH(batch, 0);
2804     OUT_BCS_BATCH(batch, 0);
2805     OUT_BCS_BATCH(batch, 0);
2806     OUT_BCS_BATCH(batch, 0);
2807     OUT_BCS_BATCH(batch, 0);
2808     OUT_BCS_BATCH(batch, 0);
2809
2810     ADVANCE_BCS_BATCH(batch);
2811
2812     dri_bo_unreference(intra_bo);
2813 }
2814 static void
2815 gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2816                                  struct gen7_mfd_context *gen7_mfd_context)
2817 {
2818     struct i965_driver_data *i965 = i965_driver_data(ctx);
2819     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2820     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2821     dri_bo *intra_bo;
2822     int i;
2823
2824     if (IS_STEPPING_BPLUS(i965)) {
2825         gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
2826         return;
2827     }
2828     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2829                             "intra row store",
2830                             128 * 64,
2831                             0x1000);
2832
2833     BEGIN_BCS_BATCH(batch, 25);
2834     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
2835     OUT_BCS_RELOC(batch,
2836                   obj_surface->bo,
2837                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2838                   0);
2839     
2840     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2841
2842     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2843     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2844
2845     OUT_BCS_RELOC(batch,
2846                   intra_bo,
2847                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2848                   0);
2849
2850     OUT_BCS_BATCH(batch, 0);
2851
2852     /* DW 7..22 */
2853     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2854         OUT_BCS_BATCH(batch, 0);
2855     }
2856
2857     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2858     OUT_BCS_BATCH(batch, 0);
2859     ADVANCE_BCS_BATCH(batch);
2860
2861     dri_bo_unreference(intra_bo);
2862 }
2863
2864 static void
2865 gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
2866                                      struct gen7_mfd_context *gen7_mfd_context)
2867 {
2868     struct i965_driver_data *i965 = i965_driver_data(ctx);
2869     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2870     dri_bo *bsd_mpc_bo, *mpr_bo;
2871
2872     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2873                               "bsd mpc row store",
2874                               11520, /* 1.5 * 120 * 64 */
2875                               0x1000);
2876
2877     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2878                           "mpr row store",
2879                           7680, /* 1.0 * 120 * 64 */
2880                           0x1000);
2881
2882     BEGIN_BCS_BATCH(batch, 10);
2883     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2884
2885     OUT_BCS_RELOC(batch,
2886                   bsd_mpc_bo,
2887                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2888                   0);
2889
2890     OUT_BCS_BATCH(batch, 0);
2891     OUT_BCS_BATCH(batch, 0);
2892
2893     OUT_BCS_RELOC(batch,
2894                   mpr_bo,
2895                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2896                   0);
2897     OUT_BCS_BATCH(batch, 0);
2898     OUT_BCS_BATCH(batch, 0);
2899
2900     OUT_BCS_BATCH(batch, 0);
2901     OUT_BCS_BATCH(batch, 0);
2902     OUT_BCS_BATCH(batch, 0);
2903
2904     ADVANCE_BCS_BATCH(batch);
2905
2906     dri_bo_unreference(bsd_mpc_bo);
2907     dri_bo_unreference(mpr_bo);
2908 }
2909
2910 static void
2911 gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2912                                      struct gen7_mfd_context *gen7_mfd_context)
2913 {
2914     struct i965_driver_data *i965 = i965_driver_data(ctx);
2915     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2916     dri_bo *bsd_mpc_bo, *mpr_bo;
2917
2918     if (IS_STEPPING_BPLUS(i965)) {
2919         gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
2920         return;
2921     }
2922
2923     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2924                               "bsd mpc row store",
2925                               11520, /* 1.5 * 120 * 64 */
2926                               0x1000);
2927
2928     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2929                           "mpr row store",
2930                           7680, /* 1.0 * 120 * 64 */
2931                           0x1000);
2932
2933     BEGIN_BCS_BATCH(batch, 4);
2934     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2935
2936     OUT_BCS_RELOC(batch,
2937                   bsd_mpc_bo,
2938                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2939                   0);
2940
2941     OUT_BCS_RELOC(batch,
2942                   mpr_bo,
2943                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2944                   0);
2945     OUT_BCS_BATCH(batch, 0);
2946
2947     ADVANCE_BCS_BATCH(batch);
2948
2949     dri_bo_unreference(bsd_mpc_bo);
2950     dri_bo_unreference(mpr_bo);
2951 }
2952
2953 static void
2954 gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2955                           struct gen7_mfd_context *gen7_mfd_context)
2956 {
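    /* Intentionally empty: no AVC QM state is emitted for the workaround clip. */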
2957
2958 }
2959
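/* AVC image state for the single-macroblock (16x16) workaround clip. */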
2960 static void
2961 gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
2962                            struct gen7_mfd_context *gen7_mfd_context)
2963 {
2964     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2965     int img_struct = 0;
2966     int mbaff_frame_flag = 0;
2967     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2968
2969     BEGIN_BCS_BATCH(batch, 16);
2970     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2971     OUT_BCS_BATCH(batch, 
2972                   width_in_mbs * height_in_mbs);
2973     OUT_BCS_BATCH(batch, 
2974                   ((height_in_mbs - 1) << 16) | 
2975                   ((width_in_mbs - 1) << 0));
2976     OUT_BCS_BATCH(batch, 
2977                   (0 << 24) |
2978                   (0 << 16) |
2979                   (0 << 14) |
2980                   (0 << 13) |
2981                   (0 << 12) | /* differ from GEN6 */
2982                   (0 << 10) |
2983                   (img_struct << 8));
2984     OUT_BCS_BATCH(batch,
2985                   (1 << 10) | /* 4:2:0 */
2986                   (1 << 7) |  /* CABAC */
2987                   (0 << 6) |
2988                   (0 << 5) |
2989                   (0 << 4) |
2990                   (0 << 3) |
2991                   (1 << 2) |
2992                   (mbaff_frame_flag << 1) |
2993                   (0 << 0));
2994     OUT_BCS_BATCH(batch, 0);
2995     OUT_BCS_BATCH(batch, 0);
2996     OUT_BCS_BATCH(batch, 0);
2997     OUT_BCS_BATCH(batch, 0);
2998     OUT_BCS_BATCH(batch, 0);
2999     OUT_BCS_BATCH(batch, 0);
3000     OUT_BCS_BATCH(batch, 0);
3001     OUT_BCS_BATCH(batch, 0);
3002     OUT_BCS_BATCH(batch, 0);
3003     OUT_BCS_BATCH(batch, 0);
3004     OUT_BCS_BATCH(batch, 0);
3005     ADVANCE_BCS_BATCH(batch);
3006 }
3007
3008 static void
3009 gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
3010                                   struct gen7_mfd_context *gen7_mfd_context)
3011 {
3012     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3013     int i;
3014
3015     BEGIN_BCS_BATCH(batch, 71);
3016     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
3017
3018     /* reference surfaces 0..15 */
3019     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3020         OUT_BCS_BATCH(batch, 0); /* top */
3021         OUT_BCS_BATCH(batch, 0); /* bottom */
3022     }
3023
3024     OUT_BCS_BATCH(batch, 0);
3025
3026     /* the current decoding frame/field */
3027     OUT_BCS_BATCH(batch, 0); /* top */
3028     OUT_BCS_BATCH(batch, 0);
3029     OUT_BCS_BATCH(batch, 0);
3030
3031     /* POC List */
3032     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3033         OUT_BCS_BATCH(batch, 0);
3034         OUT_BCS_BATCH(batch, 0);
3035     }
3036
3037     OUT_BCS_BATCH(batch, 0);
3038     OUT_BCS_BATCH(batch, 0);
3039
3040     ADVANCE_BCS_BATCH(batch);
3041 }
3042
3043 static void
3044 gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
3045                                   struct gen7_mfd_context *gen7_mfd_context)
3046 {
3047     struct i965_driver_data *i965 = i965_driver_data(ctx);
3048     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3049     int i;
3050
3051     if (IS_STEPPING_BPLUS(i965)) {
3052         gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
3053         return;
3054     }
3055
3056     BEGIN_BCS_BATCH(batch, 69);
3057     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
3058
3059     /* reference surfaces 0..15 */
3060     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3061         OUT_BCS_BATCH(batch, 0); /* top */
3062         OUT_BCS_BATCH(batch, 0); /* bottom */
3063     }
3064
3065     /* the current decoding frame/field */
3066     OUT_BCS_BATCH(batch, 0); /* top */
3067     OUT_BCS_BATCH(batch, 0); /* bottom */
3068
3069     /* POC List */
3070     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3071         OUT_BCS_BATCH(batch, 0);
3072         OUT_BCS_BATCH(batch, 0);
3073     }
3074
3075     OUT_BCS_BATCH(batch, 0);
3076     OUT_BCS_BATCH(batch, 0);
3077
3078     ADVANCE_BCS_BATCH(batch);
3079 }
3080
3081 static void
3082 gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
3083                                      struct gen7_mfd_context *gen7_mfd_context)
3084 {
3085     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3086
3087     BEGIN_BCS_BATCH(batch, 11);
3088     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
3089     OUT_BCS_RELOC(batch,
3090                   gen7_mfd_context->jpeg_wa_slice_data_bo,
3091                   I915_GEM_DOMAIN_INSTRUCTION, 0,
3092                   0);
3093     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2GB */
3094     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3095     OUT_BCS_BATCH(batch, 0);
3096     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3097     OUT_BCS_BATCH(batch, 0);
3098     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3099     OUT_BCS_BATCH(batch, 0);
3100     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3101     OUT_BCS_BATCH(batch, 0);
3102     ADVANCE_BCS_BATCH(batch);
3103 }
3104
3105 static void
3106 gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
3107                                      struct gen7_mfd_context *gen7_mfd_context)
3108 {
3109     struct i965_driver_data *i965 = i965_driver_data(ctx);
3110     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3111
3112     if (IS_STEPPING_BPLUS(i965)) {
3113         gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
3114         return; 
3115     }
3116     BEGIN_BCS_BATCH(batch, 11);
3117     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
3118     OUT_BCS_RELOC(batch,
3119                   gen7_mfd_context->jpeg_wa_slice_data_bo,
3120                   I915_GEM_DOMAIN_INSTRUCTION, 0,
3121                   0);
3122     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2GB */
3123     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3124     OUT_BCS_BATCH(batch, 0);
3125     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3126     OUT_BCS_BATCH(batch, 0);
3127     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3128     OUT_BCS_BATCH(batch, 0);
3129     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3130     OUT_BCS_BATCH(batch, 0);
3131     ADVANCE_BCS_BATCH(batch);
3132 }
3133
3134 static void
3135 gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
3136                             struct gen7_mfd_context *gen7_mfd_context)
3137 {
3138     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3139
3140     /* the input bitstream format on GEN7 differs from GEN6 */
3141     BEGIN_BCS_BATCH(batch, 6);
3142     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
3143     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
3144     OUT_BCS_BATCH(batch, 0);
3145     OUT_BCS_BATCH(batch,
3146                   (0 << 31) |
3147                   (0 << 14) |
3148                   (0 << 12) |
3149                   (0 << 10) |
3150                   (0 << 8));
3151     OUT_BCS_BATCH(batch,
3152                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
3153                   (0 << 5)  |
3154                   (0 << 4)  |
3155                   (1 << 3) | /* LastSlice Flag */
3156                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
3157     OUT_BCS_BATCH(batch, 0);
3158     ADVANCE_BCS_BATCH(batch);
3159 }
3160
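/*
 * A single I slice with deblocking disabled covers the one macroblock of the
 * workaround clip.
 */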
3161 static void
3162 gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
3163                              struct gen7_mfd_context *gen7_mfd_context)
3164 {
3165     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3166     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
3167     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
3168     int first_mb_in_slice = 0;
3169     int slice_type = SLICE_TYPE_I;
3170
3171     BEGIN_BCS_BATCH(batch, 11);
3172     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3173     OUT_BCS_BATCH(batch, slice_type);
3174     OUT_BCS_BATCH(batch, 
3175                   (num_ref_idx_l1 << 24) |
3176                   (num_ref_idx_l0 << 16) |
3177                   (0 << 8) |
3178                   (0 << 0));
3179     OUT_BCS_BATCH(batch, 
3180                   (0 << 29) |
3181                   (1 << 27) |   /* disable Deblocking */
3182                   (0 << 24) |
3183                   (gen7_jpeg_wa_clip.qp << 16) |
3184                   (0 << 8) |
3185                   (0 << 0));
3186     OUT_BCS_BATCH(batch, 
3187                   (slice_ver_pos << 24) |
3188                   (slice_hor_pos << 16) | 
3189                   (first_mb_in_slice << 0));
3190     OUT_BCS_BATCH(batch,
3191                   (next_slice_ver_pos << 16) |
3192                   (next_slice_hor_pos << 0));
3193     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
3194     OUT_BCS_BATCH(batch, 0);
3195     OUT_BCS_BATCH(batch, 0);
3196     OUT_BCS_BATCH(batch, 0);
3197     OUT_BCS_BATCH(batch, 0);
3198     ADVANCE_BCS_BATCH(batch);
3199 }
3200
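/*
 * Run the complete workaround sequence: program the AVC pipe/surface/buffer
 * states and decode gen7_jpeg_wa_clip before the real JPEG picture is decoded.
 */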
3201 static void
3202 gen75_mfd_jpeg_wa(VADriverContextP ctx,
3203                  struct gen7_mfd_context *gen7_mfd_context)
3204 {
3205     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3206     gen75_jpeg_wa_init(ctx, gen7_mfd_context);
3207     intel_batchbuffer_emit_mi_flush(batch);
3208     gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
3209     gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
3210     gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
3211     gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
3212     gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
3213     gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
3214     gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
3215
3216     gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
3217     gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
3218     gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
3219 }
3220
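/*
 * Top-level JPEG baseline decode: run the Ivybridge workaround, program the
 * JPEG pipe/surface/buffer/picture/QM states, load the required Huffman
 * tables and finally emit one BSD object per scan.
 */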
3221 void
3222 gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
3223                              struct decode_state *decode_state,
3224                              struct gen7_mfd_context *gen7_mfd_context)
3225 {
3226     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3227     VAPictureParameterBufferJPEGBaseline *pic_param;
3228     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3229     dri_bo *slice_data_bo;
3230     int i, j, max_selector = 0;
3231
3232     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3233     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3234
3235     /* Currently only support Baseline DCT */
3236     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3237     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3238     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
3239     intel_batchbuffer_emit_mi_flush(batch);
3240     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3241     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3242     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3243     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3244     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
3245
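    /*
     * First pass over the slice parameters: find the largest DC/AC Huffman
     * table selector so that only the required number of tables is loaded.
     */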
3246     for (j = 0; j < decode_state->num_slice_params; j++) {
3247         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3248         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3249         slice_data_bo = decode_state->slice_datas[j]->bo;
3250         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3251
3252         if (j == decode_state->num_slice_params - 1)
3253             next_slice_group_param = NULL;
3254         else
3255             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3256
3257         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3258             int component;
3259
3260             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3261
3262             if (i < decode_state->slice_params[j]->num_elements - 1)
3263                 next_slice_param = slice_param + 1;
3264             else
3265                 next_slice_param = next_slice_group_param;
3266
3267             for (component = 0; component < slice_param->num_components; component++) {
3268                 if (max_selector < slice_param->components[component].dc_table_selector)
3269                     max_selector = slice_param->components[component].dc_table_selector;
3270
3271                 if (max_selector < slice_param->components[component].ac_table_selector)
3272                     max_selector = slice_param->components[component].ac_table_selector;
3273             }
3274
3275             slice_param++;
3276         }
3277     }
3278
3279     assert(max_selector < 2);
3280     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3281
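    /*
     * Second pass: set up the indirect object base for each slice data buffer
     * and emit one MFD_JPEG_BSD_OBJECT per scan.
     */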
3282     for (j = 0; j < decode_state->num_slice_params; j++) {
3283         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3284         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3285         slice_data_bo = decode_state->slice_datas[j]->bo;
3286         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3287
3288         if (j == decode_state->num_slice_params - 1)
3289             next_slice_group_param = NULL;
3290         else
3291             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3292
3293         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3294             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3295
3296             if (i < decode_state->slice_params[j]->num_elements - 1)
3297                 next_slice_param = slice_param + 1;
3298             else
3299                 next_slice_param = next_slice_group_param;
3300
3301             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3302             slice_param++;
3303         }
3304     }
3305
3306     intel_batchbuffer_end_atomic(batch);
3307     intel_batchbuffer_flush(batch);
3308 }
3309
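/*
 * hw_context::run() entry point: dispatch to the per-codec decode routine
 * according to the VA profile.
 */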
3310 static void 
3311 gen75_mfd_decode_picture(VADriverContextP ctx, 
3312                         VAProfile profile, 
3313                         union codec_state *codec_state,
3314                         struct hw_context *hw_context)
3316 {
3317     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3318     struct decode_state *decode_state = &codec_state->decode;
3319
3320     assert(gen7_mfd_context);
3321
3322     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3323
3324     switch (profile) {
3325     case VAProfileMPEG2Simple:
3326     case VAProfileMPEG2Main:
3327         gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3328         break;
3329
3330     case VAProfileH264Baseline:
3331     case VAProfileH264Main:
3332     case VAProfileH264High:
3333         gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3334         break;
3335
3336     case VAProfileVC1Simple:
3337     case VAProfileVC1Main:
3338     case VAProfileVC1Advanced:
3339         gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3340         break;
3341
3342     case VAProfileJPEGBaseline:
3343         gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3344         break;
3345
3346     default:
3347         assert(0);
3348         break;
3349     }
3350 }
3351
3352 static void
3353 gen75_mfd_context_destroy(void *hw_context)
3354 {
3355     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3356
3357     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3358     gen7_mfd_context->post_deblocking_output.bo = NULL;
3359
3360     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3361     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3362
3363     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3364     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3365
3366     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3367     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3368
3369     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3370     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3371
3372     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3373     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3374
3375     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3376     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3377
3378     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3379
3380     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3381     free(gen7_mfd_context);
3382 }
3383
3384 static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
3385                                     struct gen7_mfd_context *gen7_mfd_context)
3386 {
3387     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3388     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3389     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3390     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3391 }
3392
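/*
 * Create the decoder hw_context: allocate the batchbuffer, invalidate every
 * reference frame-store entry and perform per-codec initialization.
 */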
3393 struct hw_context *
3394 gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3395 {
3396     struct intel_driver_data *intel = intel_driver_data(ctx);
3397     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3398     int i;
3399
3400     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
3401     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
3402     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3403
3404     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3405         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3406         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3407     }
3408
3409     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3410
3411     switch (obj_config->profile) {
3412     case VAProfileMPEG2Simple:
3413     case VAProfileMPEG2Main:
3414         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3415         break;
3416
3417     case VAProfileH264Baseline:
3418     case VAProfileH264Main:
3419     case VAProfileH264High:
3420         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
3421         break;
3422     default:
3423         break;
3424     }
3425     return (struct hw_context *)gen7_mfd_context;
3426 }