Always set Fix_Prev_Mb_skipped in AVC_BSD_OBJECT command
[platform/upstream/libva-intel-driver.git] / src / gen75_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui <yakui.zhao@intel.com>
27  *
28  */
29
30 #ifndef HAVE_GEN_AVC_SURFACE
31 #define HAVE_GEN_AVC_SURFACE 1
32 #endif
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <assert.h>
38
39 #include "config.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_driver.h"
42
43 #include "i965_defines.h"
44 #include "i965_drv_video.h"
45 #include "i965_decoder_utils.h"
46
47 #include "gen7_mfd.h"
48
49 #define B0_STEP_REV             2
50 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
51
52 static const uint32_t zigzag_direct[64] = {
53     0,   1,  8, 16,  9,  2,  3, 10,
54     17, 24, 32, 25, 18, 11,  4,  5,
55     12, 19, 26, 33, 40, 48, 41, 34,
56     27, 20, 13,  6,  7, 14, 21, 28,
57     35, 42, 49, 56, 57, 50, 43, 36,
58     29, 22, 15, 23, 30, 37, 44, 51,
59     58, 59, 52, 45, 38, 31, 39, 46,
60     53, 60, 61, 54, 47, 55, 62, 63
61 };
62
63 static void
64 gen75_mfd_avc_frame_store_index(VADriverContextP ctx,
65                                VAPictureParameterBufferH264 *pic_param,
66                                struct gen7_mfd_context *gen7_mfd_context)
67 {
68     struct i965_driver_data *i965 = i965_driver_data(ctx);
69     int i, j;
70
71     assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
72
73     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
74         int found = 0;
75
76         if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
77             continue;
78
79         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
80             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
81             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
82                 continue;
83
84             if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
85                 found = 1;
86                 break;
87             }
88         }
89
90         if (!found) {
91             struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
92             obj_surface->flags &= ~SURFACE_REFERENCED;
93
94             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
95                 dri_bo_unreference(obj_surface->bo);
96                 obj_surface->bo = NULL;
97                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
98             }
99
100             if (obj_surface->free_private_data)
101                 obj_surface->free_private_data(&obj_surface->private_data);
102
103             gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
104             gen7_mfd_context->reference_surface[i].frame_store_id = -1;
105         }
106     }
107
108     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
109         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
110         int found = 0;
111
112         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
113             continue;
114
115         for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
116             if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
117                 continue;
118             
119             if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
120                 found = 1;
121                 break;
122             }
123         }
124
125         if (!found) {
126             int frame_idx;
127             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
128             
129             assert(obj_surface);
130             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
131
132             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
133                 for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
134                     if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
135                         continue;
136
137                     if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
138                         break;
139                 }
140
141                 if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
142                     break;
143             }
144
145             assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
146
147             for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
148                 if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
149                     gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
150                     gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
151                     break;
152                 }
153             }
154         }
155     }
156
157     /* sort */
158     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
159         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
160             gen7_mfd_context->reference_surface[i].frame_store_id == i)
161             continue;
162
163         for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
164             if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
165                 gen7_mfd_context->reference_surface[j].frame_store_id == i) {
166                 VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
167                 int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
168
169                 gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
170                 gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
171                 gen7_mfd_context->reference_surface[j].surface_id = id;
172                 gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
173                 break;
174             }
175         }
176     }
177 }
178
179 static void
180 gen75_mfd_init_avc_surface(VADriverContextP ctx, 
181                           VAPictureParameterBufferH264 *pic_param,
182                           struct object_surface *obj_surface)
183 {
184     struct i965_driver_data *i965 = i965_driver_data(ctx);
185     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
186     int width_in_mbs, height_in_mbs;
187
188     obj_surface->free_private_data = gen_free_avc_surface;
189     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
190     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
191
192     if (!gen7_avc_surface) {
193         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
194         assert((obj_surface->size & 0x3f) == 0);
195         obj_surface->private_data = gen7_avc_surface;
196     }
197
198     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
199                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
200
201     if (gen7_avc_surface->dmv_top == NULL) {
202         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
203                                                  "direct mv w/r buffer",
204                                                  width_in_mbs * height_in_mbs * 128,
205                                                  0x1000);
206         assert(gen7_avc_surface->dmv_top);
207     }
208
209     if (gen7_avc_surface->dmv_bottom_flag &&
210         gen7_avc_surface->dmv_bottom == NULL) {
211         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
212                                                     "direct mv w/r buffer",
213                                                     width_in_mbs * height_in_mbs * 128,                                                    
214                                                     0x1000);
215         assert(gen7_avc_surface->dmv_bottom);
216     }
217 }
218
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords): configures the MFX engine for
 * long-format VLD decode of the selected codec, with the pre-/post-
 * deblocking output enables taken from the context.  Error-handling
 * bits and status-report id are left 0 (no early termination).
 */
static void
gen75_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Only the four codecs handled by this file are legal here. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
253
/*
 * Emit MFX_SURFACE_STATE (6 dwords) for the current render target: a
 * planar 4:2:0 8-bit, Y-major tiled surface.  Chroma is interleaved
 * for the video codecs and planar for JPEG; the Cb/Cr row offsets come
 * from the surface object.
 */
static void
gen75_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
293
294
295 static void
296 gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
297                              struct decode_state *decode_state,
298                              int standard_select,
299                              struct gen7_mfd_context *gen7_mfd_context)
300 {
301     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
302     struct i965_driver_data *i965 = i965_driver_data(ctx);
303     int i;
304
305     BEGIN_BCS_BATCH(batch, 61);
306     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
307         /* Pre-deblock 1-3 */
308     if (gen7_mfd_context->pre_deblocking_output.valid)
309         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
310                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
311                       0);
312     else
313         OUT_BCS_BATCH(batch, 0);
314
315         OUT_BCS_BATCH(batch, 0);
316         OUT_BCS_BATCH(batch, 0);
317         /* Post-debloing 4-6 */
318     if (gen7_mfd_context->post_deblocking_output.valid)
319         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
320                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
321                       0);
322     else
323         OUT_BCS_BATCH(batch, 0);
324
325         OUT_BCS_BATCH(batch, 0);
326         OUT_BCS_BATCH(batch, 0);
327
328         /* uncompressed-video & stream out 7-12 */
329     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
330     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
331         OUT_BCS_BATCH(batch, 0);
332         OUT_BCS_BATCH(batch, 0);
333         OUT_BCS_BATCH(batch, 0);
334         OUT_BCS_BATCH(batch, 0);
335
336         /* intra row-store scratch 13-15 */
337     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
338         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
339                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
340                       0);
341     else
342         OUT_BCS_BATCH(batch, 0);
343
344         OUT_BCS_BATCH(batch, 0);
345         OUT_BCS_BATCH(batch, 0);
346         /* deblocking-filter-row-store 16-18 */
347     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
348         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
349                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
350                       0);
351     else
352         OUT_BCS_BATCH(batch, 0);
353         OUT_BCS_BATCH(batch, 0);
354         OUT_BCS_BATCH(batch, 0);
355
356     /* DW 19..50 */
357     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
358         struct object_surface *obj_surface;
359
360         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
361             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
362             assert(obj_surface && obj_surface->bo);
363
364             OUT_BCS_RELOC(batch, obj_surface->bo,
365                           I915_GEM_DOMAIN_INSTRUCTION, 0,
366                           0);
367         } else {
368             OUT_BCS_BATCH(batch, 0);
369         }
370             OUT_BCS_BATCH(batch, 0);
371     }
372         /* reference property 51 */
373     OUT_BCS_BATCH(batch, 0);  
374         
375         /* Macroblock status & ILDB 52-57 */
376         OUT_BCS_BATCH(batch, 0);
377         OUT_BCS_BATCH(batch, 0);
378         OUT_BCS_BATCH(batch, 0);
379         OUT_BCS_BATCH(batch, 0);
380         OUT_BCS_BATCH(batch, 0);
381         OUT_BCS_BATCH(batch, 0);
382
383         /* the second Macroblock status 58-60 */        
384         OUT_BCS_BATCH(batch, 0);
385         OUT_BCS_BATCH(batch, 0);
386         OUT_BCS_BATCH(batch, 0);
387     ADVANCE_BCS_BATCH(batch);
388 }
389
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE.  B+ stepping hardware takes the
 * 61-dword layout via the _bplus helper; earlier steppings use the
 * 25-dword layout emitted below, covering the pre-/post-deblocking
 * outputs, row-store scratch buffers and 16 reference pictures.
 */
static void
gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;
    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
                        standard_select, gen7_mfd_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 25);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
    /* DW 1: pre-deblocking output */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 2: post-deblocking output */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    /* DW 5: intra row-store scratch buffer */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 6: deblocking-filter row-store scratch buffer */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface && obj_surface->bo);

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
458
459 static void
460 gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
461                                  dri_bo *slice_data_bo,
462                                  int standard_select,
463                                  struct gen7_mfd_context *gen7_mfd_context)
464 {
465     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
466
467     BEGIN_BCS_BATCH(batch, 26);
468     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
469         /* MFX In BS 1-5 */
470     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
471     OUT_BCS_BATCH(batch, 0);
472     OUT_BCS_BATCH(batch, 0);
473         /* Upper bound 4-5 */   
474     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
475     OUT_BCS_BATCH(batch, 0);
476
477         /* MFX indirect MV 6-10 */
478     OUT_BCS_BATCH(batch, 0);
479     OUT_BCS_BATCH(batch, 0);
480     OUT_BCS_BATCH(batch, 0);
481     OUT_BCS_BATCH(batch, 0);
482     OUT_BCS_BATCH(batch, 0);
483         
484         /* MFX IT_COFF 11-15 */
485     OUT_BCS_BATCH(batch, 0);
486     OUT_BCS_BATCH(batch, 0);
487     OUT_BCS_BATCH(batch, 0);
488     OUT_BCS_BATCH(batch, 0);
489     OUT_BCS_BATCH(batch, 0);
490
491         /* MFX IT_DBLK 16-20 */
492     OUT_BCS_BATCH(batch, 0);
493     OUT_BCS_BATCH(batch, 0);
494     OUT_BCS_BATCH(batch, 0);
495     OUT_BCS_BATCH(batch, 0);
496     OUT_BCS_BATCH(batch, 0);
497
498         /* MFX PAK_BSE object for encoder 21-25 */
499     OUT_BCS_BATCH(batch, 0);
500     OUT_BCS_BATCH(batch, 0);
501     OUT_BCS_BATCH(batch, 0);
502     OUT_BCS_BATCH(batch, 0);
503     OUT_BCS_BATCH(batch, 0);
504
505     ADVANCE_BCS_BATCH(batch);
506 }
507  
508 static void
509 gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
510                                  dri_bo *slice_data_bo,
511                                  int standard_select,
512                                  struct gen7_mfd_context *gen7_mfd_context)
513 {
514     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
515     struct i965_driver_data *i965 = i965_driver_data(ctx);
516
517     if (IS_STEPPING_BPLUS(i965)) {
518         gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
519                                 standard_select, gen7_mfd_context);
520         return;
521     }
522
523     BEGIN_BCS_BATCH(batch, 11);
524     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
525     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
526     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
527     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
528     OUT_BCS_BATCH(batch, 0);
529     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
530     OUT_BCS_BATCH(batch, 0);
531     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
532     OUT_BCS_BATCH(batch, 0);
533     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
534     OUT_BCS_BATCH(batch, 0);
535     ADVANCE_BCS_BATCH(batch);
536 }
537
538
539 static void
540 gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
541                                  struct decode_state *decode_state,
542                                  int standard_select,
543                                  struct gen7_mfd_context *gen7_mfd_context)
544 {
545     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
546
547     BEGIN_BCS_BATCH(batch, 10);
548     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
549
550     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
551         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
552                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
553                       0);
554         else
555                 OUT_BCS_BATCH(batch, 0);
556                 
557     OUT_BCS_BATCH(batch, 0);
558     OUT_BCS_BATCH(batch, 0);
559         /* MPR Row Store Scratch buffer 4-6 */
560     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
561         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
562                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
563                       0);
564     else
565             OUT_BCS_BATCH(batch, 0);
566     OUT_BCS_BATCH(batch, 0);
567     OUT_BCS_BATCH(batch, 0);
568
569         /* Bitplane 7-9 */ 
570     if (gen7_mfd_context->bitplane_read_buffer.valid)
571         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
572                       I915_GEM_DOMAIN_INSTRUCTION, 0,
573                       0);
574     else
575         OUT_BCS_BATCH(batch, 0);
576     OUT_BCS_BATCH(batch, 0);
577     OUT_BCS_BATCH(batch, 0);
578
579     ADVANCE_BCS_BATCH(batch);
580 }
581
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE.  B+ stepping hardware takes the
 * 10-dword layout via the _bplus helper; earlier steppings use the
 * 4-dword layout below (BSD/MPC row-store, MPR row-store, bitplane
 * read buffer).  Invalid buffers are programmed as 0.
 */
static void
gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
                                standard_select, gen7_mfd_context);
        return;
     }
 
    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    /* DW 1: BSD/MPC row-store scratch buffer */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 2: MPR row-store scratch buffer */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 3: bitplane read buffer (read-only domain) */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
623
624 static void
625 gen75_mfd_qm_state(VADriverContextP ctx,
626                   int qm_type,
627                   unsigned char *qm,
628                   int qm_length,
629                   struct gen7_mfd_context *gen7_mfd_context)
630 {
631     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
632     unsigned int qm_buffer[16];
633
634     assert(qm_length <= 16 * 4);
635     memcpy(qm_buffer, qm, qm_length);
636
637     BEGIN_BCS_BATCH(batch, 18);
638     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
639     OUT_BCS_BATCH(batch, qm_type << 0);
640     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
641     ADVANCE_BCS_BATCH(batch);
642 }
643
/*
 * Emit MFX_AVC_IMG_STATE (16 dwords) describing the current AVC
 * picture: size in macroblocks, picture structure (frame / top field /
 * bottom field), chroma QP offsets and the sequence/picture flags, all
 * derived from the VA picture parameter buffer.  The asserts encode
 * H.264 constraints this hardware path relies on (4:2:0 or monochrome
 * only, field flags consistent with the picture structure).
 */
static void
gen75_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field coding within a frame picture */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch, 
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch, 
                  ((height_in_mbs - 1) << 16) | 
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch, 
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
728
729 static void
730 gen75_mfd_avc_qm_state(VADriverContextP ctx,
731                       struct decode_state *decode_state,
732                       struct gen7_mfd_context *gen7_mfd_context)
733 {
734     VAIQMatrixBufferH264 *iq_matrix;
735     VAPictureParameterBufferH264 *pic_param;
736
737     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
738         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
739     else
740         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
741
742     assert(decode_state->pic_param && decode_state->pic_param->buffer);
743     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
744
745     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
746     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
747
748     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
749         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
750         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
751     }
752 }
753
754 static void
755 gen75_mfd_avc_picid_state(VADriverContextP ctx,
756                       struct decode_state *decode_state,
757                       struct gen7_mfd_context *gen7_mfd_context)
758 {
759     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
760
761     BEGIN_BCS_BATCH(batch, 10);
762     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
763     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
764     OUT_BCS_BATCH(batch, 0);
765     OUT_BCS_BATCH(batch, 0);
766     OUT_BCS_BATCH(batch, 0);
767     OUT_BCS_BATCH(batch, 0);
768     OUT_BCS_BATCH(batch, 0);
769     OUT_BCS_BATCH(batch, 0);
770     OUT_BCS_BATCH(batch, 0);
771     OUT_BCS_BATCH(batch, 0);
772     ADVANCE_BCS_BATCH(batch);
773 }
774
775
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWs) for Haswell B+ stepping.
 *
 * Layout written here: one address DW pair per reference surface slot
 * (DMV buffer address + a zero DW), one extra zero DW, the current
 * picture's DMV buffer address plus two zero DWs, then the POC list
 * (top/bottom field order counts for each reference slot followed by
 * the current picture). The exact DW ordering matches the hardware
 * command layout and must not be changed.
 */
static void
gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen7_avc_surface = obj_surface->private_data;

            /* A reference surface without decode-side private data has no
             * DMV buffer yet; emit zero addresses for its slot. */
            if (gen7_avc_surface == NULL) {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
                OUT_BCS_BATCH(batch, 0);
            }
        } else {
            /* unused reference slot */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }
        OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    /* Current picture's DMV buffer is written by the decoder, hence the
     * write domain on the relocation. */
    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            /* Find the VA picture that corresponds to this frame-store slot
             * so its field order counts can be emitted. */
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];
                
                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
            
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POC of the current picture closes the list */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
862
/*
 * Emit MFX_AVC_DIRECTMODE_STATE for direct-mode motion vector buffers.
 *
 * On B+ stepping hardware this delegates to the 71-DW B+ variant;
 * otherwise it emits the 69-DW pre-B+ layout: a top/bottom DMV address
 * pair per reference slot, the current picture's DMV pair, then the POC
 * list (reference slots followed by the current picture). The DW
 * ordering matches the hardware command layout and must not change.
 */
static void
gen75_mfd_avc_directmode_state(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfd_avc_directmode_state_bplus(ctx, pic_param, slice_param,
                gen7_mfd_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen7_avc_surface = obj_surface->private_data;

            if (gen7_avc_surface == NULL) {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);

                /* Without a separate bottom-field DMV buffer, the top
                 * buffer address is repeated for the bottom slot. */
                if (gen7_avc_surface->dmv_bottom_flag == 1)
                    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
                else
                    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
            }
        } else {
            /* unused reference slot */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    /* Current picture's DMV buffers are written by the decoder, hence
     * the write domain on these relocations. */
    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen7_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            /* Find the VA picture that corresponds to this frame-store slot
             * so its field order counts can be emitted. */
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];
                
                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
            
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POC of the current picture closes the list */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
968
969 static void
970 gen75_mfd_avc_slice_state(VADriverContextP ctx,
971                          VAPictureParameterBufferH264 *pic_param,
972                          VASliceParameterBufferH264 *slice_param,
973                          VASliceParameterBufferH264 *next_slice_param,
974                          struct gen7_mfd_context *gen7_mfd_context)
975 {
976     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
977     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
978     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
979     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
980     int num_ref_idx_l0, num_ref_idx_l1;
981     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
982                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
983     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
984     int slice_type;
985
986     if (slice_param->slice_type == SLICE_TYPE_I ||
987         slice_param->slice_type == SLICE_TYPE_SI) {
988         slice_type = SLICE_TYPE_I;
989     } else if (slice_param->slice_type == SLICE_TYPE_P ||
990                slice_param->slice_type == SLICE_TYPE_SP) {
991         slice_type = SLICE_TYPE_P;
992     } else { 
993         assert(slice_param->slice_type == SLICE_TYPE_B);
994         slice_type = SLICE_TYPE_B;
995     }
996
997     if (slice_type == SLICE_TYPE_I) {
998         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
999         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
1000         num_ref_idx_l0 = 0;
1001         num_ref_idx_l1 = 0;
1002     } else if (slice_type == SLICE_TYPE_P) {
1003         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
1004         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
1005         num_ref_idx_l1 = 0;
1006     } else {
1007         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
1008         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
1009     }
1010
1011     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
1012     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
1013     slice_ver_pos = first_mb_in_slice / width_in_mbs;
1014
1015     if (next_slice_param) {
1016         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
1017         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
1018         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
1019     } else {
1020         next_slice_hor_pos = 0;
1021         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
1022     }
1023
1024     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
1025     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
1026     OUT_BCS_BATCH(batch, slice_type);
1027     OUT_BCS_BATCH(batch, 
1028                   (num_ref_idx_l1 << 24) |
1029                   (num_ref_idx_l0 << 16) |
1030                   (slice_param->chroma_log2_weight_denom << 8) |
1031                   (slice_param->luma_log2_weight_denom << 0));
1032     OUT_BCS_BATCH(batch, 
1033                   (slice_param->direct_spatial_mv_pred_flag << 29) |
1034                   (slice_param->disable_deblocking_filter_idc << 27) |
1035                   (slice_param->cabac_init_idc << 24) |
1036                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
1037                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
1038                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
1039     OUT_BCS_BATCH(batch, 
1040                   (slice_ver_pos << 24) |
1041                   (slice_hor_pos << 16) | 
1042                   (first_mb_in_slice << 0));
1043     OUT_BCS_BATCH(batch,
1044                   (next_slice_ver_pos << 16) |
1045                   (next_slice_hor_pos << 0));
1046     OUT_BCS_BATCH(batch, 
1047                   (next_slice_param == NULL) << 19); /* last slice flag */
1048     OUT_BCS_BATCH(batch, 0);
1049     OUT_BCS_BATCH(batch, 0);
1050     OUT_BCS_BATCH(batch, 0);
1051     OUT_BCS_BATCH(batch, 0);
1052     ADVANCE_BCS_BATCH(batch);
1053 }
1054
1055 static inline void
1056 gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
1057                            VAPictureParameterBufferH264 *pic_param,
1058                            VASliceParameterBufferH264 *slice_param,
1059                            struct gen7_mfd_context *gen7_mfd_context)
1060 {
1061     gen6_send_avc_ref_idx_state(
1062         gen7_mfd_context->base.batch,
1063         slice_param,
1064         gen7_mfd_context->reference_surface
1065     );
1066 }
1067
1068 static void
1069 gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
1070                                 VAPictureParameterBufferH264 *pic_param,
1071                                 VASliceParameterBufferH264 *slice_param,
1072                                 struct gen7_mfd_context *gen7_mfd_context)
1073 {
1074     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1075     int i, j, num_weight_offset_table = 0;
1076     short weightoffsets[32 * 6];
1077
1078     if ((slice_param->slice_type == SLICE_TYPE_P ||
1079          slice_param->slice_type == SLICE_TYPE_SP) &&
1080         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
1081         num_weight_offset_table = 1;
1082     }
1083     
1084     if ((slice_param->slice_type == SLICE_TYPE_B) &&
1085         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
1086         num_weight_offset_table = 2;
1087     }
1088
1089     for (i = 0; i < num_weight_offset_table; i++) {
1090         BEGIN_BCS_BATCH(batch, 98);
1091         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
1092         OUT_BCS_BATCH(batch, i);
1093
1094         if (i == 0) {
1095             for (j = 0; j < 32; j++) {
1096                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
1097                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
1098                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
1099                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
1100                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
1101                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
1102             }
1103         } else {
1104             for (j = 0; j < 32; j++) {
1105                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
1106                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
1107                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
1108                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
1109                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
1110                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
1111             }
1112         }
1113
1114         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
1115         ADVANCE_BCS_BATCH(batch);
1116     }
1117 }
1118
1119 static void
1120 gen75_mfd_avc_bsd_object(VADriverContextP ctx,
1121                         VAPictureParameterBufferH264 *pic_param,
1122                         VASliceParameterBufferH264 *slice_param,
1123                         dri_bo *slice_data_bo,
1124                         VASliceParameterBufferH264 *next_slice_param,
1125                         struct gen7_mfd_context *gen7_mfd_context)
1126 {
1127     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1128     unsigned int slice_data_bit_offset;
1129
1130     slice_data_bit_offset = avc_get_first_mb_bit_offset(
1131         slice_data_bo,
1132         slice_param,
1133         pic_param->pic_fields.bits.entropy_coding_mode_flag
1134     );
1135
1136     /* the input bitsteam format on GEN7 differs from GEN6 */
1137     BEGIN_BCS_BATCH(batch, 6);
1138     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
1139     OUT_BCS_BATCH(batch, 
1140                   (slice_param->slice_data_size - slice_param->slice_data_offset));
1141     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
1142     OUT_BCS_BATCH(batch,
1143                   (0 << 31) |
1144                   (0 << 14) |
1145                   (0 << 12) |
1146                   (0 << 10) |
1147                   (0 << 8));
1148     OUT_BCS_BATCH(batch,
1149                   ((slice_data_bit_offset >> 3) << 16) |
1150                   (1 << 7)  |
1151                   (0 << 5)  |
1152                   (0 << 4)  |
1153                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
1154                   (slice_data_bit_offset & 0x7));
1155     OUT_BCS_BATCH(batch, 0);
1156     ADVANCE_BCS_BATCH(batch);
1157 }
1158
/* One-time AVC decode context setup: seed the fallback IQ matrix with
 * flat (default) scaling lists, used when the app supplies none. */
static inline void
gen75_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
1168
/*
 * Per-picture AVC decode setup: decide whether the in-loop deblocking
 * filter is needed, bind the frame-store indices, prepare the render
 * target surface (and its DMV buffers), and (re)allocate the row-store
 * scratch buffers sized from the picture width.
 */
static void
gen75_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    VAPictureH264 *va_pic;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Deblocking is needed as soon as any slice does not fully disable
     * it (disable_deblocking_filter_idc != 1); scan until one is found. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface);
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* initial uv component for YUV400 case */
    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
         /* NV12 has no real chroma for monochrome streams; fill the UV
          * plane with the neutral value 0x80 so it displays as gray. */
         unsigned int uv_offset = obj_surface->width * obj_surface->height; 
         unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 

         drm_intel_gem_bo_map_gtt(obj_surface->bo);
         memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
         drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
    }

    gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Exactly one of post-/pre-deblocking output is valid, depending on
     * whether the in-loop deblocking filter is enabled for this picture. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers, sized from the picture width in MBs */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* Bitplane buffer is a VC-1 concept; unused for AVC */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
1281
/*
 * Top-level AVC picture decode: run per-picture init, then emit the
 * frame-level MFX state commands followed by, for every slice, the
 * per-slice state and BSD object commands. Command ordering follows
 * the MFX pipeline requirements and must not be rearranged.
 */
static void
gen75_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    /* Frame-level state */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);

    /* Slice-level state: j walks slice parameter buffers, i walks the
     * slices within each buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the next buffer, if any — needed so the last
         * slice of this buffer knows what follows it. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen75_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1343
/*
 * Per-picture MPEG-2 decode setup: bind reference surfaces, prepare the
 * render target, and allocate the BSD/MPC row-store scratch buffer.
 * MPEG-2 uses only the pre-deblocking output path; the remaining
 * context buffers are marked invalid.
 */
static void
gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* MPEG-2 has no in-loop deblocking: output goes to the
     * pre-deblocking buffer. */
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* Buffers not used by the MPEG-2 pipeline */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
1391
/*
 * Emit MFX_MPEG2_PIC_STATE (13 DWs): f_code fields, picture coding
 * extension flags, picture coding type, and the picture size in MBs.
 * Unused trailing DWs are zero.
 */
static void
gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

        /* XXX: disable concealment for now */
        slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* pic_param->f_code packs the four 4-bit f_code values:
     * [0][0] in bits 15:12 down to [1][1] in bits 3:0. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* Picture dimensions rounded up to whole macroblocks, minus one */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
1439
1440 static void
1441 gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
1442                         struct decode_state *decode_state,
1443                         struct gen7_mfd_context *gen7_mfd_context)
1444 {
1445     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1446     int i, j;
1447
1448     /* Update internal QM state */
1449     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1450         VAIQMatrixBufferMPEG2 * const iq_matrix =
1451             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1452
1453         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1454             iq_matrix->load_intra_quantiser_matrix) {
1455             gen_iq_matrix->load_intra_quantiser_matrix =
1456                 iq_matrix->load_intra_quantiser_matrix;
1457             if (iq_matrix->load_intra_quantiser_matrix) {
1458                 for (j = 0; j < 64; j++)
1459                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1460                         iq_matrix->intra_quantiser_matrix[j];
1461             }
1462         }
1463
1464         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1465             iq_matrix->load_non_intra_quantiser_matrix) {
1466             gen_iq_matrix->load_non_intra_quantiser_matrix =
1467                 iq_matrix->load_non_intra_quantiser_matrix;
1468             if (iq_matrix->load_non_intra_quantiser_matrix) {
1469                 for (j = 0; j < 64; j++)
1470                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1471                         iq_matrix->non_intra_quantiser_matrix[j];
1472             }
1473         }
1474     }
1475
1476     /* Commit QM state to HW */
1477     for (i = 0; i < 2; i++) {
1478         unsigned char *qm = NULL;
1479         int qm_type;
1480
1481         if (i == 0) {
1482             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1483                 qm = gen_iq_matrix->intra_quantiser_matrix;
1484                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1485             }
1486         } else {
1487             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1488                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1489                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1490             }
1491         }
1492
1493         if (!qm)
1494             continue;
1495
1496         gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1497     }
1498 }
1499
1500 static void
1501 gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1502                           VAPictureParameterBufferMPEG2 *pic_param,
1503                           VASliceParameterBufferMPEG2 *slice_param,
1504                           VASliceParameterBufferMPEG2 *next_slice_param,
1505                           struct gen7_mfd_context *gen7_mfd_context)
1506 {
1507     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1508     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1509     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1510
1511     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1512         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1513         is_field_pic = 1;
1514     is_field_pic_wa = is_field_pic &&
1515         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1516
1517     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1518     hpos0 = slice_param->slice_horizontal_position;
1519
1520     if (next_slice_param == NULL) {
1521         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1522         hpos1 = 0;
1523     } else {
1524         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1525         hpos1 = next_slice_param->slice_horizontal_position;
1526     }
1527
1528     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1529
1530     BEGIN_BCS_BATCH(batch, 5);
1531     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1532     OUT_BCS_BATCH(batch, 
1533                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1534     OUT_BCS_BATCH(batch, 
1535                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1536     OUT_BCS_BATCH(batch,
1537                   hpos0 << 24 |
1538                   vpos0 << 16 |
1539                   mb_count << 8 |
1540                   (next_slice_param == NULL) << 5 |
1541                   (next_slice_param == NULL) << 3 |
1542                   (slice_param->macroblock_offset & 0x7));
1543     OUT_BCS_BATCH(batch,
1544                   (slice_param->quantiser_scale_code << 24) |
1545                   (vpos1 << 8 | hpos1));
1546     ADVANCE_BCS_BATCH(batch);
1547 }
1548
/*
 * Decode one MPEG-2 picture: set up the decoder state, then emit the full
 * MFX command sequence (pipe mode, surfaces, buffer addresses, picture and
 * QM state) followed by one IND_OBJ_BASE_ADDR + BSD object per slice.
 * The whole sequence is built atomically in the BCS batch buffer and
 * flushed at the end.  NOTE: the order of the state commands below matches
 * what the hardware expects — do not reorder.
 */
static void
gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily decide (once per context) whether slice vertical positions
     * need the field-picture halving workaround. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    /* Outer loop: one slice-parameter buffer (slice group) per iteration;
     * inner loop: individual slices within the group.  Each BSD object
     * needs the *next* slice's start position to compute its macroblock
     * count, so look one slice ahead (across group boundaries too). */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1604
/* Map the VA-API VC-1 picture_type field (0..4; presumably I, P, B, BI,
 * skipped-P per the VA-API VC-1 definition — confirm against va_dec_vc1)
 * to the GEN7 MFD picture-type encoding.  Index 4 decodes as a regular
 * P picture. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};
1612
/* Map the VA-API motion-vector mode (VAMvMode*, values 0..3) to the GEN7
 * unified MV mode field used in MFD_VC1_LONG_PIC_STATE. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};
1619
/* Scale factors for B-picture direct-mode MV scaling, indexed by
 * pic_param->b_picture_fraction (BFRACTION code).  The values look like
 * 256 * numerator / denominator of the B fraction (128 = 1/2, 85 ~ 1/3,
 * 170 ~ 2/3, ...) — NOTE(review): confirm the index order against the
 * BFRACTION table in SMPTE 421M. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160, 
    224,
};
1627
/* Map the VA-API conditional_overlap_flag (0..2) to the GEN7 CONDOVER
 * field encoding (0 -> 0, 1 -> 2, 2 -> 3). */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};
1633
/* Map the VA-API VC-1 sequence profile (0..3) to the GEN7 profile
 * encoding; index 2 is reserved in the bitstream. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
1640
1641 static void 
1642 gen75_mfd_free_vc1_surface(void **data)
1643 {
1644     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1645
1646     if (!gen7_vc1_surface)
1647         return;
1648
1649     dri_bo_unreference(gen7_vc1_surface->dmv);
1650     free(gen7_vc1_surface);
1651     *data = NULL;
1652 }
1653
1654 static void
1655 gen75_mfd_init_vc1_surface(VADriverContextP ctx, 
1656                           VAPictureParameterBufferVC1 *pic_param,
1657                           struct object_surface *obj_surface)
1658 {
1659     struct i965_driver_data *i965 = i965_driver_data(ctx);
1660     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1661     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1662     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1663
1664     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
1665
1666     if (!gen7_vc1_surface) {
1667         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1668         assert((obj_surface->size & 0x3f) == 0);
1669         obj_surface->private_data = gen7_vc1_surface;
1670     }
1671
1672     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1673
1674     if (gen7_vc1_surface->dmv == NULL) {
1675         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1676                                              "direct mv w/r buffer",
1677                                              width_in_mbs * height_in_mbs * 64,
1678                                              0x1000);
1679     }
1680 }
1681
/*
 * Per-picture setup for VC-1 decode: populate the reference-surface table,
 * select pre-/post-deblocking output, (re)allocate the row-store scratch
 * buffers, and repack the VA bitplane buffer into the layout the hardware
 * reads.
 */
static void
gen75_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int i;
    dri_bo *bo;
    int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;

    /* reference picture */
    obj_surface = SURFACE(pic_param->forward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
    else
        gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;

    obj_surface = SURFACE(pic_param->backward_reference_picture);

    /* NOTE: a missing backward reference falls back to the forward
     * reference (not VA_INVALID_ID) — intentional asymmetry with slot 0. */
    if (obj_surface && obj_surface->bo)
        gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
    else
        gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;

    /* must do so !!! */
    /* Fill the remaining slots by alternating the two references. */
    for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
        gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Exactly one of the two output buffers is marked valid, depending on
     * whether the in-loop deblocking filter is enabled; both point at the
     * same render-target bo. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers, sized per macroblock row. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 6 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    /* Repack the VA bitplane: the source packs two 4-bit macroblock
     * entries per byte linearly over the whole picture; the destination
     * uses a per-row stride of bitplane_width bytes. */
    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                /* Even linear MB index sits in the high nibble of the
                 * source byte, odd in the low nibble. */
                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* Shift each new nibble into the high half of the
                 * destination byte; two iterations fill one byte. */
                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd picture width: the last byte only received one nibble,
             * so move it down into the low half. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
1810
/*
 * Build and emit MFD_VC1_LONG_PIC_STATE for the current picture.  Most of
 * the work is translating VA-API picture parameters into the hardware's
 * field encodings: alternate-pquant configuration (VOPDQUANT), unified MV
 * mode, B-fraction scale factor, frame coding mode, reference distance and
 * interpolation mode.  Bitfield layouts follow the GEN7.5 MFX command
 * definition — NOTE(review): verify individual bit positions against the
 * PRM when touching this.
 */
static void
gen75_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Derive alternate-pquant config and edge mask from the VOPDQUANT
     * fields (dquant/dq_frame/dq_profile/...). */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;
                
            case 0:
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* Under intensity compensation the effective MV mode is mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
    
    /* Advanced-profile I pictures are decoded as BI by the hardware. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE && 
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;


    /* The direct-MV read buffer is only valid for B pictures whose
     * backward reference is a P picture (I/BI references carry no MVs). */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = SURFACE(pic_param->backward_reference_picture);
        assert(obj_surface);
        gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface || 
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* Frame coding mode: 0 = progressive, 1 = frame-interlace; mode 2
     * (field-interlace) additionally encodes the field order. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* B-picture backward reference distance, scaled by the B fraction. */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Simple/Main profile: overlap smoothing only applies at PQUANT >= 9. */
    overlap = pic_param->sequence_fields.bits.overlap;
    if (profile != GEN7_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
        overlap = 0;

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture size in macroblocks minus one. */
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane presence (bits 24-30 are "raw mode" flags, hence the
     * inverted bp_* values) plus VLC table selectors. */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
2039
2040 static void
2041 gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
2042                              struct decode_state *decode_state,
2043                              struct gen7_mfd_context *gen7_mfd_context)
2044 {
2045     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2046     VAPictureParameterBufferVC1 *pic_param;
2047     int intensitycomp_single;
2048
2049     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2050     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2051
2052     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2053     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2054     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
2055
2056     BEGIN_BCS_BATCH(batch, 6);
2057     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
2058     OUT_BCS_BATCH(batch,
2059                   0 << 14 | /* FIXME: double ??? */
2060                   0 << 12 |
2061                   intensitycomp_single << 10 |
2062                   intensitycomp_single << 8 |
2063                   0 << 4 | /* FIXME: interlace mode */
2064                   0);
2065     OUT_BCS_BATCH(batch,
2066                   pic_param->luma_shift << 16 |
2067                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
2068     OUT_BCS_BATCH(batch, 0);
2069     OUT_BCS_BATCH(batch, 0);
2070     OUT_BCS_BATCH(batch, 0);
2071     ADVANCE_BCS_BATCH(batch);
2072 }
2073
2074 static void
2075 gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
2076                               struct decode_state *decode_state,
2077                               struct gen7_mfd_context *gen7_mfd_context)
2078 {
2079     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2080     VAPictureParameterBufferVC1 *pic_param;
2081     struct i965_driver_data *i965 = i965_driver_data(ctx);
2082     struct object_surface *obj_surface;
2083     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2084
2085     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2086     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2087
2088     obj_surface = SURFACE(decode_state->current_render_target);
2089
2090     if (obj_surface && obj_surface->private_data) {
2091         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2092     }
2093
2094     obj_surface = SURFACE(pic_param->backward_reference_picture);
2095
2096     if (obj_surface && obj_surface->private_data) {
2097         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2098     }
2099
2100     BEGIN_BCS_BATCH(batch, 7);
2101     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
2102
2103     if (dmv_write_buffer)
2104         OUT_BCS_RELOC(batch, dmv_write_buffer,
2105                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2106                       0);
2107     else
2108         OUT_BCS_BATCH(batch, 0);
2109
2110         OUT_BCS_BATCH(batch, 0);
2111         OUT_BCS_BATCH(batch, 0);
2112
2113     if (dmv_read_buffer)
2114         OUT_BCS_RELOC(batch, dmv_read_buffer,
2115                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2116                       0);
2117     else
2118         OUT_BCS_BATCH(batch, 0);
2119         OUT_BCS_BATCH(batch, 0);
2120         OUT_BCS_BATCH(batch, 0);
2121                   
2122     ADVANCE_BCS_BATCH(batch);
2123 }
2124
/*
 * Emit MFX_VC1_DIRECTMODE_STATE (pre-B-stepping, 3-DW form) pointing the
 * hardware at the direct-mode motion-vector (DMV) buffers: the write buffer
 * on the surface being decoded and the read buffer on the backward reference.
 * B+ steppings use the 7-DW variant via the _bplus helper instead.
 */
static void
gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;

    /* Newer steppings take a different (longer) command layout. */
    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
        return;
    }
    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* DMV write target lives on the current render target's private data. */
    obj_surface = SURFACE(decode_state->current_render_target);

    if (obj_surface && obj_surface->private_data) {
        dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    /* DMV read source comes from the backward reference (B-picture direct mode). */
    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->private_data) {
        dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));

    /* DW1: direct-MV write address (0 when the surface has no DMV buffer) */
    if (dmv_write_buffer)
        OUT_BCS_RELOC(batch, dmv_write_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW2: direct-MV read address (read-only, hence no write domain) */
    if (dmv_read_buffer)
        OUT_BCS_RELOC(batch, dmv_read_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
2174
/*
 * Translate the slice-header bit offset reported by the application into the
 * offset the hardware expects, compensating for VC-1 Advanced-profile
 * start-code emulation prevention bytes (0x03 inserted after 00 00 in
 * sequences 00 00 03 0x, x < 4) that are still present in the bitstream.
 *
 * buf:                      slice data, starting at slice_data_offset
 * in_slice_data_bit_offset: bit offset into the de-escaped stream
 * profile:                  VC-1 profile; only 3 (Advanced) uses escaping
 *
 * Returns the bit offset into the raw (escaped) buffer.
 * NOTE(review): the pattern test peeks up to 3 bytes past the header span;
 * assumed safe because slice payload follows the header in the same buffer.
 */
static int
gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int bit_remainder = in_slice_data_bit_offset % 8;
    int src, dst;

    /* Simple/Main profile bitstreams carry no emulation prevention bytes. */
    if (profile != 3)
        return in_slice_data_bit_offset;

    /* Walk the header: each 00 00 03 0x sequence hides one extra raw byte. */
    for (src = 0, dst = 0; src < header_bytes; src++, dst++) {
        if (buf[dst] == 0 && buf[dst + 1] == 0 && buf[dst + 2] == 3 && buf[dst + 3] < 4) {
            src++;
            dst += 2;
        }
    }

    return 8 * dst + bit_remainder;
}
2196
/*
 * Emit one MFD_VC1_BSD_OBJECT command for a single VC-1 slice.
 *
 * Maps the slice data BO to locate the true macroblock-data bit offset
 * (skipping Advanced-profile emulation prevention bytes), then programs the
 * slice's data size/offset, its vertical extent, and the sub-byte offset of
 * the first macroblock.
 *
 * next_slice_param: the following slice in decode order, or NULL for the
 *                   last slice of the picture (extent then runs to the
 *                   bottom of the coded frame).
 */
static void
gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* CPU-map the slice data to scan the header for escape bytes. */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data, 
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* Last slice extends to the frame's bottom macroblock row. */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    /* DW1: remaining byte length after the slice header */
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    /* DW2: byte offset of the first macroblock within the indirect object */
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    /* DW3: first MB row of this slice | first MB row of the next slice */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    /* DW4: residual bit offset (0-7) into the first macroblock byte */
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
}
2235
/*
 * Decode one complete VC-1 picture: program the MFX pipeline state once,
 * then emit an IND_OBJ base address plus one BSD object per slice, for every
 * slice parameter buffer in the decode state. The whole sequence is built
 * atomically in the BCS batch and flushed at the end.
 */
static void
gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Per-picture state: surfaces, scratch buffers, pipeline mode. */
    gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: one slice parameter buffer (group) per data BO. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        /* First slice of the next group, used as "next" for this group's tail. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        /* Inner loop: individual slices within the group. */
        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
2288
2289 static void
2290 gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
2291                           struct decode_state *decode_state,
2292                           struct gen7_mfd_context *gen7_mfd_context)
2293 {
2294     struct i965_driver_data *i965 = i965_driver_data(ctx);
2295     struct object_surface *obj_surface;
2296     VAPictureParameterBufferJPEGBaseline *pic_param;
2297     int subsampling = SUBSAMPLE_YUV420;
2298
2299     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2300
2301     if (pic_param->num_components == 1)
2302         subsampling = SUBSAMPLE_YUV400;
2303     else if (pic_param->num_components == 3) {
2304         int h1 = pic_param->components[0].h_sampling_factor;
2305         int h2 = pic_param->components[1].h_sampling_factor;
2306         int h3 = pic_param->components[2].h_sampling_factor;
2307         int v1 = pic_param->components[0].v_sampling_factor;
2308         int v2 = pic_param->components[1].v_sampling_factor;
2309         int v3 = pic_param->components[2].v_sampling_factor;
2310
2311         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2312             v1 == 2 && v2 == 1 && v3 == 1)
2313             subsampling = SUBSAMPLE_YUV420;
2314         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2315                  v1 == 1 && v2 == 1 && v3 == 1)
2316             subsampling = SUBSAMPLE_YUV422H;
2317         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2318                  v1 == 1 && v2 == 1 && v3 == 1)
2319             subsampling = SUBSAMPLE_YUV444;
2320         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2321                  v1 == 1 && v2 == 1 && v3 == 1)
2322             subsampling = SUBSAMPLE_YUV411;
2323         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2324                  v1 == 2 && v2 == 1 && v3 == 1)
2325             subsampling = SUBSAMPLE_YUV422V;
2326         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2327                  v1 == 2 && v2 == 2 && v3 == 2)
2328             subsampling = SUBSAMPLE_YUV422H;
2329         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2330                  v1 == 2 && v2 == 1 && v3 == 1)
2331             subsampling = SUBSAMPLE_YUV422V;
2332         else
2333             assert(0);
2334     } else {
2335         assert(0);
2336     }
2337
2338     /* Current decoded picture */
2339     obj_surface = SURFACE(decode_state->current_render_target);
2340     assert(obj_surface);
2341     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
2342
2343     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2344     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2345     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2346     gen7_mfd_context->pre_deblocking_output.valid = 1;
2347
2348     gen7_mfd_context->post_deblocking_output.bo = NULL;
2349     gen7_mfd_context->post_deblocking_output.valid = 0;
2350
2351     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2352     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2353
2354     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2355     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2356
2357     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2358     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2359
2360     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2361     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2362
2363     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2364     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2365 }
2366
/* Map a VA rotation index (0/90/180/270 degrees) to the GEN7 MFX JPEG
 * rotation field encoding; only index 0 is used below (no rotation). */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
2373
2374 static void
2375 gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
2376                         struct decode_state *decode_state,
2377                         struct gen7_mfd_context *gen7_mfd_context)
2378 {
2379     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2380     VAPictureParameterBufferJPEGBaseline *pic_param;
2381     int chroma_type = GEN7_YUV420;
2382     int frame_width_in_blks;
2383     int frame_height_in_blks;
2384
2385     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2386     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2387
2388     if (pic_param->num_components == 1)
2389         chroma_type = GEN7_YUV400;
2390     else if (pic_param->num_components == 3) {
2391         int h1 = pic_param->components[0].h_sampling_factor;
2392         int h2 = pic_param->components[1].h_sampling_factor;
2393         int h3 = pic_param->components[2].h_sampling_factor;
2394         int v1 = pic_param->components[0].v_sampling_factor;
2395         int v2 = pic_param->components[1].v_sampling_factor;
2396         int v3 = pic_param->components[2].v_sampling_factor;
2397
2398         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2399             v1 == 2 && v2 == 1 && v3 == 1)
2400             chroma_type = GEN7_YUV420;
2401         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2402                  v1 == 1 && v2 == 1 && v3 == 1)
2403             chroma_type = GEN7_YUV422H_2Y;
2404         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2405                  v1 == 1 && v2 == 1 && v3 == 1)
2406             chroma_type = GEN7_YUV444;
2407         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2408                  v1 == 1 && v2 == 1 && v3 == 1)
2409             chroma_type = GEN7_YUV411;
2410         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2411                  v1 == 2 && v2 == 1 && v3 == 1)
2412             chroma_type = GEN7_YUV422V_2Y;
2413         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2414                  v1 == 2 && v2 == 2 && v3 == 2)
2415             chroma_type = GEN7_YUV422H_4Y;
2416         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2417                  v1 == 2 && v2 == 1 && v3 == 1)
2418             chroma_type = GEN7_YUV422V_4Y;
2419         else
2420             assert(0);
2421     }
2422
2423     if (chroma_type == GEN7_YUV400 ||
2424         chroma_type == GEN7_YUV444 ||
2425         chroma_type == GEN7_YUV422V_2Y) {
2426         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2427         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2428     } else if (chroma_type == GEN7_YUV411) {
2429         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2430         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2431     } else {
2432         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2433         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2434     }
2435
2436     BEGIN_BCS_BATCH(batch, 3);
2437     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2438     OUT_BCS_BATCH(batch,
2439                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2440                   (chroma_type << 0));
2441     OUT_BCS_BATCH(batch,
2442                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2443                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2444     ADVANCE_BCS_BATCH(batch);
2445 }
2446
/* Huffman table slot IDs for MFX_JPEG_HUFF_TABLE_STATE:
 * VA table index 0 -> luma (Y) slot, index 1 -> chroma (UV) slot. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2451
/*
 * Emit one MFX_JPEG_HUFF_TABLE_STATE command per Huffman table.
 *
 * num_tables: number of VA Huffman tables to load (expected <= 2, the size
 *             of va_to_gen7_jpeg_hufftable).
 *
 * Each command is 53 DWs: 2 header DWs plus 204 bytes (51 DWs) of table
 * payload: 12 DC code-length counts, 12 DC values, 16 AC code-length counts
 * and 164 bytes of AC values.
 * NOTE(review): VAHuffmanTableBufferJPEGBaseline declares ac_values[162] +
 * 2 pad bytes; the 164-byte copy presumably relies on that padding — confirm
 * against the va_dec_jpeg.h layout.
 */
static void
gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    /* Nothing to program if the app supplied no Huffman table buffer. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
2479
/* Map a 1-based JPEG component ordinal to the MFX quantizer-matrix type;
 * slot 0 is unused (component ordinals start at 1). */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2487
2488 static void
2489 gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
2490                        struct decode_state *decode_state,
2491                        struct gen7_mfd_context *gen7_mfd_context)
2492 {
2493     VAPictureParameterBufferJPEGBaseline *pic_param;
2494     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2495     int index;
2496
2497     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2498         return;
2499
2500     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2501     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2502
2503     assert(pic_param->num_components <= 3);
2504
2505     for (index = 0; index < pic_param->num_components; index++) {
2506         int qm_type = va_to_gen7_jpeg_qm[pic_param->components[index].component_id - pic_param->components[0].component_id + 1];
2507         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2508         unsigned char raster_qm[64];
2509         int j;
2510
2511         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2512             continue;
2513
2514         for (j = 0; j < 64; j++)
2515             raster_qm[zigzag_direct[j]] = qm[j];
2516
2517         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2518     }
2519 }
2520
2521 static void
2522 gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
2523                          VAPictureParameterBufferJPEGBaseline *pic_param,
2524                          VASliceParameterBufferJPEGBaseline *slice_param,
2525                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2526                          dri_bo *slice_data_bo,
2527                          struct gen7_mfd_context *gen7_mfd_context)
2528 {
2529     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2530     int scan_component_mask = 0;
2531     int i;
2532
2533     assert(slice_param->num_components > 0);
2534     assert(slice_param->num_components < 4);
2535     assert(slice_param->num_components <= pic_param->num_components);
2536
2537     for (i = 0; i < slice_param->num_components; i++) {
2538         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2539         case 1:
2540             scan_component_mask |= (1 << 0);
2541             break;
2542         case 2:
2543             scan_component_mask |= (1 << 1);
2544             break;
2545         case 3:
2546             scan_component_mask |= (1 << 2);
2547             break;
2548         default:
2549             assert(0);
2550             break;
2551         }
2552     }
2553
2554     BEGIN_BCS_BATCH(batch, 6);
2555     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2556     OUT_BCS_BATCH(batch, 
2557                   slice_param->slice_data_size);
2558     OUT_BCS_BATCH(batch, 
2559                   slice_param->slice_data_offset);
2560     OUT_BCS_BATCH(batch,
2561                   slice_param->slice_horizontal_position << 16 |
2562                   slice_param->slice_vertical_position << 0);
2563     OUT_BCS_BATCH(batch,
2564                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2565                   (scan_component_mask << 27) |                 /* scan components */
2566                   (0 << 26) |   /* disable interrupt allowed */
2567                   (slice_param->num_mcus << 0));                /* MCU count */
2568     OUT_BCS_BATCH(batch,
2569                   (slice_param->restart_interval << 0));    /* RestartInterval */
2570     ADVANCE_BCS_BATCH(batch);
2571 }
2572
2573 /* Workaround for JPEG decoding on Ivybridge */
2574
2575 VAStatus 
2576 i965_DestroySurfaces(VADriverContextP ctx,
2577                      VASurfaceID *surface_list,
2578                      int num_surfaces);
2579 VAStatus 
2580 i965_CreateSurfaces(VADriverContextP ctx,
2581                     int width,
2582                     int height,
2583                     int format,
2584                     int num_surfaces,
2585                     VASurfaceID *surfaces);
2586
/* Tiny canned AVC clip decoded as a workaround before real JPEG decoding
 * (see "Workaround for JPEG decoding on Ivybridge" above). */
static struct {
    int width;                  /* clip width in pixels */
    int height;                 /* clip height in pixels */
    unsigned char data[32];     /* raw slice bitstream bytes (only data_size used) */
    int data_size;              /* number of valid bytes in data[] */
    int data_bit_offset;        /* bit offset of the first macroblock in data[] */
    int qp;                     /* slice quantization parameter */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
2605
/*
 * Allocate the resources the JPEG workaround needs: a fresh 16x16 NV12
 * surface (re-created on every call) and, once per context, a BO holding the
 * canned AVC slice bitstream from gen7_jpeg_wa_clip.
 */
static void
gen75_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop any surface left over from a previous workaround run. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* The slice-data BO is immutable; upload the canned clip only once. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
2642
/*
 * Emit MFX_PIPE_MODE_SELECT for the JPEG workaround: VLD decode of the
 * canned clip in AVC format, stream-out disabled, pre-deblocking output
 * only, with all error-termination bits cleared.
 */
static void
gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
2670
/*
 * Emit MFX_SURFACE_STATE describing the workaround's 16x16 NV12 surface
 * (planar 4:2:0, interleaved chroma, Y-major tiling).
 */
static void
gen75_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
2701
2702
/*
 * Emit the 61-DW B+ stepping MFX_PIPE_BUF_ADDR_STATE for the JPEG
 * workaround: only the pre-deblocking output (the WA surface) and a
 * temporary intra row-store scratch BO are programmed; every other address
 * slot is zero. The scratch BO is released right after emission — the batch
 * reloc keeps it alive until execution.
 */
static void
gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* DW 1-3: pre-deblocking output = the workaround surface */
    OUT_BCS_RELOC(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* DW 4-6 */
    OUT_BCS_BATCH(batch, 0); /* post deblocking */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* DW 19..50: reference picture addresses, all unused here */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    /* DW 51 */
    OUT_BCS_BATCH(batch, 0);

        /* the DW 52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* the DW 55-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* The batch holds its own reference via the relocation. */
    dri_bo_unreference(intra_bo);
}
2776
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (pre-B-stepping, 25-DW form) for the JPEG
 * workaround: pre-deblocking output = WA surface, a temporary intra
 * row-store scratch BO, and zeros everywhere else. B+ steppings use the
 * 61-DW variant via the _bplus helper.
 */
static void
gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
        return;
    }
    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 25);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
    /* DW1: pre-deblocking output = the workaround surface */
    OUT_BCS_RELOC(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    
    OUT_BCS_BATCH(batch, 0); /* post deblocking */

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    /* DW5: intra row-store scratch */
    OUT_BCS_RELOC(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0); /* deblocking filter row store, unused */

    /* DW 7..22: reference picture addresses, all unused here */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
    ADVANCE_BCS_BATCH(batch);

    /* The batch holds its own reference via the relocation. */
    dri_bo_unreference(intra_bo);
}
2826
/*
 * Emit the 10-DW B+ stepping MFX_BSP_BUF_BASE_ADDR_STATE for the JPEG
 * workaround, with freshly allocated BSD/MPC and MPR row-store scratch BOs.
 * Both BOs are released right after emission — the batch relocations keep
 * them alive until execution.
 */
static void
gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* DW 1-3: BSD/MPC row store */
    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 4-6: MPR row store */
    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 7-9: bitplane read buffer, unused for the workaround */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* The batch holds its own references via the relocations. */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
2872
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (pre-B-stepping, 4-DW form) for the JPEG
 * workaround, with temporary BSD/MPC and MPR row-store scratch BOs. B+
 * steppings use the 10-DW variant via the _bplus helper.
 */
static void
gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
        return;
    }
        
    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    /* DW1: BSD/MPC row store */
    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    /* DW2: MPR row store */
    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0); /* DW3: bitplane read buffer, unused */

    ADVANCE_BCS_BATCH(batch);

    /* The batch holds its own references via the relocations. */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
2915
/*
 * Intentionally a no-op: the JPEG decoding workaround does not program
 * any AVC quantization-matrix state, so no MFX_QM_STATE command is
 * emitted here.
 */
static void
gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
2922
2923 static void
2924 gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
2925                            struct gen7_mfd_context *gen7_mfd_context)
2926 {
2927     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2928     int img_struct = 0;
2929     int mbaff_frame_flag = 0;
2930     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2931
2932     BEGIN_BCS_BATCH(batch, 16);
2933     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2934     OUT_BCS_BATCH(batch, 
2935                   width_in_mbs * height_in_mbs);
2936     OUT_BCS_BATCH(batch, 
2937                   ((height_in_mbs - 1) << 16) | 
2938                   ((width_in_mbs - 1) << 0));
2939     OUT_BCS_BATCH(batch, 
2940                   (0 << 24) |
2941                   (0 << 16) |
2942                   (0 << 14) |
2943                   (0 << 13) |
2944                   (0 << 12) | /* differ from GEN6 */
2945                   (0 << 10) |
2946                   (img_struct << 8));
2947     OUT_BCS_BATCH(batch,
2948                   (1 << 10) | /* 4:2:0 */
2949                   (1 << 7) |  /* CABAC */
2950                   (0 << 6) |
2951                   (0 << 5) |
2952                   (0 << 4) |
2953                   (0 << 3) |
2954                   (1 << 2) |
2955                   (mbaff_frame_flag << 1) |
2956                   (0 << 0));
2957     OUT_BCS_BATCH(batch, 0);
2958     OUT_BCS_BATCH(batch, 0);
2959     OUT_BCS_BATCH(batch, 0);
2960     OUT_BCS_BATCH(batch, 0);
2961     OUT_BCS_BATCH(batch, 0);
2962     OUT_BCS_BATCH(batch, 0);
2963     OUT_BCS_BATCH(batch, 0);
2964     OUT_BCS_BATCH(batch, 0);
2965     OUT_BCS_BATCH(batch, 0);
2966     OUT_BCS_BATCH(batch, 0);
2967     OUT_BCS_BATCH(batch, 0);
2968     ADVANCE_BCS_BATCH(batch);
2969 }
2970
2971 static void
2972 gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
2973                                   struct gen7_mfd_context *gen7_mfd_context)
2974 {
2975     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2976     int i;
2977
2978     BEGIN_BCS_BATCH(batch, 71);
2979     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2980
2981     /* reference surfaces 0..15 */
2982     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2983         OUT_BCS_BATCH(batch, 0); /* top */
2984         OUT_BCS_BATCH(batch, 0); /* bottom */
2985     }
2986         
2987         OUT_BCS_BATCH(batch, 0);
2988
2989     /* the current decoding frame/field */
2990     OUT_BCS_BATCH(batch, 0); /* top */
2991     OUT_BCS_BATCH(batch, 0);
2992     OUT_BCS_BATCH(batch, 0);
2993
2994     /* POC List */
2995     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2996         OUT_BCS_BATCH(batch, 0);
2997         OUT_BCS_BATCH(batch, 0);
2998     }
2999
3000     OUT_BCS_BATCH(batch, 0);
3001     OUT_BCS_BATCH(batch, 0);
3002
3003     ADVANCE_BCS_BATCH(batch);
3004 }
3005
3006 static void
3007 gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
3008                                   struct gen7_mfd_context *gen7_mfd_context)
3009 {
3010     struct i965_driver_data *i965 = i965_driver_data(ctx);
3011     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3012     int i;
3013
3014     if (IS_STEPPING_BPLUS(i965)) {
3015         gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
3016         return;
3017     }   
3018
3019     BEGIN_BCS_BATCH(batch, 69);
3020     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
3021
3022     /* reference surfaces 0..15 */
3023     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3024         OUT_BCS_BATCH(batch, 0); /* top */
3025         OUT_BCS_BATCH(batch, 0); /* bottom */
3026     }
3027
3028     /* the current decoding frame/field */
3029     OUT_BCS_BATCH(batch, 0); /* top */
3030     OUT_BCS_BATCH(batch, 0); /* bottom */
3031
3032     /* POC List */
3033     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3034         OUT_BCS_BATCH(batch, 0);
3035         OUT_BCS_BATCH(batch, 0);
3036     }
3037
3038     OUT_BCS_BATCH(batch, 0);
3039     OUT_BCS_BATCH(batch, 0);
3040
3041     ADVANCE_BCS_BATCH(batch);
3042 }
3043
3044 static void 
3045 gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
3046                                      struct gen7_mfd_context *gen7_mfd_context)
3047 {
3048     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3049
3050     BEGIN_BCS_BATCH(batch, 26);
3051     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
3052     OUT_BCS_RELOC(batch,
3053                   gen7_mfd_context->jpeg_wa_slice_data_bo,
3054                   I915_GEM_DOMAIN_INSTRUCTION, 0,
3055                   0);
3056     OUT_BCS_BATCH(batch, 0);
3057     OUT_BCS_BATCH(batch, 0);
3058         
3059     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
3060     OUT_BCS_BATCH(batch, 0);
3061
3062         /* MFX indirect MV 6-10 */
3063     OUT_BCS_BATCH(batch, 0);
3064     OUT_BCS_BATCH(batch, 0);
3065     OUT_BCS_BATCH(batch, 0);
3066     OUT_BCS_BATCH(batch, 0);
3067     OUT_BCS_BATCH(batch, 0);
3068
3069         /* MFX IT_COFF 11-15 */
3070     OUT_BCS_BATCH(batch, 0);
3071     OUT_BCS_BATCH(batch, 0);
3072     OUT_BCS_BATCH(batch, 0);
3073     OUT_BCS_BATCH(batch, 0);
3074     OUT_BCS_BATCH(batch, 0);
3075
3076         /* MFX IT_DBLK 16-20 */
3077     OUT_BCS_BATCH(batch, 0);
3078     OUT_BCS_BATCH(batch, 0);
3079     OUT_BCS_BATCH(batch, 0);
3080     OUT_BCS_BATCH(batch, 0);
3081     OUT_BCS_BATCH(batch, 0);
3082
3083         /* MFX PAK_BSE object for encoder 21-25 */
3084     OUT_BCS_BATCH(batch, 0);
3085     OUT_BCS_BATCH(batch, 0);
3086     OUT_BCS_BATCH(batch, 0);
3087     OUT_BCS_BATCH(batch, 0);
3088     OUT_BCS_BATCH(batch, 0);
3089     ADVANCE_BCS_BATCH(batch);
3090 }
3091
3092 static void
3093 gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
3094                                      struct gen7_mfd_context *gen7_mfd_context)
3095 {
3096     struct i965_driver_data *i965 = i965_driver_data(ctx);
3097     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3098
3099     if (IS_STEPPING_BPLUS(i965)) {
3100         gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
3101         return;
3102     }   
3103
3104     BEGIN_BCS_BATCH(batch, 11);
3105     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
3106     OUT_BCS_RELOC(batch,
3107                   gen7_mfd_context->jpeg_wa_slice_data_bo,
3108                   I915_GEM_DOMAIN_INSTRUCTION, 0,
3109                   0);
3110     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
3111     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3112     OUT_BCS_BATCH(batch, 0);
3113     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3114     OUT_BCS_BATCH(batch, 0);
3115     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3116     OUT_BCS_BATCH(batch, 0);
3117     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3118     OUT_BCS_BATCH(batch, 0);
3119     ADVANCE_BCS_BATCH(batch);
3120 }
3121
/*
 * Emits the MFD_AVC_BSD_OBJECT command that kicks off decoding of the
 * embedded AVC workaround clip (gen7_jpeg_wa_clip).
 */
static void
gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: size of the workaround slice data */
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    /* DW2: zero — the slice data presumably starts at the beginning of
     * the indirect object buffer; confirm against the PRM if changed */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte offset of the slice data (bits 31:16), LastSlice flag,
     * and the sub-byte bit offset in the low three bits */
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
3148
/*
 * Emits MFX_AVC_SLICE_STATE for the single intra slice of the embedded
 * workaround clip: one I slice covering the whole (1x1 MB) picture,
 * deblocking disabled, marked as the last slice.
 */
static void
gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Slice covers MB (0,0); "next slice" position (0,1) marks the end
     * of the one-MB-high picture. */
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0; /* intra: no reference lists */
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    /* DW2: active reference counts for L1/L0 */
    OUT_BCS_BATCH(batch, 
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    /* DW3: deblocking control and slice QP */
    OUT_BCS_BATCH(batch, 
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    /* DW4: slice start position in MB coordinates */
    OUT_BCS_BATCH(batch, 
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) | 
                  (first_mb_in_slice << 0));
    /* DW5: start position of the (nonexistent) next slice */
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
3188
/*
 * Haswell JPEG decoding workaround: before running the real JPEG
 * pipeline, decode a tiny embedded AVC clip so the MFX engine is left in
 * a known-good state.  Emits the complete AVC state setup followed by
 * the slice/BSD commands; the ordering of these calls mirrors the
 * regular AVC decode path and must not be changed.
 */
static void
gen75_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen75_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
3208
3209 void
3210 gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
3211                              struct decode_state *decode_state,
3212                              struct gen7_mfd_context *gen7_mfd_context)
3213 {
3214     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3215     VAPictureParameterBufferJPEGBaseline *pic_param;
3216     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3217     dri_bo *slice_data_bo;
3218     int i, j, max_selector = 0;
3219
3220     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3221     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3222
3223     /* Currently only support Baseline DCT */
3224     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3225     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3226     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
3227     intel_batchbuffer_emit_mi_flush(batch);
3228     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3229     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3230     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3231     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3232     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
3233
3234     for (j = 0; j < decode_state->num_slice_params; j++) {
3235         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3236         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3237         slice_data_bo = decode_state->slice_datas[j]->bo;
3238         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3239
3240         if (j == decode_state->num_slice_params - 1)
3241             next_slice_group_param = NULL;
3242         else
3243             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3244
3245         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3246             int component;
3247
3248             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3249
3250             if (i < decode_state->slice_params[j]->num_elements - 1)
3251                 next_slice_param = slice_param + 1;
3252             else
3253                 next_slice_param = next_slice_group_param;
3254
3255             for (component = 0; component < slice_param->num_components; component++) {
3256                 if (max_selector < slice_param->components[component].dc_table_selector)
3257                     max_selector = slice_param->components[component].dc_table_selector;
3258
3259                 if (max_selector < slice_param->components[component].ac_table_selector)
3260                     max_selector = slice_param->components[component].ac_table_selector;
3261             }
3262
3263             slice_param++;
3264         }
3265     }
3266
3267     assert(max_selector < 2);
3268     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3269
3270     for (j = 0; j < decode_state->num_slice_params; j++) {
3271         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3272         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3273         slice_data_bo = decode_state->slice_datas[j]->bo;
3274         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3275
3276         if (j == decode_state->num_slice_params - 1)
3277             next_slice_group_param = NULL;
3278         else
3279             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3280
3281         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3282             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3283
3284             if (i < decode_state->slice_params[j]->num_elements - 1)
3285                 next_slice_param = slice_param + 1;
3286             else
3287                 next_slice_param = next_slice_group_param;
3288
3289             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3290             slice_param++;
3291         }
3292     }
3293
3294     intel_batchbuffer_end_atomic(batch);
3295     intel_batchbuffer_flush(batch);
3296 }
3297
/*
 * hw_context::run entry point: dispatches a decode request to the
 * codec-specific decode routine based on the VA profile.
 */
static void 
gen75_mfd_decode_picture(VADriverContextP ctx, 
                        VAProfile profile, 
                        union codec_state *codec_state,
                        struct hw_context *hw_context)

{
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
    struct decode_state *decode_state = &codec_state->decode;

    assert(gen7_mfd_context);

    /* Reset the MPEG-2 slice-position workaround state for each picture. */
    gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;

    switch (profile) {
    case VAProfileMPEG2Simple:
    case VAProfileMPEG2Main:
        gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
        break;
        
    case VAProfileH264Baseline:
    case VAProfileH264Main:
    case VAProfileH264High:
        gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
        break;

    case VAProfileVC1Simple:
    case VAProfileVC1Main:
    case VAProfileVC1Advanced:
        gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
        break;

    case VAProfileJPEGBaseline:
        gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
        break;

    default:
        /* Unsupported profiles should have been rejected at context
         * creation time. */
        assert(0);
        break;
    }
}
3339
3340 static void
3341 gen75_mfd_context_destroy(void *hw_context)
3342 {
3343     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3344
3345     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3346     gen7_mfd_context->post_deblocking_output.bo = NULL;
3347
3348     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3349     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3350
3351     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3352     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3353
3354     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3355     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3356
3357     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3358     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3359
3360     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3361     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3362
3363     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3364     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3365
3366     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3367
3368     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3369     free(gen7_mfd_context);
3370 }
3371
/*
 * MPEG-2-specific context initialization: marks every inverse-quantiser
 * matrix slot as not loaded (-1).  NOTE(review): the consumers of these
 * flags are outside this chunk — presumably the MPEG-2 QM state emission
 * checks them before programming matrices; confirm in gen75_mfd_mpeg2_*.
 */
static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
                                    struct gen7_mfd_context *gen7_mfd_context)
{
    gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
}
3380
3381 struct hw_context *
3382 gen75_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
3383 {
3384     struct intel_driver_data *intel = intel_driver_data(ctx);
3385     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3386     int i;
3387
3388     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
3389     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
3390     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3391
3392     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3393         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3394         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3395     }
3396
3397     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3398
3399     switch (profile) {
3400     case VAProfileMPEG2Simple:
3401     case VAProfileMPEG2Main:
3402         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3403         break;
3404
3405     case VAProfileH264Baseline:
3406     case VAProfileH264Main:
3407     case VAProfileH264High:
3408         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
3409         break;
3410     default:
3411         break;
3412     }
3413     return (struct hw_context *)gen7_mfd_context;
3414 }