Handle the MFX command changes between the A-stepping and B-stepping of Haswell
[profile/ivi/vaapi-intel-driver.git] / src / gen75_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <va/va_dec_jpeg.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38
39 #include "i965_defines.h"
40 #include "i965_drv_video.h"
41 #include "i965_decoder_utils.h"
42
43 #include "gen7_mfd.h"
44
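/*
 * Haswell changed the layout of several MFX commands between the A stepping
 * and the B0 stepping (in this file: MFX_PIPE_BUF_ADDR_STATE,
 * MFX_IND_OBJ_BASE_ADDR_STATE, MFX_BSP_BUF_BASE_ADDR_STATE and
 * MFX_AVC_DIRECTMODE_STATE grew longer forms).  A device revision of
 * B0_STEP_REV (2) or higher is treated as B-stepping or later; the helpers
 * below test IS_STEPPING_BPLUS() and dispatch to their *_bplus variants,
 * which emit the extended command layouts.
 */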
45 #define B0_STEP_REV             2
46 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
47
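/*
 * Classic 8x8 zig-zag scan order: zigzag_direct[i] is the raster-scan index
 * of the i-th coefficient in zig-zag order.  It is used when quantizer
 * matrices supplied in raster order have to be reordered for the hardware
 * (non-AVC paths later in this file).
 */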
48 static const uint32_t zigzag_direct[64] = {
49     0,   1,  8, 16,  9,  2,  3, 10,
50     17, 24, 32, 25, 18, 11,  4,  5,
51     12, 19, 26, 33, 40, 48, 41, 34,
52     27, 20, 13,  6,  7, 14, 21, 28,
53     35, 42, 49, 56, 57, 50, 43, 36,
54     29, 22, 15, 23, 30, 37, 44, 51,
55     58, 59, 52, 45, 38, 31, 39, 46,
56     53, 60, 61, 54, 47, 55, 62, 63
57 };
58
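/*
 * Maintain the frame store (reference surface) table for AVC:
 *   1. drop entries whose surface no longer appears in
 *      pic_param->ReferenceFrames, releasing the BO if the surface has
 *      already been displayed;
 *   2. add any new reference picture, picking the lowest unused
 *      frame store id;
 *   3. swap entries so that, where possible, slot i holds the surface with
 *      frame_store_id i.
 */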
59 static void
60 gen75_mfd_avc_frame_store_index(VADriverContextP ctx,
61                                VAPictureParameterBufferH264 *pic_param,
62                                struct gen7_mfd_context *gen7_mfd_context)
63 {
64     struct i965_driver_data *i965 = i965_driver_data(ctx);
65     int i, j;
66
67     assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
68
69     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
70         int found = 0;
71
72         if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
73             continue;
74
75         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
76             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
77             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
78                 continue;
79
80             if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
81                 found = 1;
82                 break;
83             }
84         }
85
86         if (!found) {
87             struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
88             obj_surface->flags &= ~SURFACE_REFERENCED;
89
90             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
91                 dri_bo_unreference(obj_surface->bo);
92                 obj_surface->bo = NULL;
93                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
94             }
95
96             if (obj_surface->free_private_data)
97                 obj_surface->free_private_data(&obj_surface->private_data);
98
99             gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
100             gen7_mfd_context->reference_surface[i].frame_store_id = -1;
101         }
102     }
103
104     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
105         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
106         int found = 0;
107
108         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
109             continue;
110
111         for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
112             if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
113                 continue;
114             
115             if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
116                 found = 1;
117                 break;
118             }
119         }
120
121         if (!found) {
122             int frame_idx;
123             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
124             
125             assert(obj_surface);
126             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
127
128             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
129                 for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
130                     if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
131                         continue;
132
133                     if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
134                         break;
135                 }
136
137                 if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
138                     break;
139             }
140
141             assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
142
143             for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
144                 if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
145                     gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
146                     gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
147                     break;
148                 }
149             }
150         }
151     }
152
153     /* sort */
154     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
155         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
156             gen7_mfd_context->reference_surface[i].frame_store_id == i)
157             continue;
158
159         for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
160             if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
161                 gen7_mfd_context->reference_surface[j].frame_store_id == i) {
162                 VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
163                 int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
164
165                 gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
166                 gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
167                 gen7_mfd_context->reference_surface[j].surface_id = id;
168                 gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
169                 break;
170             }
171         }
172     }
173 }
174
175 static void 
176 gen75_mfd_free_avc_surface(void **data)
177 {
178     struct gen7_avc_surface *gen7_avc_surface = *data;
179
180     if (!gen7_avc_surface)
181         return;
182
183     dri_bo_unreference(gen7_avc_surface->dmv_top);
184     gen7_avc_surface->dmv_top = NULL;
185     dri_bo_unreference(gen7_avc_surface->dmv_bottom);
186     gen7_avc_surface->dmv_bottom = NULL;
187
188     free(gen7_avc_surface);
189     *data = NULL;
190 }
191
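/*
 * Attach the per-surface AVC private data and allocate the direct-MV (DMV)
 * write/read buffers: one top-field buffer of
 * width_in_mbs * height_in_mbs * 128 bytes (4KB aligned), plus a separate
 * bottom-field buffer only when the picture is coded as a field and
 * direct_8x8_inference is disabled.  As a rough, illustrative figure, a
 * 1920x1088 stream (120x68 MBs) needs about 1MB per surface for the
 * top-field buffer (120 * 68 * 128 = 1,044,480 bytes).
 */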
192 static void
193 gen75_mfd_init_avc_surface(VADriverContextP ctx, 
194                           VAPictureParameterBufferH264 *pic_param,
195                           struct object_surface *obj_surface)
196 {
197     struct i965_driver_data *i965 = i965_driver_data(ctx);
198     struct gen7_avc_surface *gen7_avc_surface = obj_surface->private_data;
199     int width_in_mbs, height_in_mbs;
200
201     obj_surface->free_private_data = gen75_mfd_free_avc_surface;
202     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
203     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
204
205     if (!gen7_avc_surface) {
206         gen7_avc_surface = calloc(sizeof(struct gen7_avc_surface), 1);
207         assert((obj_surface->size & 0x3f) == 0);
208         obj_surface->private_data = gen7_avc_surface;
209     }
210
211     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
212                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
213
214     if (gen7_avc_surface->dmv_top == NULL) {
215         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
216                                                  "direct mv w/r buffer",
217                                                  width_in_mbs * height_in_mbs * 128,
218                                                  0x1000);
219         assert(gen7_avc_surface->dmv_top);
220     }
221
222     if (gen7_avc_surface->dmv_bottom_flag &&
223         gen7_avc_surface->dmv_bottom == NULL) {
224         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
225                                                     "direct mv w/r buffer",
226                                                     width_in_mbs * height_in_mbs * 128,                                                    
227                                                     0x1000);
228         assert(gen7_avc_surface->dmv_bottom);
229     }
230 }
231
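/*
 * MFX_PIPE_MODE_SELECT: pick the codec (standard_select), VLD decode mode
 * and the "long" bitstream format (MFX_LONG_MODE), and enable the pre-/post-
 * deblocking outputs according to which buffer the decode_init code marked
 * valid.  Error-handling terminations and stream-out are left disabled.
 */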
232 static void
233 gen75_mfd_pipe_mode_select(VADriverContextP ctx,
234                           struct decode_state *decode_state,
235                           int standard_select,
236                           struct gen7_mfd_context *gen7_mfd_context)
237 {
238     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
239
240     assert(standard_select == MFX_FORMAT_MPEG2 ||
241            standard_select == MFX_FORMAT_AVC ||
242            standard_select == MFX_FORMAT_VC1 ||
243            standard_select == MFX_FORMAT_JPEG);
244
245     BEGIN_BCS_BATCH(batch, 5);
246     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
247     OUT_BCS_BATCH(batch,
248                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
249                   (MFD_MODE_VLD << 15) | /* VLD mode */
250                   (0 << 10) | /* disable Stream-Out */
251                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
252                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
253                   (0 << 5)  | /* not in stitch mode */
254                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
255                   (standard_select << 0));
256     OUT_BCS_BATCH(batch,
257                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
258                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
259                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
260                   (0 << 1)  |
261                   (0 << 0));
262     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
263     OUT_BCS_BATCH(batch, 0); /* reserved */
264     ADVANCE_BCS_BATCH(batch);
265 }
266
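/*
 * MFX_SURFACE_STATE: describe the destination as a tiled (Y-major) planar
 * 4:2:0 8-bit surface.  Chroma is interleaved (NV12) for the video codecs;
 * JPEG is the exception, which is why the interleave bit and the V(Cr)
 * offset below depend on standard_select.
 */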
267 static void
268 gen75_mfd_surface_state(VADriverContextP ctx,
269                        struct decode_state *decode_state,
270                        int standard_select,
271                        struct gen7_mfd_context *gen7_mfd_context)
272 {
273     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
274     struct i965_driver_data *i965 = i965_driver_data(ctx);
275     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
276     unsigned int y_cb_offset;
277     unsigned int y_cr_offset;
278
279     assert(obj_surface);
280
281     y_cb_offset = obj_surface->y_cb_offset;
282     y_cr_offset = obj_surface->y_cr_offset;
283
284     BEGIN_BCS_BATCH(batch, 6);
285     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
286     OUT_BCS_BATCH(batch, 0);
287     OUT_BCS_BATCH(batch,
288                   ((obj_surface->orig_height - 1) << 18) |
289                   ((obj_surface->orig_width - 1) << 4));
290     OUT_BCS_BATCH(batch,
291                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
292                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
293                   (0 << 22) | /* surface object control state, ignored */
294                   ((obj_surface->width - 1) << 3) | /* pitch */
295                   (0 << 2)  | /* must be 0 */
296                   (1 << 1)  | /* must be tiled */
297                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
298     OUT_BCS_BATCH(batch,
299                   (0 << 16) | /* X offset for U(Cb), must be 0 */
300                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
301     OUT_BCS_BATCH(batch,
302                   (0 << 16) | /* X offset for V(Cr), must be 0 */
303                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
304     ADVANCE_BCS_BATCH(batch);
305 }
306
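/*
 * B-stepping layout of MFX_PIPE_BUF_ADDR_STATE: 61 DWords instead of the
 * 25-DWord form used on the A stepping.  Each buffer address is followed by
 * two extra DWords (left zero here), the 16 reference picture addresses move
 * to DW 19..50 with two DWords per entry, and the trailing DWords (reference
 * property, macroblock status / ILDB, second macroblock status buffers) are
 * not used for decoding and stay zero.
 */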
307 static void
308 gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
309                              struct decode_state *decode_state,
310                              int standard_select,
311                              struct gen7_mfd_context *gen7_mfd_context)
312 {
313     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
314     struct i965_driver_data *i965 = i965_driver_data(ctx);
315     int i;
316
317     BEGIN_BCS_BATCH(batch, 61);
318     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
319         /* Pre-deblock 1-3 */
320     if (gen7_mfd_context->pre_deblocking_output.valid)
321         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
322                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
323                       0);
324     else
325         OUT_BCS_BATCH(batch, 0);
326
327         OUT_BCS_BATCH(batch, 0);
328         OUT_BCS_BATCH(batch, 0);
329         /* Post-deblocking 4-6 */
330     if (gen7_mfd_context->post_deblocking_output.valid)
331         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
332                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
333                       0);
334     else
335         OUT_BCS_BATCH(batch, 0);
336
337         OUT_BCS_BATCH(batch, 0);
338         OUT_BCS_BATCH(batch, 0);
339
340         /* uncompressed-video & stream out 7-12 */
341     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
342     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
343         OUT_BCS_BATCH(batch, 0);
344         OUT_BCS_BATCH(batch, 0);
345         OUT_BCS_BATCH(batch, 0);
346         OUT_BCS_BATCH(batch, 0);
347
348         /* intra row-store scratch 13-15 */
349     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
350         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
351                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
352                       0);
353     else
354         OUT_BCS_BATCH(batch, 0);
355
356         OUT_BCS_BATCH(batch, 0);
357         OUT_BCS_BATCH(batch, 0);
358         /* deblocking-filter-row-store 16-18 */
359     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
360         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
361                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
362                       0);
363     else
364         OUT_BCS_BATCH(batch, 0);
365         OUT_BCS_BATCH(batch, 0);
366         OUT_BCS_BATCH(batch, 0);
367
368     /* DW 19..50 */
369     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
370         struct object_surface *obj_surface;
371
372         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
373             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
374             assert(obj_surface && obj_surface->bo);
375
376             OUT_BCS_RELOC(batch, obj_surface->bo,
377                           I915_GEM_DOMAIN_INSTRUCTION, 0,
378                           0);
379         } else {
380             OUT_BCS_BATCH(batch, 0);
381         }
382             OUT_BCS_BATCH(batch, 0);
383     }
384         /* reference property 51 */
385     OUT_BCS_BATCH(batch, 0);  
386         
387         /* Macroblock status & ILDB 52-57 */
388         OUT_BCS_BATCH(batch, 0);
389         OUT_BCS_BATCH(batch, 0);
390         OUT_BCS_BATCH(batch, 0);
391         OUT_BCS_BATCH(batch, 0);
392         OUT_BCS_BATCH(batch, 0);
393         OUT_BCS_BATCH(batch, 0);
394
395         /* the second Macroblock status 58-60 */        
396         OUT_BCS_BATCH(batch, 0);
397         OUT_BCS_BATCH(batch, 0);
398         OUT_BCS_BATCH(batch, 0);
399     ADVANCE_BCS_BATCH(batch);
400 }
401
402 static void
403 gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
404                              struct decode_state *decode_state,
405                              int standard_select,
406                              struct gen7_mfd_context *gen7_mfd_context)
407 {
408     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
409     struct i965_driver_data *i965 = i965_driver_data(ctx);
410     int i;
411
412     if (IS_STEPPING_BPLUS(i965)) {
413         gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
414                                             standard_select, gen7_mfd_context);
415         return;
416     }
417     BEGIN_BCS_BATCH(batch, 25);
418     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
419     if (gen7_mfd_context->pre_deblocking_output.valid)
420         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
421                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
422                       0);
423     else
424         OUT_BCS_BATCH(batch, 0);
425
426     if (gen7_mfd_context->post_deblocking_output.valid)
427         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
428                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
429                       0);
430     else
431         OUT_BCS_BATCH(batch, 0);
432
433     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
434     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
435
436     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
437         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
438                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
439                       0);
440     else
441         OUT_BCS_BATCH(batch, 0);
442
443     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
444         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
445                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
446                       0);
447     else
448         OUT_BCS_BATCH(batch, 0);
449
450     /* DW 7..22 */
451     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
452         struct object_surface *obj_surface;
453
454         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
455             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
456             assert(obj_surface && obj_surface->bo);
457
458             OUT_BCS_RELOC(batch, obj_surface->bo,
459                           I915_GEM_DOMAIN_INSTRUCTION, 0,
460                           0);
461         } else {
462             OUT_BCS_BATCH(batch, 0);
463         }
464     }
465
466     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
467     OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
468     ADVANCE_BCS_BATCH(batch);
469 }
470
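/*
 * B-stepping layout of MFX_IND_OBJ_BASE_ADDR_STATE: 26 DWords instead of 11.
 * For VLD decoding only the indirect bitstream base address and its upper
 * bound (2GB) are programmed; the indirect MV, IT_COFF, IT_DBLK and PAK_BSE
 * sections are left zero.
 */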
471 static void
472 gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
473                                  dri_bo *slice_data_bo,
474                                  int standard_select,
475                                  struct gen7_mfd_context *gen7_mfd_context)
476 {
477     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
478
479     BEGIN_BCS_BATCH(batch, 26);
480     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
481         /* MFX In BS 1-5 */
482     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
483     OUT_BCS_BATCH(batch, 0);
484     OUT_BCS_BATCH(batch, 0);
485         /* Upper bound 4-5 */   
486     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
487     OUT_BCS_BATCH(batch, 0);
488
489         /* MFX indirect MV 6-10 */
490     OUT_BCS_BATCH(batch, 0);
491     OUT_BCS_BATCH(batch, 0);
492     OUT_BCS_BATCH(batch, 0);
493     OUT_BCS_BATCH(batch, 0);
494     OUT_BCS_BATCH(batch, 0);
495         
496         /* MFX IT_COFF 11-15 */
497     OUT_BCS_BATCH(batch, 0);
498     OUT_BCS_BATCH(batch, 0);
499     OUT_BCS_BATCH(batch, 0);
500     OUT_BCS_BATCH(batch, 0);
501     OUT_BCS_BATCH(batch, 0);
502
503         /* MFX IT_DBLK 16-20 */
504     OUT_BCS_BATCH(batch, 0);
505     OUT_BCS_BATCH(batch, 0);
506     OUT_BCS_BATCH(batch, 0);
507     OUT_BCS_BATCH(batch, 0);
508     OUT_BCS_BATCH(batch, 0);
509
510         /* MFX PAK_BSE object for encoder 21-25 */
511     OUT_BCS_BATCH(batch, 0);
512     OUT_BCS_BATCH(batch, 0);
513     OUT_BCS_BATCH(batch, 0);
514     OUT_BCS_BATCH(batch, 0);
515     OUT_BCS_BATCH(batch, 0);
516
517     ADVANCE_BCS_BATCH(batch);
518 }
519
520 static void
521 gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
522                                  dri_bo *slice_data_bo,
523                                  int standard_select,
524                                  struct gen7_mfd_context *gen7_mfd_context)
525 {
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527
528     struct i965_driver_data *i965 = i965_driver_data(ctx);
529
530     if (IS_STEPPING_BPLUS(i965)) {
531         gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
532                                                 standard_select, gen7_mfd_context);
533         return;
534     }
535     BEGIN_BCS_BATCH(batch, 11);
536     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
537     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
538     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
539     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
540     OUT_BCS_BATCH(batch, 0);
541     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
542     OUT_BCS_BATCH(batch, 0);
543     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
544     OUT_BCS_BATCH(batch, 0);
545     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
546     OUT_BCS_BATCH(batch, 0);
547     ADVANCE_BCS_BATCH(batch);
548 }
549
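/*
 * B-stepping layout of MFX_BSP_BUF_BASE_ADDR_STATE: 10 DWords instead of 4,
 * with three DWords each for the BSD/MPC row store, the MPR row store and
 * the (VC-1) bitplane read buffer.
 */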
550 static void
551 gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
552                                  struct decode_state *decode_state,
553                                  int standard_select,
554                                  struct gen7_mfd_context *gen7_mfd_context)
555 {
556     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
557
558     BEGIN_BCS_BATCH(batch, 10);
559     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
560
561     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
562         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
563                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                       0);
565         else
566                 OUT_BCS_BATCH(batch, 0);
567                 
568     OUT_BCS_BATCH(batch, 0);
569     OUT_BCS_BATCH(batch, 0);
570         /* MPR Row Store Scratch buffer 4-6 */
571     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
572         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
573                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
574                       0);
575     else
576             OUT_BCS_BATCH(batch, 0);
577     OUT_BCS_BATCH(batch, 0);
578     OUT_BCS_BATCH(batch, 0);
579
580         /* Bitplane 7-9 */ 
581     if (gen7_mfd_context->bitplane_read_buffer.valid)
582         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
583                       I915_GEM_DOMAIN_INSTRUCTION, 0,
584                       0);
585     else
586         OUT_BCS_BATCH(batch, 0);
587     OUT_BCS_BATCH(batch, 0);
588     OUT_BCS_BATCH(batch, 0);
589     ADVANCE_BCS_BATCH(batch);
590 }
591
592 static void
593 gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
594                                  struct decode_state *decode_state,
595                                  int standard_select,
596                                  struct gen7_mfd_context *gen7_mfd_context)
597 {
598     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
599     struct i965_driver_data *i965 = i965_driver_data(ctx);
600
601     if (IS_STEPPING_BPLUS(i965)) {
602         gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
603                                                 standard_select, gen7_mfd_context);
604         return;
605     }
606
607     BEGIN_BCS_BATCH(batch, 4);
608     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
609
610     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
611         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
612                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
613                       0);
614     else
615         OUT_BCS_BATCH(batch, 0);
616                 
617     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
618         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
619                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
620                       0);
621     else
622         OUT_BCS_BATCH(batch, 0);
623
624     if (gen7_mfd_context->bitplane_read_buffer.valid)
625         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
626                       I915_GEM_DOMAIN_INSTRUCTION, 0,
627                       0);
628     else
629         OUT_BCS_BATCH(batch, 0);
630
631     ADVANCE_BCS_BATCH(batch);
632 }
633
634 #if 0
635 static void
636 gen7_mfd_aes_state(VADriverContextP ctx,
637                    struct decode_state *decode_state,
638                    int standard_select)
639 {
640     /* FIXME */
641 }
642 #endif
643
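/*
 * Load one quantizer matrix through MFX_QM_STATE (18 DWords: header, matrix
 * type, then a fixed 64-byte payload).  qm_length may be smaller than 64
 * (the AVC 4x4 lists pass 3 * 16 bytes); the local qm_buffer is not cleared
 * first, so only the first qm_length bytes of the payload are meaningful.
 */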
644 static void
645 gen75_mfd_qm_state(VADriverContextP ctx,
646                   int qm_type,
647                   unsigned char *qm,
648                   int qm_length,
649                   struct gen7_mfd_context *gen7_mfd_context)
650 {
651     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
652     unsigned int qm_buffer[16];
653
654     assert(qm_length <= 16 * 4);
655     memcpy(qm_buffer, qm, qm_length);
656
657     BEGIN_BCS_BATCH(batch, 18);
658     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
659     OUT_BCS_BATCH(batch, qm_type << 0);
660     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
661     ADVANCE_BCS_BATCH(batch);
662 }
663
664 #if 0
665 static void
666 gen7_mfd_wait(VADriverContextP ctx,
667               struct decode_state *decode_state,
668               int standard_select,
669               struct gen7_mfd_context *gen7_mfd_context)
670 {
671     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
672
673     BEGIN_BCS_BATCH(batch, 1);
674     OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
675     ADVANCE_BCS_BATCH(batch);
676 }
677 #endif
678
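/*
 * MFX_AVC_IMG_STATE from the VA picture parameters: picture size in MBs,
 * chroma QP offsets and the various coding flags.  img_struct uses the
 * encoding seen below (0 = frame, 1 = top field, 3 = bottom field), and
 * mbaff_frame_flag is only set for MBAFF frames
 * (mb_adaptive_frame_field_flag without field_pic_flag).  Only 4:2:0 and
 * monochrome streams are accepted by the MFX unit.
 */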
679 static void
680 gen75_mfd_avc_img_state(VADriverContextP ctx,
681                        struct decode_state *decode_state,
682                        struct gen7_mfd_context *gen7_mfd_context)
683 {
684     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
685     int img_struct;
686     int mbaff_frame_flag;
687     unsigned int width_in_mbs, height_in_mbs;
688     VAPictureParameterBufferH264 *pic_param;
689
690     assert(decode_state->pic_param && decode_state->pic_param->buffer);
691     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
692     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
693
694     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
695         img_struct = 1;
696     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
697         img_struct = 3;
698     else
699         img_struct = 0;
700
701     if ((img_struct & 0x1) == 0x1) {
702         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
703     } else {
704         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
705     }
706
707     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
708         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
709         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
710     } else {
711         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
712     }
713
714     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
715                         !pic_param->pic_fields.bits.field_pic_flag);
716
717     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
718     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
719
720     /* MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
721     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
722            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
723     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
724
725     BEGIN_BCS_BATCH(batch, 17);
726     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
727     OUT_BCS_BATCH(batch, 
728                   width_in_mbs * height_in_mbs);
729     OUT_BCS_BATCH(batch, 
730                   ((height_in_mbs - 1) << 16) | 
731                   ((width_in_mbs - 1) << 0));
732     OUT_BCS_BATCH(batch, 
733                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
734                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
735                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
736                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
737                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
738                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
739                   (img_struct << 8));
740     OUT_BCS_BATCH(batch,
741                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
742                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
743                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
744                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
745                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
746                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
747                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
748                   (mbaff_frame_flag << 1) |
749                   (pic_param->pic_fields.bits.field_pic_flag << 0));
750     OUT_BCS_BATCH(batch, 0);
751     OUT_BCS_BATCH(batch, 0);
752     OUT_BCS_BATCH(batch, 0);
753     OUT_BCS_BATCH(batch, 0);
754     OUT_BCS_BATCH(batch, 0);
755     OUT_BCS_BATCH(batch, 0);
756     OUT_BCS_BATCH(batch, 0);
757     OUT_BCS_BATCH(batch, 0);
758     OUT_BCS_BATCH(batch, 0);
759     OUT_BCS_BATCH(batch, 0);
760     OUT_BCS_BATCH(batch, 0);
761     OUT_BCS_BATCH(batch, 0);
762     ADVANCE_BCS_BATCH(batch);
763 }
764
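/*
 * Send the AVC scaling lists: use the application's VAIQMatrixBufferH264 if
 * one was supplied, otherwise fall back to the flat default matrices set up
 * in gen75_mfd_avc_context_init().  The two 8x8 lists are only loaded when
 * transform_8x8_mode_flag is set.
 */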
765 static void
766 gen75_mfd_avc_qm_state(VADriverContextP ctx,
767                       struct decode_state *decode_state,
768                       struct gen7_mfd_context *gen7_mfd_context)
769 {
770     VAIQMatrixBufferH264 *iq_matrix;
771     VAPictureParameterBufferH264 *pic_param;
772
773     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
774         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
775     else
776         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
777
778     assert(decode_state->pic_param && decode_state->pic_param->buffer);
779     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
780
781     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
782     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
783
784     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
785         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
786         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
787     }
788 }
789
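/*
 * B-stepping layout of MFX_AVC_DIRECTMODE_STATE: 71 DWords instead of 69.
 * For each reference only the top-field DMV buffer address is programmed
 * (the second DWord of every pair stays zero), whereas the A-stepping form
 * below programs a second DMV address as well (the bottom-field buffer, or
 * the top one again when no bottom buffer exists).  The POC list at the end
 * is identical in both forms.
 */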
790 static void
791 gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
792                               VAPictureParameterBufferH264 *pic_param,
793                               VASliceParameterBufferH264 *slice_param,
794                               struct gen7_mfd_context *gen7_mfd_context)
795 {
796     struct i965_driver_data *i965 = i965_driver_data(ctx);
797     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
798     struct object_surface *obj_surface;
799     struct gen7_avc_surface *gen7_avc_surface;
800     VAPictureH264 *va_pic;
801     int i, j;
802
803     BEGIN_BCS_BATCH(batch, 71);
804     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
805
806     /* reference surfaces 0..15 */
807     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
808         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
809             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
810             assert(obj_surface);
811             gen7_avc_surface = obj_surface->private_data;
812
813             if (gen7_avc_surface == NULL) {
814                 OUT_BCS_BATCH(batch, 0);
815                 OUT_BCS_BATCH(batch, 0);
816             } else {
817                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
818                               I915_GEM_DOMAIN_INSTRUCTION, 0,
819                               0);
820                 OUT_BCS_BATCH(batch, 0);
821             }
822         } else {
823             OUT_BCS_BATCH(batch, 0);
824             OUT_BCS_BATCH(batch, 0);
825         }
826     }
827         OUT_BCS_BATCH(batch, 0);
828
829     /* the current decoding frame/field */
830     va_pic = &pic_param->CurrPic;
831     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
832     obj_surface = SURFACE(va_pic->picture_id);
833     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
834     gen7_avc_surface = obj_surface->private_data;
835
836     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
837                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
838                   0);
839
840         OUT_BCS_BATCH(batch, 0);
841         OUT_BCS_BATCH(batch, 0);
842
843     /* POC List */
844     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
845         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
846             int found = 0;
847             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
848                 va_pic = &pic_param->ReferenceFrames[j];
849                 
850                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
851                     continue;
852
853                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
854                     found = 1;
855                     break;
856                 }
857             }
858
859             assert(found == 1);
860             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
861             
862             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
863             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
864         } else {
865             OUT_BCS_BATCH(batch, 0);
866             OUT_BCS_BATCH(batch, 0);
867         }
868     }
869
870     va_pic = &pic_param->CurrPic;
871     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
872     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
873
874     ADVANCE_BCS_BATCH(batch);
875 }
876
877 static void
878 gen75_mfd_avc_directmode_state(VADriverContextP ctx,
879                               VAPictureParameterBufferH264 *pic_param,
880                               VASliceParameterBufferH264 *slice_param,
881                               struct gen7_mfd_context *gen7_mfd_context)
882 {
883     struct i965_driver_data *i965 = i965_driver_data(ctx);
884     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
885     struct object_surface *obj_surface;
886     struct gen7_avc_surface *gen7_avc_surface;
887     VAPictureH264 *va_pic;
888     int i, j;
889
890     if (IS_STEPPING_BPLUS(i965)) {
891         gen75_mfd_avc_directmode_state_bplus(ctx, pic_param, slice_param,
892                                              gen7_mfd_context);
893
894         return;
895     }
896
897     BEGIN_BCS_BATCH(batch, 69);
898     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
899
900     /* reference surfaces 0..15 */
901     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
902         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
903             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
904             assert(obj_surface);
905             gen7_avc_surface = obj_surface->private_data;
906
907             if (gen7_avc_surface == NULL) {
908                 OUT_BCS_BATCH(batch, 0);
909                 OUT_BCS_BATCH(batch, 0);
910             } else {
911                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
912                               I915_GEM_DOMAIN_INSTRUCTION, 0,
913                               0);
914
915                 if (gen7_avc_surface->dmv_bottom_flag == 1)
916                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
917                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
918                                   0);
919                 else
920                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
921                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
922                                   0);
923             }
924         } else {
925             OUT_BCS_BATCH(batch, 0);
926             OUT_BCS_BATCH(batch, 0);
927         }
928     }
929
930     /* the current decoding frame/field */
931     va_pic = &pic_param->CurrPic;
932     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
933     obj_surface = SURFACE(va_pic->picture_id);
934     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
935     gen7_avc_surface = obj_surface->private_data;
936
937     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
938                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
939                   0);
940
941     if (gen7_avc_surface->dmv_bottom_flag == 1)
942         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
943                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
944                       0);
945     else
946         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
947                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
948                       0);
949
950     /* POC List */
951     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
952         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
953             int found = 0;
954             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
955                 va_pic = &pic_param->ReferenceFrames[j];
956                 
957                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
958                     continue;
959
960                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
961                     found = 1;
962                     break;
963                 }
964             }
965
966             assert(found == 1);
967             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
968             
969             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
970             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
971         } else {
972             OUT_BCS_BATCH(batch, 0);
973             OUT_BCS_BATCH(batch, 0);
974         }
975     }
976
977     va_pic = &pic_param->CurrPic;
978     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
979     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
980
981     ADVANCE_BCS_BATCH(batch);
982 }
983
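/*
 * MFX_AVC_SLICE_STATE for one slice: SI/SP are mapped onto I/P, the active
 * reference counts come from the slice header, and first_mb_in_slice is
 * converted into horizontal/vertical MB positions (doubled for MBAFF
 * pictures).  Illustrative example: with width_in_mbs = 120 and
 * first_mb_in_slice = 300, slice_hor_pos = 300 % 120 = 60 and
 * slice_ver_pos = 300 / 120 = 2.  The last slice of the picture is flagged
 * when there is no next_slice_param.
 */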
984 static void
985 gen75_mfd_avc_slice_state(VADriverContextP ctx,
986                          VAPictureParameterBufferH264 *pic_param,
987                          VASliceParameterBufferH264 *slice_param,
988                          VASliceParameterBufferH264 *next_slice_param,
989                          struct gen7_mfd_context *gen7_mfd_context)
990 {
991     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
992     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
993     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
994     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
995     int num_ref_idx_l0, num_ref_idx_l1;
996     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
997                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
998     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
999     int slice_type;
1000
1001     if (slice_param->slice_type == SLICE_TYPE_I ||
1002         slice_param->slice_type == SLICE_TYPE_SI) {
1003         slice_type = SLICE_TYPE_I;
1004     } else if (slice_param->slice_type == SLICE_TYPE_P ||
1005                slice_param->slice_type == SLICE_TYPE_SP) {
1006         slice_type = SLICE_TYPE_P;
1007     } else { 
1008         assert(slice_param->slice_type == SLICE_TYPE_B);
1009         slice_type = SLICE_TYPE_B;
1010     }
1011
1012     if (slice_type == SLICE_TYPE_I) {
1013         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
1014         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
1015         num_ref_idx_l0 = 0;
1016         num_ref_idx_l1 = 0;
1017     } else if (slice_type == SLICE_TYPE_P) {
1018         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
1019         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
1020         num_ref_idx_l1 = 0;
1021     } else {
1022         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
1023         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
1024     }
1025
1026     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
1027     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
1028     slice_ver_pos = first_mb_in_slice / width_in_mbs;
1029
1030     if (next_slice_param) {
1031         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
1032         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
1033         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
1034     } else {
1035         next_slice_hor_pos = 0;
1036         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
1037     }
1038
1039     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
1040     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
1041     OUT_BCS_BATCH(batch, slice_type);
1042     OUT_BCS_BATCH(batch, 
1043                   (num_ref_idx_l1 << 24) |
1044                   (num_ref_idx_l0 << 16) |
1045                   (slice_param->chroma_log2_weight_denom << 8) |
1046                   (slice_param->luma_log2_weight_denom << 0));
1047     OUT_BCS_BATCH(batch, 
1048                   (slice_param->direct_spatial_mv_pred_flag << 29) |
1049                   (slice_param->disable_deblocking_filter_idc << 27) |
1050                   (slice_param->cabac_init_idc << 24) |
1051                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
1052                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
1053                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
1054     OUT_BCS_BATCH(batch, 
1055                   (slice_ver_pos << 24) |
1056                   (slice_hor_pos << 16) | 
1057                   (first_mb_in_slice << 0));
1058     OUT_BCS_BATCH(batch,
1059                   (next_slice_ver_pos << 16) |
1060                   (next_slice_hor_pos << 0));
1061     OUT_BCS_BATCH(batch, 
1062                   (next_slice_param == NULL) << 19); /* last slice flag */
1063     OUT_BCS_BATCH(batch, 0);
1064     OUT_BCS_BATCH(batch, 0);
1065     OUT_BCS_BATCH(batch, 0);
1066     OUT_BCS_BATCH(batch, 0);
1067     ADVANCE_BCS_BATCH(batch);
1068 }
1069
1070 static inline void
1071 gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
1072                            VAPictureParameterBufferH264 *pic_param,
1073                            VASliceParameterBufferH264 *slice_param,
1074                            struct gen7_mfd_context *gen7_mfd_context)
1075 {
1076     gen6_send_avc_ref_idx_state(
1077         gen7_mfd_context->base.batch,
1078         slice_param,
1079         gen7_mfd_context->reference_surface
1080     );
1081 }
1082
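/*
 * Explicit weighted-prediction tables (MFX_AVC_WEIGHTOFFSET_STATE): one L0
 * table for P/SP slices with weighted_pred_flag set, L0 and L1 tables for B
 * slices with weighted_bipred_idc == 1, and nothing for other combinations
 * (including implicit bipred, idc == 2).  Each table packs 32 entries of
 * {luma weight, luma offset, Cb weight, Cb offset, Cr weight, Cr offset} as
 * 16-bit values, i.e. 96 DWords of payload.
 */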
1083 static void
1084 gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
1085                                 VAPictureParameterBufferH264 *pic_param,
1086                                 VASliceParameterBufferH264 *slice_param,
1087                                 struct gen7_mfd_context *gen7_mfd_context)
1088 {
1089     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1090     int i, j, num_weight_offset_table = 0;
1091     short weightoffsets[32 * 6];
1092
1093     if ((slice_param->slice_type == SLICE_TYPE_P ||
1094          slice_param->slice_type == SLICE_TYPE_SP) &&
1095         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
1096         num_weight_offset_table = 1;
1097     }
1098     
1099     if ((slice_param->slice_type == SLICE_TYPE_B) &&
1100         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
1101         num_weight_offset_table = 2;
1102     }
1103
1104     for (i = 0; i < num_weight_offset_table; i++) {
1105         BEGIN_BCS_BATCH(batch, 98);
1106         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
1107         OUT_BCS_BATCH(batch, i);
1108
1109         if (i == 0) {
1110             for (j = 0; j < 32; j++) {
1111                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
1112                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
1113                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
1114                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
1115                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
1116                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
1117             }
1118         } else {
1119             for (j = 0; j < 32; j++) {
1120                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
1121                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
1122                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
1123                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
1124                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
1125                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
1126             }
1127         }
1128
1129         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
1130         ADVANCE_BCS_BATCH(batch);
1131     }
1132 }
1133
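/*
 * The slice_data_bit_offset passed through VA-API is assumed here not to
 * include the 00 00 03 emulation-prevention bytes.  This helper rescans the
 * slice header bytes, widens the offset for every 00 00 03 sequence it
 * finds, and byte-aligns the result for CABAC slices, yielding an offset
 * into the bitstream as it sits in the slice data buffer (emulation-
 * prevention bytes included).
 */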
1134 static int
1135 gen75_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
1136 {
1137     int out_slice_data_bit_offset;
1138     int slice_header_size = in_slice_data_bit_offset / 8;
1139     int i, j;
1140
1141     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1142         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
1143             i++, j += 2;
1144         }
1145     }
1146
1147     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1148
1149     if (mode_flag == ENTROPY_CABAC)
1150         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
1151
1152     return out_slice_data_bit_offset;
1153 }
1154
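/*
 * One MFD_AVC_BSD_OBJECT per slice: the slice data size and its offset
 * inside the indirect (slice data) buffer, the byte and bit position of the
 * first MB computed by gen75_mfd_avc_get_slice_bit_offset(), and a
 * "last slice" flag when there is no following slice.
 */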
1155 static void
1156 gen75_mfd_avc_bsd_object(VADriverContextP ctx,
1157                         VAPictureParameterBufferH264 *pic_param,
1158                         VASliceParameterBufferH264 *slice_param,
1159                         dri_bo *slice_data_bo,
1160                         VASliceParameterBufferH264 *next_slice_param,
1161                         struct gen7_mfd_context *gen7_mfd_context)
1162 {
1163     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1164     int slice_data_bit_offset;
1165     uint8_t *slice_data = NULL;
1166
1167     dri_bo_map(slice_data_bo, 0);
1168     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1169     slice_data_bit_offset = gen75_mfd_avc_get_slice_bit_offset(slice_data,
1170                                                               pic_param->pic_fields.bits.entropy_coding_mode_flag,
1171                                                               slice_param->slice_data_bit_offset);
1172     dri_bo_unmap(slice_data_bo);
1173
1174     /* the input bitstream format on GEN7 differs from GEN6 */
1175     BEGIN_BCS_BATCH(batch, 6);
1176     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
1177     OUT_BCS_BATCH(batch, 
1178                   (slice_param->slice_data_size));
1179     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
1180     OUT_BCS_BATCH(batch,
1181                   (0 << 31) |
1182                   (0 << 14) |
1183                   (0 << 12) |
1184                   (0 << 10) |
1185                   (0 << 8));
1186     OUT_BCS_BATCH(batch,
1187                   ((slice_data_bit_offset >> 3) << 16) |
1188                   (0 << 5)  |
1189                   (0 << 4)  |
1190                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
1191                   (slice_data_bit_offset & 0x7));
1192     OUT_BCS_BATCH(batch, 0);
1193     ADVANCE_BCS_BATCH(batch);
1194 }
1195
1196 static inline void
1197 gen75_mfd_avc_context_init(
1198     VADriverContextP         ctx,
1199     struct gen7_mfd_context *gen7_mfd_context
1200 )
1201 {
1202     /* Initialize flat scaling lists */
1203     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
1204 }
1205
1206 static void
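/*
 * Per-picture setup for AVC decode.  All slice headers are scanned first: if
 * any slice keeps deblocking enabled (disable_deblocking_filter_idc != 1)
 * the picture is routed through the post-deblocking output, otherwise
 * through the pre-deblocking output.  The frame store table is refreshed,
 * the render target gets its NV12 BO and DMV buffers, and the row-store
 * scratch buffers are (re)allocated, sized by picture width: intra row store
 * width_in_mbs * 64 bytes, deblocking filter row store width_in_mbs * 256,
 * BSD/MPC and MPR row stores width_in_mbs * 128 each.
 */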
1207 gen75_mfd_avc_decode_init(VADriverContextP ctx,
1208                          struct decode_state *decode_state,
1209                          struct gen7_mfd_context *gen7_mfd_context)
1210 {
1211     VAPictureParameterBufferH264 *pic_param;
1212     VASliceParameterBufferH264 *slice_param;
1213     VAPictureH264 *va_pic;
1214     struct i965_driver_data *i965 = i965_driver_data(ctx);
1215     struct object_surface *obj_surface;
1216     dri_bo *bo;
1217     int i, j, enable_avc_ildb = 0;
1218     unsigned int width_in_mbs, height_in_mbs;
1219
1220     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
1221         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1222         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1223
1224         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1225             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1226             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1227                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1228                    (slice_param->slice_type == SLICE_TYPE_P) ||
1229                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1230                    (slice_param->slice_type == SLICE_TYPE_B));
1231
1232             if (slice_param->disable_deblocking_filter_idc != 1) {
1233                 enable_avc_ildb = 1;
1234                 break;
1235             }
1236
1237             slice_param++;
1238         }
1239     }
1240
1241     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1242     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1243     gen75_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
1244     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
1245     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
1246     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
1247     assert(height_in_mbs > 0 && height_in_mbs <= 256);
1248
1249     /* Current decoded picture */
1250     va_pic = &pic_param->CurrPic;
1251     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
1252     obj_surface = SURFACE(va_pic->picture_id);
1253     assert(obj_surface);
1254     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
1255     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
1256     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1257     gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
1258
1259     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1260     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1261     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1262     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
1263
1264     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1265     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1266     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1267     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1268
1269     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1270     bo = dri_bo_alloc(i965->intel.bufmgr,
1271                       "intra row store",
1272                       width_in_mbs * 64,
1273                       0x1000);
1274     assert(bo);
1275     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1276     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1277
1278     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1279     bo = dri_bo_alloc(i965->intel.bufmgr,
1280                       "deblocking filter row store",
1281                       width_in_mbs * 64 * 4,
1282                       0x1000);
1283     assert(bo);
1284     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1285     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1286
1287     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1288     bo = dri_bo_alloc(i965->intel.bufmgr,
1289                       "bsd mpc row store",
1290                       width_in_mbs * 64 * 2,
1291                       0x1000);
1292     assert(bo);
1293     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1294     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1295
1296     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
1297     bo = dri_bo_alloc(i965->intel.bufmgr,
1298                       "mpr row store",
1299                       width_in_mbs * 64 * 2,
1300                       0x1000);
1301     assert(bo);
1302     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1303     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1304
1305     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1306 }
1307
1308 static void
1309 gen75_mfd_avc_decode_picture(VADriverContextP ctx,
1310                             struct decode_state *decode_state,
1311                             struct gen7_mfd_context *gen7_mfd_context)
1312 {
1313     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1314     VAPictureParameterBufferH264 *pic_param;
1315     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
1316     dri_bo *slice_data_bo;
1317     int i, j;
1318
1319     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1320     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1321     gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
1322
1323     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1324     intel_batchbuffer_emit_mi_flush(batch);
1325     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1326     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1327     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1328     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1329     gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
1330     gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
1331
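    /*
     * Walk every slice parameter buffer. Each slice also needs a pointer to
     * the next slice (within the same buffer, or the first element of the
     * following buffer) so the slice/BSD commands know where it ends.
     */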
1332     for (j = 0; j < decode_state->num_slice_params; j++) {
1333         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1334         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1335         slice_data_bo = decode_state->slice_datas[j]->bo;
1336         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
1337
1338         if (j == decode_state->num_slice_params - 1)
1339             next_slice_group_param = NULL;
1340         else
1341             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1342
1343         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1344             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1345             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1346                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1347                    (slice_param->slice_type == SLICE_TYPE_P) ||
1348                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1349                    (slice_param->slice_type == SLICE_TYPE_B));
1350
1351             if (i < decode_state->slice_params[j]->num_elements - 1)
1352                 next_slice_param = slice_param + 1;
1353             else
1354                 next_slice_param = next_slice_group_param;
1355
1356             gen75_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
1357             gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1358             gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1359             gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1360             gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1361             slice_param++;
1362         }
1363     }
1364
1365     intel_batchbuffer_end_atomic(batch);
1366     intel_batchbuffer_flush(batch);
1367 }
1368
1369 static void
1370 gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
1371                            struct decode_state *decode_state,
1372                            struct gen7_mfd_context *gen7_mfd_context)
1373 {
1374     VAPictureParameterBufferMPEG2 *pic_param;
1375     struct i965_driver_data *i965 = i965_driver_data(ctx);
1376     struct object_surface *obj_surface;
1377     dri_bo *bo;
1378     unsigned int width_in_mbs;
1379
1380     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1381     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1382     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1383
1384     mpeg2_set_reference_surfaces(
1385         ctx,
1386         gen7_mfd_context->reference_surface,
1387         decode_state,
1388         pic_param
1389     );
1390
1391     /* Current decoded picture */
1392     obj_surface = SURFACE(decode_state->current_render_target);
1393     assert(obj_surface);
1394     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1395
1396     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1397     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1398     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1399     gen7_mfd_context->pre_deblocking_output.valid = 1;
1400
1401     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1402     bo = dri_bo_alloc(i965->intel.bufmgr,
1403                       "bsd mpc row store",
1404                       width_in_mbs * 96,
1405                       0x1000);
1406     assert(bo);
1407     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1408     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1409
1410     gen7_mfd_context->post_deblocking_output.valid = 0;
1411     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1412     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1413     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1414     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1415 }
1416
1417 static void
1418 gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
1419                          struct decode_state *decode_state,
1420                          struct gen7_mfd_context *gen7_mfd_context)
1421 {
1422     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1423     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1424     VAPictureParameterBufferMPEG2 *pic_param;
1425     unsigned int slice_concealment_disable_bit = 0;
1426
1427     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1428     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1429
1430     slice_concealment_disable_bit = 1;
1431
1432     BEGIN_BCS_BATCH(batch, 13);
1433     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1434     OUT_BCS_BATCH(batch,
1435                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1436                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1437                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1438                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1439                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1440                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1441                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1442                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1443                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1444                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1445                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1446                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1447     OUT_BCS_BATCH(batch,
1448                   pic_param->picture_coding_type << 9);
1449     OUT_BCS_BATCH(batch,
1450                   (slice_concealment_disable_bit << 31) |
1451                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1452                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1453     OUT_BCS_BATCH(batch, 0);
1454     OUT_BCS_BATCH(batch, 0);
1455     OUT_BCS_BATCH(batch, 0);
1456     OUT_BCS_BATCH(batch, 0);
1457     OUT_BCS_BATCH(batch, 0);
1458     OUT_BCS_BATCH(batch, 0);
1459     OUT_BCS_BATCH(batch, 0);
1460     OUT_BCS_BATCH(batch, 0);
1461     OUT_BCS_BATCH(batch, 0);
1462     ADVANCE_BCS_BATCH(batch);
1463 }
1464
1465 static void
1466 gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
1467                         struct decode_state *decode_state,
1468                         struct gen7_mfd_context *gen7_mfd_context)
1469 {
1470     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1471     int i, j;
1472
1473     /* Update internal QM state */
1474     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1475         VAIQMatrixBufferMPEG2 * const iq_matrix =
1476             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1477
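        /*
         * Refresh the cached intra/non-intra matrices whenever the
         * application reloads them (or when nothing has been cached yet,
         * signalled by -1), converting from the zig-zag order used by VA-API
         * to the raster order expected by MFX_QM_STATE.
         */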
1478         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1479             iq_matrix->load_intra_quantiser_matrix) {
1480             gen_iq_matrix->load_intra_quantiser_matrix =
1481                 iq_matrix->load_intra_quantiser_matrix;
1482             if (iq_matrix->load_intra_quantiser_matrix) {
1483                 for (j = 0; j < 64; j++)
1484                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1485                         iq_matrix->intra_quantiser_matrix[j];
1486             }
1487         }
1488
1489         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1490             iq_matrix->load_non_intra_quantiser_matrix) {
1491             gen_iq_matrix->load_non_intra_quantiser_matrix =
1492                 iq_matrix->load_non_intra_quantiser_matrix;
1493             if (iq_matrix->load_non_intra_quantiser_matrix) {
1494                 for (j = 0; j < 64; j++)
1495                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1496                         iq_matrix->non_intra_quantiser_matrix[j];
1497             }
1498         }
1499     }
1500
1501     /* Commit QM state to HW */
1502     for (i = 0; i < 2; i++) {
1503         unsigned char *qm = NULL;
1504         int qm_type;
1505
1506         if (i == 0) {
1507             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1508                 qm = gen_iq_matrix->intra_quantiser_matrix;
1509                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1510             }
1511         } else {
1512             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1513                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1514                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1515             }
1516         }
1517
1518         if (!qm)
1519             continue;
1520
1521         gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1522     }
1523 }
1524
1525 static void
1526 gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1527                           VAPictureParameterBufferMPEG2 *pic_param,
1528                           VASliceParameterBufferMPEG2 *slice_param,
1529                           VASliceParameterBufferMPEG2 *next_slice_param,
1530                           struct gen7_mfd_context *gen7_mfd_context)
1531 {
1532     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1533     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1534     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1535     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1536
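    /*
     * Some applications report slice_vertical_position in frame units even
     * for field pictures; when the workaround flag is set, the positions are
     * halved back into field units below.
     */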
1537     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1538         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1539         is_field_pic = 1;
1540     is_field_pic_wa = is_field_pic &&
1541         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1542
1543     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1544     hpos0 = slice_param->slice_horizontal_position;
1545
1546     if (next_slice_param == NULL) {
1547         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1548         hpos1 = 0;
1549     } else {
1550         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1551         hpos1 = next_slice_param->slice_horizontal_position;
1552     }
1553
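    /*
     * Number of macroblocks in this slice: the distance in raster-scan order
     * from this slice's first macroblock to the first macroblock of the next
     * slice (or to the end of the picture for the last slice).
     */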
1554     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1555
1556     BEGIN_BCS_BATCH(batch, 5);
1557     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1558     OUT_BCS_BATCH(batch, 
1559                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1560     OUT_BCS_BATCH(batch, 
1561                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1562     OUT_BCS_BATCH(batch,
1563                   hpos0 << 24 |
1564                   vpos0 << 16 |
1565                   mb_count << 8 |
1566                   (next_slice_param == NULL) << 5 |
1567                   (next_slice_param == NULL) << 3 |
1568                   (slice_param->macroblock_offset & 0x7));
1569     OUT_BCS_BATCH(batch,
1570                   (slice_param->quantiser_scale_code << 24) |
1571                   (vpos1 << 8 | hpos1));
1572     ADVANCE_BCS_BATCH(batch);
1573 }
1574
1575 static void
1576 gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1577                               struct decode_state *decode_state,
1578                               struct gen7_mfd_context *gen7_mfd_context)
1579 {
1580     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1581     VAPictureParameterBufferMPEG2 *pic_param;
1582     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1583     dri_bo *slice_data_bo;
1584     int i, j;
1585
1586     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1587     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1588
1589     gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1590     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1591     intel_batchbuffer_emit_mi_flush(batch);
1592     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1593     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1594     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1595     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1596     gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1597     gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1598
1599     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1600         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1601             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1602
1603     for (j = 0; j < decode_state->num_slice_params; j++) {
1604         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1605         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1606         slice_data_bo = decode_state->slice_datas[j]->bo;
1607         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1608
1609         if (j == decode_state->num_slice_params - 1)
1610             next_slice_group_param = NULL;
1611         else
1612             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1613
1614         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1615             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1616
1617             if (i < decode_state->slice_params[j]->num_elements - 1)
1618                 next_slice_param = slice_param + 1;
1619             else
1620                 next_slice_param = next_slice_group_param;
1621
1622             gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1623             slice_param++;
1624         }
1625     }
1626
1627     intel_batchbuffer_end_atomic(batch);
1628     intel_batchbuffer_flush(batch);
1629 }
1630
1631 static const int va_to_gen7_vc1_pic_type[5] = {
1632     GEN7_VC1_I_PICTURE,
1633     GEN7_VC1_P_PICTURE,
1634     GEN7_VC1_B_PICTURE,
1635     GEN7_VC1_BI_PICTURE,
1636     GEN7_VC1_P_PICTURE,
1637 };
1638
1639 static const int va_to_gen7_vc1_mv[4] = {
1640     1, /* 1-MV */
1641     2, /* 1-MV half-pel */
1642     3, /* 1-MV half-pel bilinear */
1643     0, /* Mixed MV */
1644 };
1645
1646 static const int b_picture_scale_factor[21] = {
1647     128, 85,  170, 64,  192,
1648     51,  102, 153, 204, 43,
1649     215, 37,  74,  111, 148,
1650     185, 222, 32,  96,  160, 
1651     224,
1652 };
1653
1654 static const int va_to_gen7_vc1_condover[3] = {
1655     0,
1656     2,
1657     3
1658 };
1659
1660 static const int va_to_gen7_vc1_profile[4] = {
1661     GEN7_VC1_SIMPLE_PROFILE,
1662     GEN7_VC1_MAIN_PROFILE,
1663     GEN7_VC1_RESERVED_PROFILE,
1664     GEN7_VC1_ADVANCED_PROFILE
1665 };
1666
1667 static void 
1668 gen75_mfd_free_vc1_surface(void **data)
1669 {
1670     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1671
1672     if (!gen7_vc1_surface)
1673         return;
1674
1675     dri_bo_unreference(gen7_vc1_surface->dmv);
1676     free(gen7_vc1_surface);
1677     *data = NULL;
1678 }
1679
1680 static void
1681 gen75_mfd_init_vc1_surface(VADriverContextP ctx, 
1682                           VAPictureParameterBufferVC1 *pic_param,
1683                           struct object_surface *obj_surface)
1684 {
1685     struct i965_driver_data *i965 = i965_driver_data(ctx);
1686     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1687     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1688     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1689
1690     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
1691
1692     if (!gen7_vc1_surface) {
1693         gen7_vc1_surface = calloc(1, sizeof(struct gen7_vc1_surface));
1694         assert((obj_surface->size & 0x3f) == 0);
1695         obj_surface->private_data = gen7_vc1_surface;
1696     }
1697
1698     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1699
1700     if (gen7_vc1_surface->dmv == NULL) {
1701         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1702                                              "direct mv w/r buffer",
1703                                              width_in_mbs * height_in_mbs * 64,
1704                                              0x1000);
1705     }
1706 }
1707
1708 static void
1709 gen75_mfd_vc1_decode_init(VADriverContextP ctx,
1710                          struct decode_state *decode_state,
1711                          struct gen7_mfd_context *gen7_mfd_context)
1712 {
1713     VAPictureParameterBufferVC1 *pic_param;
1714     struct i965_driver_data *i965 = i965_driver_data(ctx);
1715     struct object_surface *obj_surface;
1716     int i;
1717     dri_bo *bo;
1718     int width_in_mbs;
1719
1720     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1721     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1722     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1723
1724     /* reference picture */
1725     obj_surface = SURFACE(pic_param->forward_reference_picture);
1726
1727     if (obj_surface && obj_surface->bo)
1728         gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1729     else
1730         gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1731
1732     obj_surface = SURFACE(pic_param->backward_reference_picture);
1733
1734     if (obj_surface && obj_surface->bo)
1735         gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1736     else
1737         gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1738
1739     /* The remaining frame-store entries must still be valid: mirror the two reference pictures across them */
1740     for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
1741         gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
1742
1743     /* Current decoded picture */
1744     obj_surface = SURFACE(decode_state->current_render_target);
1745     assert(obj_surface);
1746     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1747     gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1748
1749     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1750     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1751     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1752     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1753
1754     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1755     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1756     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1757     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1758
1759     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1760     bo = dri_bo_alloc(i965->intel.bufmgr,
1761                       "intra row store",
1762                       width_in_mbs * 64,
1763                       0x1000);
1764     assert(bo);
1765     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1766     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1767
1768     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1769     bo = dri_bo_alloc(i965->intel.bufmgr,
1770                       "deblocking filter row store",
1771                       width_in_mbs * 6 * 64,
1772                       0x1000);
1773     assert(bo);
1774     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1775     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1776
1777     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1778     bo = dri_bo_alloc(i965->intel.bufmgr,
1779                       "bsd mpc row store",
1780                       width_in_mbs * 96,
1781                       0x1000);
1782     assert(bo);
1783     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1784     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1785
1786     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1787
1788     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1789     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1790     
1791     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1792         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1793         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1794         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1795         int src_w, src_h;
1796         uint8_t *src = NULL, *dst = NULL;
1797
1798         assert(decode_state->bit_plane->buffer);
1799         src = decode_state->bit_plane->buffer;
1800
1801         bo = dri_bo_alloc(i965->intel.bufmgr,
1802                           "VC-1 Bitplane",
1803                           bitplane_width * height_in_mbs,
1804                           0x1000);
1805         assert(bo);
1806         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1807
1808         dri_bo_map(bo, True);
1809         assert(bo->virtual);
1810         dst = bo->virtual;
1811
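        /*
         * Repack the VA-API bitplane (two macroblocks per byte, the first of
         * each pair in the high nibble, packed contiguously in raster order)
         * into the layout the hardware expects: per macroblock row, the
         * first macroblock of each byte in the low nibble, with every row
         * padded out to bitplane_width bytes.
         */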
1812         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1813             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1814                 int src_index, dst_index;
1815                 int src_shift;
1816                 uint8_t src_value;
1817
1818                 src_index = (src_h * width_in_mbs + src_w) / 2;
1819                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1820                 src_value = ((src[src_index] >> src_shift) & 0xf);
1821
1822                 dst_index = src_w / 2;
1823                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1824             }
1825
1826             if (src_w & 1)
1827                 dst[src_w / 2] >>= 4;
1828
1829             dst += bitplane_width;
1830         }
1831
1832         dri_bo_unmap(bo);
1833     } else
1834         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1835 }
1836
1837 static void
1838 gen75_mfd_vc1_pic_state(VADriverContextP ctx,
1839                        struct decode_state *decode_state,
1840                        struct gen7_mfd_context *gen7_mfd_context)
1841 {
1842     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1843     VAPictureParameterBufferVC1 *pic_param;
1844     struct i965_driver_data *i965 = i965_driver_data(ctx);
1845     struct object_surface *obj_surface;
1846     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1847     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1848     int unified_mv_mode;
1849     int ref_field_pic_polarity = 0;
1850     int scale_factor = 0;
1851     int trans_ac_y = 0;
1852     int dmv_surface_valid = 0;
1853     int brfd = 0;
1854     int fcm = 0;
1855     int picture_type;
1856     int profile;
1857     int overlap;
1858     int interpolation_mode = 0;
1859
1860     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1861     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1862
1863     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1864     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1865     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1866     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1867     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1868     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1869     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1870     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1871
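    /*
     * Translate the VC-1 DQUANT syntax elements (DQUANTFRM, DQPROFILE,
     * DQDBEDGE, DQSBEDGE, DQBILEVEL) into the ALTPQUANT configuration and
     * edge mask programmed into MFD_VC1_LONG_PIC_STATE.
     */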
1872     if (dquant == 0) {
1873         alt_pquant_config = 0;
1874         alt_pquant_edge_mask = 0;
1875     } else if (dquant == 2) {
1876         alt_pquant_config = 1;
1877         alt_pquant_edge_mask = 0xf;
1878     } else {
1879         assert(dquant == 1);
1880         if (dquantfrm == 0) {
1881             alt_pquant_config = 0;
1882             alt_pquant_edge_mask = 0;
1883             alt_pq = 0;
1884         } else {
1885             assert(dquantfrm == 1);
1886             alt_pquant_config = 1;
1887
1888             switch (dqprofile) {
1889             case 3:
1890                 if (dqbilevel == 0) {
1891                     alt_pquant_config = 2;
1892                     alt_pquant_edge_mask = 0;
1893                 } else {
1894                     assert(dqbilevel == 1);
1895                     alt_pquant_config = 3;
1896                     alt_pquant_edge_mask = 0;
1897                 }
1898                 break;
1899                 
1900             case 0:
1901                 alt_pquant_edge_mask = 0xf;
1902                 break;
1903
1904             case 1:
1905                 if (dqdbedge == 3)
1906                     alt_pquant_edge_mask = 0x9;
1907                 else
1908                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1909
1910                 break;
1911
1912             case 2:
1913                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1914                 break;
1915
1916             default:
1917                 assert(0);
1918             }
1919         }
1920     }
1921
1922     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1923         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1924         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1925     } else {
1926         assert(pic_param->mv_fields.bits.mv_mode < 4);
1927         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1928     }
1929
1930     if (pic_param->sequence_fields.bits.interlace == 1 &&
1931         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1932         /* FIXME: calculate reference field picture polarity */
1933         assert(0);
1934         ref_field_pic_polarity = 0;
1935     }
1936
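    /* BFRACTION indexes the 21-entry VC-1 B-picture scale-factor table. */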
1937     if (pic_param->b_picture_fraction < 21)
1938         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1939
1940     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1941     
1942     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1943         picture_type == GEN7_VC1_I_PICTURE)
1944         picture_type = GEN7_VC1_BI_PICTURE;
1945
1946     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I or BI picture */
1947         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1948     else
1949         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1950
1951
1952     if (picture_type == GEN7_VC1_B_PICTURE) {
1953         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1954
1955         obj_surface = SURFACE(pic_param->backward_reference_picture);
1956         assert(obj_surface);
1957         gen7_vc1_surface = obj_surface->private_data;
1958
1959         if (!gen7_vc1_surface || 
1960             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1961              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1962             dmv_surface_valid = 0;
1963         else
1964             dmv_surface_valid = 1;
1965     }
1966
1967     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1968
1969     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1970         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1971     else {
1972         if (pic_param->picture_fields.bits.top_field_first)
1973             fcm = 2;
1974         else
1975             fcm = 3;
1976     }
1977
1978     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1979         brfd = pic_param->reference_fields.bits.reference_distance;
1980         brfd = (scale_factor * brfd) >> 8;
1981         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1982
1983         if (brfd < 0)
1984             brfd = 0;
1985     }
1986
1987     overlap = pic_param->sequence_fields.bits.overlap;
1988     if (profile != GEN7_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
1989         overlap = 0;
1990
1991     assert(pic_param->conditional_overlap_flag < 3);
1992     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1993
1994     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1995         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1996          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1997         interpolation_mode = 9; /* Half-pel bilinear */
1998     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1999              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
2000               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
2001         interpolation_mode = 1; /* Half-pel bicubic */
2002     else
2003         interpolation_mode = 0; /* Quarter-pel bicubic */
2004
2005     BEGIN_BCS_BATCH(batch, 6);
2006     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
2007     OUT_BCS_BATCH(batch,
2008                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
2009                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
2010     OUT_BCS_BATCH(batch,
2011                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
2012                   dmv_surface_valid << 15 |
2013                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
2014                   pic_param->rounding_control << 13 |
2015                   pic_param->sequence_fields.bits.syncmarker << 12 |
2016                   interpolation_mode << 8 |
2017                   0 << 7 | /* FIXME: scale up or down ??? */
2018                   pic_param->range_reduction_frame << 6 |
2019                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
2020                   overlap << 4 |
2021                   !pic_param->picture_fields.bits.is_first_field << 3 |
2022                   (pic_param->sequence_fields.bits.profile == 3) << 0);
2023     OUT_BCS_BATCH(batch,
2024                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
2025                   picture_type << 26 |
2026                   fcm << 24 |
2027                   alt_pq << 16 |
2028                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
2029                   scale_factor << 0);
2030     OUT_BCS_BATCH(batch,
2031                   unified_mv_mode << 28 |
2032                   pic_param->mv_fields.bits.four_mv_switch << 27 |
2033                   pic_param->fast_uvmc_flag << 26 |
2034                   ref_field_pic_polarity << 25 |
2035                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
2036                   pic_param->reference_fields.bits.reference_distance << 20 |
2037                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
2038                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
2039                   pic_param->mv_fields.bits.extended_mv_range << 8 |
2040                   alt_pquant_edge_mask << 4 |
2041                   alt_pquant_config << 2 |
2042                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
2043                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
2044     OUT_BCS_BATCH(batch,
2045                   !!pic_param->bitplane_present.value << 31 |
2046                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
2047                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
2048                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
2049                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
2050                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
2051                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
2052                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
2053                   pic_param->mv_fields.bits.mv_table << 20 |
2054                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
2055                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
2056                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
2057                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
2058                   pic_param->mb_mode_table << 8 |
2059                   trans_ac_y << 6 |
2060                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
2061                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
2062                   pic_param->cbp_table << 0);
2063     ADVANCE_BCS_BATCH(batch);
2064 }
2065
2066 static void
2067 gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
2068                              struct decode_state *decode_state,
2069                              struct gen7_mfd_context *gen7_mfd_context)
2070 {
2071     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2072     VAPictureParameterBufferVC1 *pic_param;
2073     int intensitycomp_single;
2074
2075     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2076     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2077
2080     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
2081
2082     BEGIN_BCS_BATCH(batch, 6);
2083     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
2084     OUT_BCS_BATCH(batch,
2085                   0 << 14 | /* FIXME: double ??? */
2086                   0 << 12 |
2087                   intensitycomp_single << 10 |
2088                   intensitycomp_single << 8 |
2089                   0 << 4 | /* FIXME: interlace mode */
2090                   0);
2091     OUT_BCS_BATCH(batch,
2092                   pic_param->luma_shift << 16 |
2093                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
2094     OUT_BCS_BATCH(batch, 0);
2095     OUT_BCS_BATCH(batch, 0);
2096     OUT_BCS_BATCH(batch, 0);
2097     ADVANCE_BCS_BATCH(batch);
2098 }
2099
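/*
 * On B+ steppings (see IS_STEPPING_BPLUS()) MFX_VC1_DIRECTMODE_STATE is
 * emitted as 7 dwords: the direct-MV write and read addresses are each
 * followed by two additional dwords (emitted as zero here). The 3-dword
 * layout used on earlier steppings is kept in
 * gen75_mfd_vc1_directmode_state() below.
 */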
2100 static void
2101 gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
2102                               struct decode_state *decode_state,
2103                               struct gen7_mfd_context *gen7_mfd_context)
2104 {
2105     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2106     VAPictureParameterBufferVC1 *pic_param;
2107     struct i965_driver_data *i965 = i965_driver_data(ctx);
2108     struct object_surface *obj_surface;
2109     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2110
2111     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2112     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2113
2114     obj_surface = SURFACE(decode_state->current_render_target);
2115
2116     if (obj_surface && obj_surface->private_data) {
2117         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2118     }
2119
2120     obj_surface = SURFACE(pic_param->backward_reference_picture);
2121
2122     if (obj_surface && obj_surface->private_data) {
2123         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2124     }
2125
2126     BEGIN_BCS_BATCH(batch, 7);
2127     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
2128
2129     if (dmv_write_buffer)
2130         OUT_BCS_RELOC(batch, dmv_write_buffer,
2131                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2132                       0);
2133     else
2134         OUT_BCS_BATCH(batch, 0);
2135
2136     OUT_BCS_BATCH(batch, 0);
2137     OUT_BCS_BATCH(batch, 0);
2138
2139     if (dmv_read_buffer)
2140         OUT_BCS_RELOC(batch, dmv_read_buffer,
2141                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2142                       0);
2143     else
2144         OUT_BCS_BATCH(batch, 0);
2145     OUT_BCS_BATCH(batch, 0);
2146     OUT_BCS_BATCH(batch, 0);
2147
2148     ADVANCE_BCS_BATCH(batch);
2149 }
2150
2151
2152 static void
2153 gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
2154                               struct decode_state *decode_state,
2155                               struct gen7_mfd_context *gen7_mfd_context)
2156 {
2157     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2158     VAPictureParameterBufferVC1 *pic_param;
2159     struct i965_driver_data *i965 = i965_driver_data(ctx);
2160     struct object_surface *obj_surface;
2161     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2162
2163     if (IS_STEPPING_BPLUS(i965)) {
2164         gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
2165         return;
2166     }
2167     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2168     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2169
2170     obj_surface = SURFACE(decode_state->current_render_target);
2171
2172     if (obj_surface && obj_surface->private_data) {
2173         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2174     }
2175
2176     obj_surface = SURFACE(pic_param->backward_reference_picture);
2177
2178     if (obj_surface && obj_surface->private_data) {
2179         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2180     }
2181
2182     BEGIN_BCS_BATCH(batch, 3);
2183     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
2184
2185     if (dmv_write_buffer)
2186         OUT_BCS_RELOC(batch, dmv_write_buffer,
2187                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2188                       0);
2189     else
2190         OUT_BCS_BATCH(batch, 0);
2191
2192     if (dmv_read_buffer)
2193         OUT_BCS_RELOC(batch, dmv_read_buffer,
2194                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2195                       0);
2196     else
2197         OUT_BCS_BATCH(batch, 0);
2198                   
2199     ADVANCE_BCS_BATCH(batch);
2200 }
2201
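/*
 * For the Advanced profile the slice data handed to the hardware still
 * contains the 0x00 0x00 0x03 emulation-prevention bytes, so the macroblock
 * data bit offset supplied by the application must be adjusted to account
 * for every 0x03 byte that appears within the slice header.
 */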
2202 static int
2203 gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
2204 {
2205     int out_slice_data_bit_offset;
2206     int slice_header_size = in_slice_data_bit_offset / 8;
2207     int i, j;
2208
2209     if (profile != 3)
2210         out_slice_data_bit_offset = in_slice_data_bit_offset;
2211     else {
2212         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
2213             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
2214                 i++, j += 2;
2215             }
2216         }
2217
2218         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
2219     }
2220
2221     return out_slice_data_bit_offset;
2222 }
2223
2224 static void
2225 gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
2226                         VAPictureParameterBufferVC1 *pic_param,
2227                         VASliceParameterBufferVC1 *slice_param,
2228                         VASliceParameterBufferVC1 *next_slice_param,
2229                         dri_bo *slice_data_bo,
2230                         struct gen7_mfd_context *gen7_mfd_context)
2231 {
2232     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2233     int next_slice_start_vert_pos;
2234     int macroblock_offset;
2235     uint8_t *slice_data = NULL;
2236
2237     dri_bo_map(slice_data_bo, 0);
2238     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
2239     macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data, 
2240                                                                slice_param->macroblock_offset,
2241                                                                pic_param->sequence_fields.bits.profile);
2242     dri_bo_unmap(slice_data_bo);
2243
2244     if (next_slice_param)
2245         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
2246     else
2247         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
2248
2249     BEGIN_BCS_BATCH(batch, 5);
2250     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
2251     OUT_BCS_BATCH(batch, 
2252                   slice_param->slice_data_size - (macroblock_offset >> 3));
2253     OUT_BCS_BATCH(batch, 
2254                   slice_param->slice_data_offset + (macroblock_offset >> 3));
2255     OUT_BCS_BATCH(batch,
2256                   slice_param->slice_vertical_position << 16 |
2257                   next_slice_start_vert_pos << 0);
2258     OUT_BCS_BATCH(batch,
2259                   (macroblock_offset & 0x7));
2260     ADVANCE_BCS_BATCH(batch);
2261 }
2262
2263 static void
2264 gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
2265                             struct decode_state *decode_state,
2266                             struct gen7_mfd_context *gen7_mfd_context)
2267 {
2268     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2269     VAPictureParameterBufferVC1 *pic_param;
2270     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
2271     dri_bo *slice_data_bo;
2272     int i, j;
2273
2274     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2275     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2276
2277     gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
2278     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2279     intel_batchbuffer_emit_mi_flush(batch);
2280     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2281     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2282     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2283     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2284     gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
2285     gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
2286     gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
2287
2288     for (j = 0; j < decode_state->num_slice_params; j++) {
2289         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2290         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2291         slice_data_bo = decode_state->slice_datas[j]->bo;
2292         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
2293
2294         if (j == decode_state->num_slice_params - 1)
2295             next_slice_group_param = NULL;
2296         else
2297             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2298
2299         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2300             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2301
2302             if (i < decode_state->slice_params[j]->num_elements - 1)
2303                 next_slice_param = slice_param + 1;
2304             else
2305                 next_slice_param = next_slice_group_param;
2306
2307             gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2308             slice_param++;
2309         }
2310     }
2311
2312     intel_batchbuffer_end_atomic(batch);
2313     intel_batchbuffer_flush(batch);
2314 }
2315
2316 static void
2317 gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
2318                           struct decode_state *decode_state,
2319                           struct gen7_mfd_context *gen7_mfd_context)
2320 {
2321     struct i965_driver_data *i965 = i965_driver_data(ctx);
2322     struct object_surface *obj_surface;
2323     VAPictureParameterBufferJPEGBaseline *pic_param;
2324     int subsampling = SUBSAMPLE_YUV420;
2325
2326     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2327
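    /*
     * Derive the chroma subsampling of the decoded surface from the
     * per-component sampling factors in the picture parameters.
     */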
2328     if (pic_param->num_components == 1)
2329         subsampling = SUBSAMPLE_YUV400;
2330     else if (pic_param->num_components == 3) {
2331         int h1 = pic_param->components[0].h_sampling_factor;
2332         int h2 = pic_param->components[1].h_sampling_factor;
2333         int h3 = pic_param->components[2].h_sampling_factor;
2334         int v1 = pic_param->components[0].v_sampling_factor;
2335         int v2 = pic_param->components[1].v_sampling_factor;
2336         int v3 = pic_param->components[2].v_sampling_factor;
2337
2338         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2339             v1 == 2 && v2 == 1 && v3 == 1)
2340             subsampling = SUBSAMPLE_YUV420;
2341         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2342                  v1 == 1 && v2 == 1 && v3 == 1)
2343             subsampling = SUBSAMPLE_YUV422H;
2344         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2345                  v1 == 1 && v2 == 1 && v3 == 1)
2346             subsampling = SUBSAMPLE_YUV444;
2347         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2348                  v1 == 1 && v2 == 1 && v3 == 1)
2349             subsampling = SUBSAMPLE_YUV411;
2350         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2351                  v1 == 2 && v2 == 1 && v3 == 1)
2352             subsampling = SUBSAMPLE_YUV422V;
2353         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2354                  v1 == 2 && v2 == 2 && v3 == 2)
2355             subsampling = SUBSAMPLE_YUV422H;
2356         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2357                  v1 == 2 && v2 == 1 && v3 == 1)
2358             subsampling = SUBSAMPLE_YUV422V;
2359         else
2360             assert(0);
2361     } else {
2362         assert(0);
2363     }
2364
2365     /* Current decoded picture */
2366     obj_surface = SURFACE(decode_state->current_render_target);
2367     assert(obj_surface);
2368     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
2369
2370     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2371     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2372     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2373     gen7_mfd_context->pre_deblocking_output.valid = 1;
2374
2375     gen7_mfd_context->post_deblocking_output.bo = NULL;
2376     gen7_mfd_context->post_deblocking_output.valid = 0;
2377
2378     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2379     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2380
2381     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2382     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2383
2384     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2385     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2386
2387     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2388     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2389
2390     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2391     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2392 }
2393
2394 static const int va_to_gen7_jpeg_rotation[4] = {
2395     GEN7_JPEG_ROTATION_0,
2396     GEN7_JPEG_ROTATION_90,
2397     GEN7_JPEG_ROTATION_180,
2398     GEN7_JPEG_ROTATION_270
2399 };
2400
2401 static void
2402 gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
2403                         struct decode_state *decode_state,
2404                         struct gen7_mfd_context *gen7_mfd_context)
2405 {
2406     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2407     VAPictureParameterBufferJPEGBaseline *pic_param;
2408     int chroma_type = GEN7_YUV420;
2409     int frame_width_in_blks;
2410     int frame_height_in_blks;
2411
2412     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2413     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2414
2415     if (pic_param->num_components == 1)
2416         chroma_type = GEN7_YUV400;
2417     else if (pic_param->num_components == 3) {
2418         int h1 = pic_param->components[0].h_sampling_factor;
2419         int h2 = pic_param->components[1].h_sampling_factor;
2420         int h3 = pic_param->components[2].h_sampling_factor;
2421         int v1 = pic_param->components[0].v_sampling_factor;
2422         int v2 = pic_param->components[1].v_sampling_factor;
2423         int v3 = pic_param->components[2].v_sampling_factor;
2424
2425         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2426             v1 == 2 && v2 == 1 && v3 == 1)
2427             chroma_type = GEN7_YUV420;
2428         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2429                  v1 == 1 && v2 == 1 && v3 == 1)
2430             chroma_type = GEN7_YUV422H_2Y;
2431         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2432                  v1 == 1 && v2 == 1 && v3 == 1)
2433             chroma_type = GEN7_YUV444;
2434         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2435                  v1 == 1 && v2 == 1 && v3 == 1)
2436             chroma_type = GEN7_YUV411;
2437         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2438                  v1 == 2 && v2 == 1 && v3 == 1)
2439             chroma_type = GEN7_YUV422V_2Y;
2440         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2441                  v1 == 2 && v2 == 2 && v3 == 2)
2442             chroma_type = GEN7_YUV422H_4Y;
2443         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2444                  v1 == 2 && v2 == 1 && v3 == 1)
2445             chroma_type = GEN7_YUV422V_4Y;
2446         else
2447             assert(0);
2448     }
2449
2450     if (chroma_type == GEN7_YUV400 ||
2451         chroma_type == GEN7_YUV444 ||
2452         chroma_type == GEN7_YUV422V_2Y) {
2453         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2454         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2455     } else if (chroma_type == GEN7_YUV411) {
2456         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2457         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2458     } else {
2459         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2460         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2461     }
2462
2463     BEGIN_BCS_BATCH(batch, 3);
2464     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2465     OUT_BCS_BATCH(batch,
2466                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2467                   (chroma_type << 0));
2468     OUT_BCS_BATCH(batch,
2469                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2470                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2471     ADVANCE_BCS_BATCH(batch);
2472 }
2473
2474 static const int va_to_gen7_jpeg_hufftable[2] = {
2475     MFX_HUFFTABLE_ID_Y,
2476     MFX_HUFFTABLE_ID_UV
2477 };
2478
2479 static void
2480 gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2481                                struct decode_state *decode_state,
2482                                struct gen7_mfd_context *gen7_mfd_context,
2483                                int num_tables)
2484 {
2485     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2486     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2487     int index;
2488
2489     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2490         return;
2491
2492     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2493
2494     for (index = 0; index < num_tables; index++) {
2495         int id = va_to_gen7_jpeg_hufftable[index];
2496         if (!huffman_table->load_huffman_table[index])
2497             continue;
2498         BEGIN_BCS_BATCH(batch, 53);
2499         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2500         OUT_BCS_BATCH(batch, id);
2501         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2502         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2503         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2504         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2505         ADVANCE_BCS_BATCH(batch);
2506     }
2507 }
2508
2509 static const int va_to_gen7_jpeg_qm[5] = {
2510     -1,
2511     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2512     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2513     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2514     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2515 };
2516
2517 static void
2518 gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
2519                        struct decode_state *decode_state,
2520                        struct gen7_mfd_context *gen7_mfd_context)
2521 {
2522     VAPictureParameterBufferJPEGBaseline *pic_param;
2523     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2524     int index;
2525
2526     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2527         return;
2528
2529     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2530     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2531
2532     assert(pic_param->num_components <= 3);
2533
2534     for (index = 0; index < pic_param->num_components; index++) {
2535         int qm_type = va_to_gen7_jpeg_qm[pic_param->components[index].component_id - pic_param->components[0].component_id + 1];
2536         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2537         unsigned char raster_qm[64];
2538         int j;
2539
2540         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2541             continue;
2542
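        /*
         * VA-API passes JPEG quantiser tables in zig-zag scan order, while
         * the QM state expects raster order, so re-order here: e.g. qm[2]
         * (the third coefficient in scan order) lands at
         * raster_qm[zigzag_direct[2]] == raster_qm[8] (row 1, column 0).
         */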
2543         for (j = 0; j < 64; j++)
2544             raster_qm[zigzag_direct[j]] = qm[j];
2545
2546         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2547     }
2548 }
2549
2550 static void
2551 gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
2552                          VAPictureParameterBufferJPEGBaseline *pic_param,
2553                          VASliceParameterBufferJPEGBaseline *slice_param,
2554                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2555                          dri_bo *slice_data_bo,
2556                          struct gen7_mfd_context *gen7_mfd_context)
2557 {
2558     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2559     int scan_component_mask = 0;
2560     int i;
2561
2562     assert(slice_param->num_components > 0);
2563     assert(slice_param->num_components < 4);
2564     assert(slice_param->num_components <= pic_param->num_components);
2565
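    /*
     * Build a bitmask of the picture components that take part in this
     * scan; bit n corresponds to the (n + 1)-th component of the picture.
     */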
2566     for (i = 0; i < slice_param->num_components; i++) {
2567         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2568         case 1:
2569             scan_component_mask |= (1 << 0);
2570             break;
2571         case 2:
2572             scan_component_mask |= (1 << 1);
2573             break;
2574         case 3:
2575             scan_component_mask |= (1 << 2);
2576             break;
2577         default:
2578             assert(0);
2579             break;
2580         }
2581     }
2582
2583     BEGIN_BCS_BATCH(batch, 6);
2584     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2585     OUT_BCS_BATCH(batch, 
2586                   slice_param->slice_data_size);
2587     OUT_BCS_BATCH(batch, 
2588                   slice_param->slice_data_offset);
2589     OUT_BCS_BATCH(batch,
2590                   slice_param->slice_horizontal_position << 16 |
2591                   slice_param->slice_vertical_position << 0);
2592     OUT_BCS_BATCH(batch,
2593                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2594                   (scan_component_mask << 27) |                 /* scan components */
2595                   (0 << 26) |   /* interrupt allowed: disabled */
2596                   (slice_param->num_mcus << 0));                /* MCU count */
2597     OUT_BCS_BATCH(batch,
2598                   (slice_param->restart_interval << 0));    /* RestartInterval */
2599     ADVANCE_BCS_BATCH(batch);
2600 }
2601
2602 /* Workaround for JPEG decoding on Ivybridge */
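/*
 * The helpers below decode a canned 16x16 intra-only AVC clip
 * (gen7_jpeg_wa_clip) through the regular MFX AVC path before every
 * JPEG picture, which appears to bring the engine into a known state
 * before it is switched to JPEG mode.  The clip's slice data is
 * uploaded once per context into jpeg_wa_slice_data_bo and a
 * throw-away NV12 surface is used as the decode target.
 */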
2603
2604 VAStatus 
2605 i965_DestroySurfaces(VADriverContextP ctx,
2606                      VASurfaceID *surface_list,
2607                      int num_surfaces);
2608 VAStatus 
2609 i965_CreateSurfaces(VADriverContextP ctx,
2610                     int width,
2611                     int height,
2612                     int format,
2613                     int num_surfaces,
2614                     VASurfaceID *surfaces);
2615
2616 static struct {
2617     int width;
2618     int height;
2619     unsigned char data[32];
2620     int data_size;
2621     int data_bit_offset;
2622     int qp;
2623 } gen7_jpeg_wa_clip = {
2624     16,
2625     16,
2626     {
2627         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2628         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2629     },
2630     14,
2631     40,
2632     28,
2633 };
2634
2635 static void
2636 gen75_jpeg_wa_init(VADriverContextP ctx,
2637                   struct gen7_mfd_context *gen7_mfd_context)
2638 {
2639     struct i965_driver_data *i965 = i965_driver_data(ctx);
2640     VAStatus status;
2641     struct object_surface *obj_surface;
2642
2643     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2644         i965_DestroySurfaces(ctx,
2645                              &gen7_mfd_context->jpeg_wa_surface_id,
2646                              1);
2647
2648     status = i965_CreateSurfaces(ctx,
2649                                  gen7_jpeg_wa_clip.width,
2650                                  gen7_jpeg_wa_clip.height,
2651                                  VA_RT_FORMAT_YUV420,
2652                                  1,
2653                                  &gen7_mfd_context->jpeg_wa_surface_id);
2654     assert(status == VA_STATUS_SUCCESS);
2655
2656     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2657     assert(obj_surface);
2658     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2659
2660     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2661         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2662                                                                "JPEG WA data",
2663                                                                0x1000,
2664                                                                0x1000);
2665         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2666                        0,
2667                        gen7_jpeg_wa_clip.data_size,
2668                        gen7_jpeg_wa_clip.data);
2669     }
2670 }
2671
2672 static void
2673 gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2674                               struct gen7_mfd_context *gen7_mfd_context)
2675 {
2676     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2677
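    /*
     * The workaround clip is decoded as AVC; the real pipe mode select
     * for JPEG is emitted later from gen75_mfd_jpeg_decode_picture().
     */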
2678     BEGIN_BCS_BATCH(batch, 5);
2679     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2680     OUT_BCS_BATCH(batch,
2681                   (MFX_LONG_MODE << 17) | /* currently only the long format is supported */
2682                   (MFD_MODE_VLD << 15) | /* VLD mode */
2683                   (0 << 10) | /* disable Stream-Out */
2684                   (0 << 9)  | /* Post Deblocking Output */
2685                   (1 << 8)  | /* Pre Deblocking Output */
2686                   (0 << 5)  | /* not in stitch mode */
2687                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2688                   (MFX_FORMAT_AVC << 0));
2689     OUT_BCS_BATCH(batch,
2690                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2691                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2692                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2693                   (0 << 1)  |
2694                   (0 << 0));
2695     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2696     OUT_BCS_BATCH(batch, 0); /* reserved */
2697     ADVANCE_BCS_BATCH(batch);
2698 }
2699
2700 static void
2701 gen75_jpeg_wa_surface_state(VADriverContextP ctx,
2702                            struct gen7_mfd_context *gen7_mfd_context)
2703 {
2704     struct i965_driver_data *i965 = i965_driver_data(ctx);
2705     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2706     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2707
2708     BEGIN_BCS_BATCH(batch, 6);
2709     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2710     OUT_BCS_BATCH(batch, 0);
2711     OUT_BCS_BATCH(batch,
2712                   ((obj_surface->orig_width - 1) << 18) |
2713                   ((obj_surface->orig_height - 1) << 4));
2714     OUT_BCS_BATCH(batch,
2715                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2716                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2717                   (0 << 22) | /* surface object control state, ignored */
2718                   ((obj_surface->width - 1) << 3) | /* pitch */
2719                   (0 << 2)  | /* must be 0 */
2720                   (1 << 1)  | /* must be tiled */
2721                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2722     OUT_BCS_BATCH(batch,
2723                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2724                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2725     OUT_BCS_BATCH(batch,
2726                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2727                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2728     ADVANCE_BCS_BATCH(batch);
2729 }
2730
2731 static void
2732 gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
2733                                  struct gen7_mfd_context *gen7_mfd_context)
2734 {
2735     struct i965_driver_data *i965 = i965_driver_data(ctx);
2736     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2737     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2738     dri_bo *intra_bo;
2739     int i;
2740
2741     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2742                             "intra row store",
2743                             128 * 64,
2744                             0x1000);
2745
2746     BEGIN_BCS_BATCH(batch, 61);
2747     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2748     OUT_BCS_RELOC(batch,
2749                   obj_surface->bo,
2750                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2751                   0);
2752     OUT_BCS_BATCH(batch, 0);
2753     OUT_BCS_BATCH(batch, 0);
2754
2755
2756     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2757     OUT_BCS_BATCH(batch, 0);
2758     OUT_BCS_BATCH(batch, 0);
2759
2760     /* uncompressed-video & stream out 7-12 */
2761     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2762     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2763     OUT_BCS_BATCH(batch, 0);
2764     OUT_BCS_BATCH(batch, 0);
2765     OUT_BCS_BATCH(batch, 0);
2766     OUT_BCS_BATCH(batch, 0);
2767
2768     /* the DW 13-15 is for intra row store scratch */
2769     OUT_BCS_RELOC(batch,
2770                   intra_bo,
2771                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2772                   0);
2773     OUT_BCS_BATCH(batch, 0);
2774     OUT_BCS_BATCH(batch, 0);
2775
2776     /* the DW 16-18 is for deblocking filter */
2777     OUT_BCS_BATCH(batch, 0);
2778     OUT_BCS_BATCH(batch, 0);
2779     OUT_BCS_BATCH(batch, 0);
2780
2781     /* DW 19..50 */
2782     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2783         OUT_BCS_BATCH(batch, 0);
2784         OUT_BCS_BATCH(batch, 0);
2785     }
2786     OUT_BCS_BATCH(batch, 0);
2787
2788     /* DW 52-54: MB status buffer address */
2789     OUT_BCS_BATCH(batch, 0);
2790     OUT_BCS_BATCH(batch, 0);
2791     OUT_BCS_BATCH(batch, 0);
2792     /* DW 55-60: ILDB & second ILDB addresses */
2793     OUT_BCS_BATCH(batch, 0);
2794     OUT_BCS_BATCH(batch, 0);
2795     OUT_BCS_BATCH(batch, 0);
2796     OUT_BCS_BATCH(batch, 0);
2797     OUT_BCS_BATCH(batch, 0);
2798     OUT_BCS_BATCH(batch, 0);
2799
2800     ADVANCE_BCS_BATCH(batch);
2801
2802     dri_bo_unreference(intra_bo);
2803 }
2804 static void
2805 gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2806                                  struct gen7_mfd_context *gen7_mfd_context)
2807 {
2808     struct i965_driver_data *i965 = i965_driver_data(ctx);
2809     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2810     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2811     dri_bo *intra_bo;
2812     int i;
2813
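    /*
     * B-stepping and later use the extended 61-DWord
     * MFX_PIPE_BUF_ADDR_STATE (the _bplus variant above); A-stepping
     * keeps the original 25-DWord layout below.
     */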
2814     if (IS_STEPPING_BPLUS(i965)) {
2815         gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
2816         return;
2817     }
2818     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2819                             "intra row store",
2820                             128 * 64,
2821                             0x1000);
2822
2823     BEGIN_BCS_BATCH(batch, 25);
2824     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
2825     OUT_BCS_RELOC(batch,
2826                   obj_surface->bo,
2827                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2828                   0);
2829     
2830     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2831
2832     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2833     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2834
2835     OUT_BCS_RELOC(batch,
2836                   intra_bo,
2837                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2838                   0);
2839
2840     OUT_BCS_BATCH(batch, 0);
2841
2842     /* DW 7..22 */
2843     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2844         OUT_BCS_BATCH(batch, 0);
2845     }
2846
2847     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2848     OUT_BCS_BATCH(batch, 0);
2849     ADVANCE_BCS_BATCH(batch);
2850
2851     dri_bo_unreference(intra_bo);
2852 }
2853
2854 static void
2855 gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
2856                                      struct gen7_mfd_context *gen7_mfd_context)
2857 {
2858     struct i965_driver_data *i965 = i965_driver_data(ctx);
2859     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2860     dri_bo *bsd_mpc_bo, *mpr_bo;
2861
2862     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2863                               "bsd mpc row store",
2864                               11520, /* 1.5 * 120 * 64 */
2865                               0x1000);
2866
2867     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2868                           "mpr row store",
2869                           7680, /* 1.0 * 120 * 64 */
2870                           0x1000);
2871
2872     BEGIN_BCS_BATCH(batch, 10);
2873     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2874
2875     OUT_BCS_RELOC(batch,
2876                   bsd_mpc_bo,
2877                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2878                   0);
2879
2880     OUT_BCS_BATCH(batch, 0);
2881     OUT_BCS_BATCH(batch, 0);
2882
2883     OUT_BCS_RELOC(batch,
2884                   mpr_bo,
2885                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2886                   0);
2887     OUT_BCS_BATCH(batch, 0);
2888     OUT_BCS_BATCH(batch, 0);
2889
2890     OUT_BCS_BATCH(batch, 0);
2891     OUT_BCS_BATCH(batch, 0);
2892     OUT_BCS_BATCH(batch, 0);
2893
2894     ADVANCE_BCS_BATCH(batch);
2895
2896     dri_bo_unreference(bsd_mpc_bo);
2897     dri_bo_unreference(mpr_bo);
2898 }
2899
2900 static void
2901 gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2902                                      struct gen7_mfd_context *gen7_mfd_context)
2903 {
2904     struct i965_driver_data *i965 = i965_driver_data(ctx);
2905     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2906     dri_bo *bsd_mpc_bo, *mpr_bo;
2907
2908     if (IS_STEPPING_BPLUS(i965)) {
2909         gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
2910         return;
2911     }
2912
2913     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2914                               "bsd mpc row store",
2915                               11520, /* 1.5 * 120 * 64 */
2916                               0x1000);
2917
2918     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2919                           "mpr row store",
2920                           7680, /* 1.0 * 120 * 64 */
2921                           0x1000);
2922
2923     BEGIN_BCS_BATCH(batch, 4);
2924     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2925
2926     OUT_BCS_RELOC(batch,
2927                   bsd_mpc_bo,
2928                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2929                   0);
2930
2931     OUT_BCS_RELOC(batch,
2932                   mpr_bo,
2933                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2934                   0);
2935     OUT_BCS_BATCH(batch, 0);
2936
2937     ADVANCE_BCS_BATCH(batch);
2938
2939     dri_bo_unreference(bsd_mpc_bo);
2940     dri_bo_unreference(mpr_bo);
2941 }
2942
2943 static void
2944 gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2945                           struct gen7_mfd_context *gen7_mfd_context)
2946 {
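    /* Intentionally empty: no scaling matrices are programmed for the workaround clip. */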
2947
2948 }
2949
2950 static void
2951 gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
2952                            struct gen7_mfd_context *gen7_mfd_context)
2953 {
2954     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2955     int img_struct = 0;
2956     int mbaff_frame_flag = 0;
2957     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2958
2959     BEGIN_BCS_BATCH(batch, 16);
2960     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2961     OUT_BCS_BATCH(batch, 
2962                   width_in_mbs * height_in_mbs);
2963     OUT_BCS_BATCH(batch, 
2964                   ((height_in_mbs - 1) << 16) | 
2965                   ((width_in_mbs - 1) << 0));
2966     OUT_BCS_BATCH(batch, 
2967                   (0 << 24) |
2968                   (0 << 16) |
2969                   (0 << 14) |
2970                   (0 << 13) |
2971                   (0 << 12) | /* differ from GEN6 */
2972                   (0 << 10) |
2973                   (img_struct << 8));
2974     OUT_BCS_BATCH(batch,
2975                   (1 << 10) | /* 4:2:0 */
2976                   (1 << 7) |  /* CABAC */
2977                   (0 << 6) |
2978                   (0 << 5) |
2979                   (0 << 4) |
2980                   (0 << 3) |
2981                   (1 << 2) |
2982                   (mbaff_frame_flag << 1) |
2983                   (0 << 0));
2984     OUT_BCS_BATCH(batch, 0);
2985     OUT_BCS_BATCH(batch, 0);
2986     OUT_BCS_BATCH(batch, 0);
2987     OUT_BCS_BATCH(batch, 0);
2988     OUT_BCS_BATCH(batch, 0);
2989     OUT_BCS_BATCH(batch, 0);
2990     OUT_BCS_BATCH(batch, 0);
2991     OUT_BCS_BATCH(batch, 0);
2992     OUT_BCS_BATCH(batch, 0);
2993     OUT_BCS_BATCH(batch, 0);
2994     OUT_BCS_BATCH(batch, 0);
2995     ADVANCE_BCS_BATCH(batch);
2996 }
2997
2998 static void
2999 gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
3000                                   struct gen7_mfd_context *gen7_mfd_context)
3001 {
3002     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3003     int i;
3004
3005     BEGIN_BCS_BATCH(batch, 71);
3006     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
3007
3008     /* reference surfaces 0..15 */
3009     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3010         OUT_BCS_BATCH(batch, 0); /* top */
3011         OUT_BCS_BATCH(batch, 0); /* bottom */
3012     }
3013
3014     OUT_BCS_BATCH(batch, 0);
3015
3016     /* the current decoding frame/field */
3017     OUT_BCS_BATCH(batch, 0); /* top */
3018     OUT_BCS_BATCH(batch, 0);
3019     OUT_BCS_BATCH(batch, 0);
3020
3021     /* POC List */
3022     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3023         OUT_BCS_BATCH(batch, 0);
3024         OUT_BCS_BATCH(batch, 0);
3025     }
3026
3027     OUT_BCS_BATCH(batch, 0);
3028     OUT_BCS_BATCH(batch, 0);
3029
3030     ADVANCE_BCS_BATCH(batch);
3031 }
3032
3033 static void
3034 gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
3035                                   struct gen7_mfd_context *gen7_mfd_context)
3036 {
3037     struct i965_driver_data *i965 = i965_driver_data(ctx);
3038     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3039     int i;
3040
3041     if (IS_STEPPING_BPLUS(i965)) {
3042         gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
3043         return;
3044     }
3045
3046     BEGIN_BCS_BATCH(batch, 69);
3047     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
3048
3049     /* reference surfaces 0..15 */
3050     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3051         OUT_BCS_BATCH(batch, 0); /* top */
3052         OUT_BCS_BATCH(batch, 0); /* bottom */
3053     }
3054
3055     /* the current decoding frame/field */
3056     OUT_BCS_BATCH(batch, 0); /* top */
3057     OUT_BCS_BATCH(batch, 0); /* bottom */
3058
3059     /* POC List */
3060     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
3061         OUT_BCS_BATCH(batch, 0);
3062         OUT_BCS_BATCH(batch, 0);
3063     }
3064
3065     OUT_BCS_BATCH(batch, 0);
3066     OUT_BCS_BATCH(batch, 0);
3067
3068     ADVANCE_BCS_BATCH(batch);
3069 }
3070
3071 static void
3072 gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
3073                                      struct gen7_mfd_context *gen7_mfd_context)
3074 {
3075     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3076
3077     BEGIN_BCS_BATCH(batch, 11);
3078     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
3079     OUT_BCS_RELOC(batch,
3080                   gen7_mfd_context->jpeg_wa_slice_data_bo,
3081                   I915_GEM_DOMAIN_INSTRUCTION, 0,
3082                   0);
3083     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
3084     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3085     OUT_BCS_BATCH(batch, 0);
3086     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3087     OUT_BCS_BATCH(batch, 0);
3088     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3089     OUT_BCS_BATCH(batch, 0);
3090     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3091     OUT_BCS_BATCH(batch, 0);
3092     ADVANCE_BCS_BATCH(batch);
3093 }
3094
3095 static void
3096 gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
3097                                      struct gen7_mfd_context *gen7_mfd_context)
3098 {
3099     struct i965_driver_data *i965 = i965_driver_data(ctx);
3100     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3101
3102     if (IS_STEPPING_BPLUS(i965)) {
3103         gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
3104         return; 
3105     }
3106     BEGIN_BCS_BATCH(batch, 11);
3107     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
3108     OUT_BCS_RELOC(batch,
3109                   gen7_mfd_context->jpeg_wa_slice_data_bo,
3110                   I915_GEM_DOMAIN_INSTRUCTION, 0,
3111                   0);
3112     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
3113     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3114     OUT_BCS_BATCH(batch, 0);
3115     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3116     OUT_BCS_BATCH(batch, 0);
3117     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3118     OUT_BCS_BATCH(batch, 0);
3119     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
3120     OUT_BCS_BATCH(batch, 0);
3121     ADVANCE_BCS_BATCH(batch);
3122 }
3123
3124 static void
3125 gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
3126                             struct gen7_mfd_context *gen7_mfd_context)
3127 {
3128     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3129
3130     /* the input bitstream format on GEN7 differs from GEN6 */
3131     BEGIN_BCS_BATCH(batch, 6);
3132     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
3133     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
3134     OUT_BCS_BATCH(batch, 0);
3135     OUT_BCS_BATCH(batch,
3136                   (0 << 31) |
3137                   (0 << 14) |
3138                   (0 << 12) |
3139                   (0 << 10) |
3140                   (0 << 8));
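    /* data_bit_offset is 40: the slice data starts 5 bytes (40 >> 3) into
     * the buffer with a bit offset of 0 (40 & 0x7). */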
3141     OUT_BCS_BATCH(batch,
3142                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
3143                   (0 << 5)  |
3144                   (0 << 4)  |
3145                   (1 << 3) | /* LastSlice Flag */
3146                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
3147     OUT_BCS_BATCH(batch, 0);
3148     ADVANCE_BCS_BATCH(batch);
3149 }
3150
3151 static void
3152 gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
3153                              struct gen7_mfd_context *gen7_mfd_context)
3154 {
3155     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3156     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
3157     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
3158     int first_mb_in_slice = 0;
3159     int slice_type = SLICE_TYPE_I;
3160
3161     BEGIN_BCS_BATCH(batch, 11);
3162     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3163     OUT_BCS_BATCH(batch, slice_type);
3164     OUT_BCS_BATCH(batch, 
3165                   (num_ref_idx_l1 << 24) |
3166                   (num_ref_idx_l0 << 16) |
3167                   (0 << 8) |
3168                   (0 << 0));
3169     OUT_BCS_BATCH(batch, 
3170                   (0 << 29) |
3171                   (1 << 27) |   /* disable Deblocking */
3172                   (0 << 24) |
3173                   (gen7_jpeg_wa_clip.qp << 16) |
3174                   (0 << 8) |
3175                   (0 << 0));
3176     OUT_BCS_BATCH(batch, 
3177                   (slice_ver_pos << 24) |
3178                   (slice_hor_pos << 16) | 
3179                   (first_mb_in_slice << 0));
3180     OUT_BCS_BATCH(batch,
3181                   (next_slice_ver_pos << 16) |
3182                   (next_slice_hor_pos << 0));
3183     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
3184     OUT_BCS_BATCH(batch, 0);
3185     OUT_BCS_BATCH(batch, 0);
3186     OUT_BCS_BATCH(batch, 0);
3187     OUT_BCS_BATCH(batch, 0);
3188     ADVANCE_BCS_BATCH(batch);
3189 }
3190
3191 static void
3192 gen75_mfd_jpeg_wa(VADriverContextP ctx,
3193                  struct gen7_mfd_context *gen7_mfd_context)
3194 {
3195     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3196     gen75_jpeg_wa_init(ctx, gen7_mfd_context);
3197     intel_batchbuffer_emit_mi_flush(batch);
3198     gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
3199     gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
3200     gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
3201     gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
3202     gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
3203     gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
3204     gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
3205
3206     gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
3207     gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
3208     gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
3209 }
3210
3211 void
3212 gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
3213                              struct decode_state *decode_state,
3214                              struct gen7_mfd_context *gen7_mfd_context)
3215 {
3216     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3217     VAPictureParameterBufferJPEGBaseline *pic_param;
3218     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3219     dri_bo *slice_data_bo;
3220     int i, j, max_selector = 0;
3221
3222     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3223     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3224
3225     /* Currently only support Baseline DCT */
3226     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3227     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3228     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
3229     intel_batchbuffer_emit_mi_flush(batch);
3230     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3231     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3232     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3233     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3234     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
3235
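    /*
     * First pass: walk every scan's components to find the largest DC/AC
     * Huffman table selector in use, so that the right number of tables
     * is loaded before the second pass emits the BSD objects.
     */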
3236     for (j = 0; j < decode_state->num_slice_params; j++) {
3237         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3238         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3239         slice_data_bo = decode_state->slice_datas[j]->bo;
3240         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3241
3242         if (j == decode_state->num_slice_params - 1)
3243             next_slice_group_param = NULL;
3244         else
3245             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3246
3247         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3248             int component;
3249
3250             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3251
3252             if (i < decode_state->slice_params[j]->num_elements - 1)
3253                 next_slice_param = slice_param + 1;
3254             else
3255                 next_slice_param = next_slice_group_param;
3256
3257             for (component = 0; component < slice_param->num_components; component++) {
3258                 if (max_selector < slice_param->components[component].dc_table_selector)
3259                     max_selector = slice_param->components[component].dc_table_selector;
3260
3261                 if (max_selector < slice_param->components[component].ac_table_selector)
3262                     max_selector = slice_param->components[component].ac_table_selector;
3263             }
3264
3265             slice_param++;
3266         }
3267     }
3268
3269     assert(max_selector < 2);
3270     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3271
3272     for (j = 0; j < decode_state->num_slice_params; j++) {
3273         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3274         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3275         slice_data_bo = decode_state->slice_datas[j]->bo;
3276         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3277
3278         if (j == decode_state->num_slice_params - 1)
3279             next_slice_group_param = NULL;
3280         else
3281             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3282
3283         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3284             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3285
3286             if (i < decode_state->slice_params[j]->num_elements - 1)
3287                 next_slice_param = slice_param + 1;
3288             else
3289                 next_slice_param = next_slice_group_param;
3290
3291             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3292             slice_param++;
3293         }
3294     }
3295
3296     intel_batchbuffer_end_atomic(batch);
3297     intel_batchbuffer_flush(batch);
3298 }
3299
3300 static void 
3301 gen75_mfd_decode_picture(VADriverContextP ctx, 
3302                         VAProfile profile, 
3303                         union codec_state *codec_state,
3304                         struct hw_context *hw_context)
3306 {
3307     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3308     struct decode_state *decode_state = &codec_state->decode;
3309
3310     assert(gen7_mfd_context);
3311
3312     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3313
3314     switch (profile) {
3315     case VAProfileMPEG2Simple:
3316     case VAProfileMPEG2Main:
3317         gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3318         break;
3319         
3320     case VAProfileH264Baseline:
3321     case VAProfileH264Main:
3322     case VAProfileH264High:
3323         gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3324         break;
3325
3326     case VAProfileVC1Simple:
3327     case VAProfileVC1Main:
3328     case VAProfileVC1Advanced:
3329         gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3330         break;
3331
3332     case VAProfileJPEGBaseline:
3333         gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3334         break;
3335
3336     default:
3337         assert(0);
3338         break;
3339     }
3340 }
3341
3342 static void
3343 gen75_mfd_context_destroy(void *hw_context)
3344 {
3345     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3346
3347     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3348     gen7_mfd_context->post_deblocking_output.bo = NULL;
3349
3350     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3351     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3352
3353     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3354     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3355
3356     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3357     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3358
3359     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3360     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3361
3362     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3363     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3364
3365     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3366     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3367
3368     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3369
3370     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3371     free(gen7_mfd_context);
3372 }
3373
3374 static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
3375                                     struct gen7_mfd_context *gen7_mfd_context)
3376 {
3377     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3378     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3379     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3380     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3381 }
3382
3383 struct hw_context *
3384 gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3385 {
3386     struct intel_driver_data *intel = intel_driver_data(ctx);
3387     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3388     int i;
3389
3390     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
3391     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
3392     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
3393
3394     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3395         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3396         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3397     }
3398
3399     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3400
3401     switch (obj_config->profile) {
3402     case VAProfileMPEG2Simple:
3403     case VAProfileMPEG2Main:
3404         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3405         break;
3406
3407     case VAProfileH264Baseline:
3408     case VAProfileH264Main:
3409     case VAProfileH264High:
3410         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
3411         break;
3412     default:
3413         break;
3414     }
3415     return (struct hw_context *)gen7_mfd_context;
3416 }