Enlarge deblocking filter row store on BDW
[platform/upstream/libva-intel-driver.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <va/va_dec_jpeg.h>
35
36 #include "intel_batchbuffer.h"
37 #include "intel_driver.h"
38
39 #include "i965_defines.h"
40 #include "i965_drv_video.h"
41 #include "i965_decoder_utils.h"
42
43 #include "gen7_mfd.h"
44 #include "intel_media.h"
45
46 #define B0_STEP_REV             2
47 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
48
/*
 * Classic 8x8 zig-zag scan order: zigzag_direct[i] is the raster
 * (row-major) index of the i-th coefficient in scan order.
 * NOTE(review): no user is visible in this chunk -- presumably the
 * MPEG-2/JPEG quantization-matrix reordering paths; confirm in the
 * rest of the file.
 */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
59
60 static void
61 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
62                           VAPictureParameterBufferH264 *pic_param,
63                           struct object_surface *obj_surface)
64 {
65     struct i965_driver_data *i965 = i965_driver_data(ctx);
66     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
67     int width_in_mbs, height_in_mbs;
68
69     obj_surface->free_private_data = gen_free_avc_surface;
70     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
71     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
72
73     if (!gen7_avc_surface) {
74         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
75         assert((obj_surface->size & 0x3f) == 0);
76         obj_surface->private_data = gen7_avc_surface;
77     }
78
79     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
80                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
81
82     if (gen7_avc_surface->dmv_top == NULL) {
83         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
84                                                  "direct mv w/r buffer",
85                                                  width_in_mbs * height_in_mbs * 128,
86                                                  0x1000);
87         assert(gen7_avc_surface->dmv_top);
88     }
89
90     if (gen7_avc_surface->dmv_bottom_flag &&
91         gen7_avc_surface->dmv_bottom == NULL) {
92         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
93                                                     "direct mv w/r buffer",
94                                                     width_in_mbs * height_in_mbs * 128,                                                    
95                                                     0x1000);
96         assert(gen7_avc_surface->dmv_bottom);
97     }
98 }
99
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWs): put the MFX engine into
 * long-format VLD decode mode for the selected standard, and enable the
 * pre- and/or post-deblocking output paths according to which output
 * buffer the caller marked valid in the context.
 */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Only these four codecs are supported by this decode backend. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
134
/*
 * Emit MFX_SURFACE_STATE (6 DWs) for the render target: a Y-tiled
 * planar 4:2:0 8-bit surface with its dimensions, pitch and the Y
 * offsets of the Cb/Cr planes.  Chroma is interleaved (NV12-style)
 * for every codec except JPEG.
 */
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
173
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 DWs): program the base addresses of
 * the decoder's working buffers -- pre-/post-deblocking outputs, the
 * intra and deblocking-filter row-store scratch buffers, and the 16
 * reference picture surfaces.  Slots for buffers not marked valid are
 * written as zero.
 *
 * NOTE(review): each address entry appears to span 3 DWs (the reloc
 * followed by two zero DWs -- presumably the upper address bits and
 * attribute field on BDW); confirm against the Gen8 PRM.
 */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
        /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
        /* NOTE(review): the next two DWs execute unconditionally -- the
         * indentation misleadingly suggests they belong to the else. */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* DW 19..50: 16 reference surfaces, 2 DWs each (address low/high) */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }

        OUT_BCS_BATCH(batch, 0);
    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, 0);

    /* Macroblock status & ILDB 52-57 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
271
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): point the indirect
 * bitstream object at the slice data BO with a 2GB upper bound.  The
 * MV, IT_COEFF, IT_DBLK and PAK_BSE (encoder-only) object slots are
 * all zeroed -- unused in VLD decode mode.
 */
static void
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
        /* MFX In BS 1-5 */
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);

        /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
320
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs): program the BSD/MPC
 * row-store, the MPR row-store and the VC-1 bitplane read buffer.
 * Buffers not marked valid in the context get zero addresses.
 */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC row store scratch buffer 1-3 */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
        else
                OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* Bitplane 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
363
364 static void
365 gen8_mfd_qm_state(VADriverContextP ctx,
366                   int qm_type,
367                   unsigned char *qm,
368                   int qm_length,
369                   struct gen7_mfd_context *gen7_mfd_context)
370 {
371     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
372     unsigned int qm_buffer[16];
373
374     assert(qm_length <= 16 * 4);
375     memcpy(qm_buffer, qm, qm_length);
376
377     BEGIN_BCS_BATCH(batch, 18);
378     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
379     OUT_BCS_BATCH(batch, qm_type << 0);
380     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
381     ADVANCE_BCS_BATCH(batch);
382 }
383
/*
 * Emit MFX_AVC_IMG_STATE (17 DWs) from the VA picture parameters:
 * picture dimensions in macroblocks, QP index offsets, weighted
 * prediction mode, picture structure (frame/top/bottom field) and the
 * sequence/picture coding flags.  Sanity of the parameters (field
 * flags consistent with the picture structure, 4:2:0 or monochrome
 * chroma only) is enforced with asserts.
 */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct encoding: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Field pictures must carry field_pic_flag, frames must not. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: frame picture with macroblock-adaptive frame/field coding */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW 5..16: reserved/unused here, zeroed */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
469
470 static void
471 gen8_mfd_avc_qm_state(VADriverContextP ctx,
472                       struct decode_state *decode_state,
473                       struct gen7_mfd_context *gen7_mfd_context)
474 {
475     VAIQMatrixBufferH264 *iq_matrix;
476     VAPictureParameterBufferH264 *pic_param;
477
478     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
479         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
480     else
481         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
482
483     assert(decode_state->pic_param && decode_state->pic_param->buffer);
484     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
485
486     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
487     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
488
489     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
490         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
491         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
492     }
493 }
494
495 static void
496 gen8_mfd_avc_picid_state(VADriverContextP ctx,
497                       struct decode_state *decode_state,
498                       struct gen7_mfd_context *gen7_mfd_context)
499 {
500     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
501
502     BEGIN_BCS_BATCH(batch, 10);
503     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
504     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
505     OUT_BCS_BATCH(batch, 0);
506     OUT_BCS_BATCH(batch, 0);
507     OUT_BCS_BATCH(batch, 0);
508     OUT_BCS_BATCH(batch, 0);
509     OUT_BCS_BATCH(batch, 0);
510     OUT_BCS_BATCH(batch, 0);
511     OUT_BCS_BATCH(batch, 0);
512     OUT_BCS_BATCH(batch, 0);
513     ADVANCE_BCS_BATCH(batch);
514 }
515
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWs): the direct-mode motion
 * vector buffers of the 16 reference surfaces and of the current
 * picture, followed by the top/bottom POC list for the references and
 * the current picture.  References without attached AVC private data
 * contribute zero entries.
 */
static void
gen8_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;

            /* DMV buffer of the reference: read-only for the GPU */
            OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field: its DMV buffer is written */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List: top/bottom field order counts per reference slot,
     * looked up by surface id in pic_param->ReferenceFrames */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            int found = 0;

            assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);

            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];

                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));

            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* finally the POC of the current picture */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
602
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice: slice type,
 * active reference counts for both lists, deblocking/CABAC/QP
 * parameters and the slice's start and end positions in macroblock
 * coordinates.  next_slice_param == NULL marks the last slice of the
 * picture; its end position is then the bottom of the (field) picture.
 */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Collapse SI/SP onto I/P for the hardware's slice-type field. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* I slices reference nothing; P slices use list 0 only. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    /* In MBAFF pictures MB addresses count macroblock pairs, so the
     * start address doubles when converting to single-MB units. */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* Last slice: ends at the bottom row (half height for fields). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
688
689 static inline void
690 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
691                            VAPictureParameterBufferH264 *pic_param,
692                            VASliceParameterBufferH264 *slice_param,
693                            struct gen7_mfd_context *gen7_mfd_context)
694 {
695     gen6_send_avc_ref_idx_state(
696         gen7_mfd_context->base.batch,
697         slice_param,
698         gen7_mfd_context->reference_surface
699     );
700 }
701
702 static void
703 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
704                                 VAPictureParameterBufferH264 *pic_param,
705                                 VASliceParameterBufferH264 *slice_param,
706                                 struct gen7_mfd_context *gen7_mfd_context)
707 {
708     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
709     int i, j, num_weight_offset_table = 0;
710     short weightoffsets[32 * 6];
711
712     if ((slice_param->slice_type == SLICE_TYPE_P ||
713          slice_param->slice_type == SLICE_TYPE_SP) &&
714         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
715         num_weight_offset_table = 1;
716     }
717     
718     if ((slice_param->slice_type == SLICE_TYPE_B) &&
719         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
720         num_weight_offset_table = 2;
721     }
722
723     for (i = 0; i < num_weight_offset_table; i++) {
724         BEGIN_BCS_BATCH(batch, 98);
725         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
726         OUT_BCS_BATCH(batch, i);
727
728         if (i == 0) {
729             for (j = 0; j < 32; j++) {
730                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
731                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
732                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
733                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
734                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
735                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
736             }
737         } else {
738             for (j = 0; j < 32; j++) {
739                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
740                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
741                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
742                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
743                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
744                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
745             }
746         }
747
748         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
749         ADVANCE_BCS_BATCH(batch);
750     }
751 }
752
/*
 * Emit the MFD_AVC_BSD_OBJECT command that kicks off bitstream decode of a
 * single AVC slice.  slice_data_bo holds the raw slice data; the bit offset
 * of the first macroblock is computed up front so the hardware can skip the
 * already-parsed slice header.
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Bit offset of the first MB; depends on CABAC vs CAVLC
     * (entropy_coding_mode_flag). */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, 
                  (slice_param->slice_data_size));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) | /* byte part of first-MB offset */
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));     /* residual bit part of the offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
788
/*
 * One-time AVC context setup: pre-populate the context's IQ matrix with the
 * default flat scaling lists so decoding works even if the application
 * never submits a VAIQMatrixBufferH264.
 */
static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
798
/*
 * Per-picture setup for AVC decoding: bind the render surface as the
 * decoder output, refresh the reference frame-store mapping, and
 * (re)allocate the per-row scratch buffers sized from the picture width.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* The in-loop deblocking filter (ILDB) is needed as soon as any slice
     * does not disable it (disable_deblocking_filter_idc != 1); stop
     * scanning at the first such slice. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    /* Map the DPB entries in pic_param onto hardware frame-store indices. */
    intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* initial uv component for YUV400 case */
    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
         unsigned int uv_offset = obj_surface->width * obj_surface->height;
         unsigned int uv_size   = obj_surface->width * obj_surface->height / 2;

         /* Monochrome stream: fill the NV12 chroma plane with the neutral
          * chroma value 0x80 so stale memory never shows as color noise. */
         drm_intel_gem_bo_map_gtt(obj_surface->bo);
         memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
         drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
    }

    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Both deblocking outputs alias the render surface BO; exactly one of
     * them is marked valid depending on whether ILDB runs. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers scale with the picture width in MBs.
     * NOTE(review): the per-MB byte counts below follow the MFX hardware
     * requirements for this generation -- do not shrink them. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC carries no bitplane data (VC-1 only). */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
907
/*
 * Top-level AVC picture decode: per-picture init, then the full MFX command
 * sequence (pipe/surface/buffer state, QM, IMG, PICID) followed by
 * per-slice state and BSD objects, all inside one atomic BCS batch.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: one slice-parameter buffer per iteration; inner loop:
     * the individual slices inside that buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the next buffer, or NULL for the last one; used
         * below to find the slice that follows the current slice. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            /* next_slice_param == NULL marks the picture's last slice (it
             * drives the LastSlice flag in the BSD object). */
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
969
/*
 * Per-picture setup for MPEG-2 decoding: refresh the reference surfaces,
 * bind the render surface as the decoder output and (re)allocate the
 * BSD/MPC row store; the other scratch buffers are unused for MPEG-2.
 */
static void
gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* Only the pre-deblocking output is bound; the post-deblocking path is
     * marked invalid below. */
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    /* NOTE(review): 96 bytes per MB column -- presumably the hardware
     * row-store requirement for MPEG-2; confirm against the PRM. */
    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* All other auxiliary buffers are unused for MPEG-2. */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
1016
1017 static void
1018 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1019                          struct decode_state *decode_state,
1020                          struct gen7_mfd_context *gen7_mfd_context)
1021 {
1022     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1023     VAPictureParameterBufferMPEG2 *pic_param;
1024     unsigned int slice_concealment_disable_bit = 0;
1025
1026     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1027     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1028
1029     slice_concealment_disable_bit = 1;
1030
1031     BEGIN_BCS_BATCH(batch, 13);
1032     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1033     OUT_BCS_BATCH(batch,
1034                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1035                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1036                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1037                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1038                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1039                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1040                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1041                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1042                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1043                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1044                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1045                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1046     OUT_BCS_BATCH(batch,
1047                   pic_param->picture_coding_type << 9);
1048     OUT_BCS_BATCH(batch,
1049                   (slice_concealment_disable_bit << 31) |
1050                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1051                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1052     OUT_BCS_BATCH(batch, 0);
1053     OUT_BCS_BATCH(batch, 0);
1054     OUT_BCS_BATCH(batch, 0);
1055     OUT_BCS_BATCH(batch, 0);
1056     OUT_BCS_BATCH(batch, 0);
1057     OUT_BCS_BATCH(batch, 0);
1058     OUT_BCS_BATCH(batch, 0);
1059     OUT_BCS_BATCH(batch, 0);
1060     OUT_BCS_BATCH(batch, 0);
1061     ADVANCE_BCS_BATCH(batch);
1062 }
1063
1064 static void
1065 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1066                         struct decode_state *decode_state,
1067                         struct gen7_mfd_context *gen7_mfd_context)
1068 {
1069     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1070     int i, j;
1071
1072     /* Update internal QM state */
1073     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1074         VAIQMatrixBufferMPEG2 * const iq_matrix =
1075             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1076
1077         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1078             iq_matrix->load_intra_quantiser_matrix) {
1079             gen_iq_matrix->load_intra_quantiser_matrix =
1080                 iq_matrix->load_intra_quantiser_matrix;
1081             if (iq_matrix->load_intra_quantiser_matrix) {
1082                 for (j = 0; j < 64; j++)
1083                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1084                         iq_matrix->intra_quantiser_matrix[j];
1085             }
1086         }
1087
1088         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1089             iq_matrix->load_non_intra_quantiser_matrix) {
1090             gen_iq_matrix->load_non_intra_quantiser_matrix =
1091                 iq_matrix->load_non_intra_quantiser_matrix;
1092             if (iq_matrix->load_non_intra_quantiser_matrix) {
1093                 for (j = 0; j < 64; j++)
1094                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1095                         iq_matrix->non_intra_quantiser_matrix[j];
1096             }
1097         }
1098     }
1099
1100     /* Commit QM state to HW */
1101     for (i = 0; i < 2; i++) {
1102         unsigned char *qm = NULL;
1103         int qm_type;
1104
1105         if (i == 0) {
1106             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1107                 qm = gen_iq_matrix->intra_quantiser_matrix;
1108                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1109             }
1110         } else {
1111             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1112                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1113                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1114             }
1115         }
1116
1117         if (!qm)
1118             continue;
1119
1120         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1121     }
1122 }
1123
/*
 * Emit one MFD_MPEG2_BSD_OBJECT command per slice.  The macroblock span of
 * the slice is derived from its own start position and the start of the
 * following slice (or the bottom of the picture for the last slice).
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* NOTE(review): when wa_mpeg2_slice_vertical_position > 0 the vertical
     * positions of field pictures are halved -- presumably a workaround for
     * streams that encode them in frame units; see
     * mpeg2_wa_slice_vertical_position() for the detection logic. */
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    /* Start of this slice ... */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    /* ... and start of the next slice (or the picture bottom for the last
     * slice of the picture). */
    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Macroblocks covered by this slice, in raster order. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch, 
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
1172
/*
 * Top-level MPEG-2 picture decode: per-picture init, then pipe/surface/
 * buffer state, PIC and QM state, followed by one BSD object per slice,
 * all inside a single atomic BCS batch.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily probe (once per context; < 0 means "not determined yet")
     * whether the stream needs the slice_vertical_position workaround. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* First slice of the next buffer, or NULL for the last buffer;
         * used below to locate the slice that follows the current one. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            /* next_slice_param == NULL marks the picture's last slice. */
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1228
/* VA-API VC-1 picture_type → GEN7 hardware picture-type code.
 * NOTE(review): entry 4 reuses the P-picture code -- presumably the
 * skipped-picture case; confirm at the use site. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};

/* VA-API VC-1 mv_mode → hardware motion-vector mode code. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};

/* B-picture direct-mode scale factors.
 * NOTE(review): indexing presumably follows the VC-1 BFRACTION code --
 * confirm at the use site in the picture state code. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* VA-API VC-1 conditional-overlap value → hardware CONDOVER code. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* VA-API VC-1 profile → GEN7 hardware profile code (index 2 is reserved). */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
1264
1265 static void 
1266 gen8_mfd_free_vc1_surface(void **data)
1267 {
1268     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1269
1270     if (!gen7_vc1_surface)
1271         return;
1272
1273     dri_bo_unreference(gen7_vc1_surface->dmv);
1274     free(gen7_vc1_surface);
1275     *data = NULL;
1276 }
1277
1278 static void
1279 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1280                           VAPictureParameterBufferVC1 *pic_param,
1281                           struct object_surface *obj_surface)
1282 {
1283     struct i965_driver_data *i965 = i965_driver_data(ctx);
1284     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1285     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1286     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1287
1288     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1289
1290     if (!gen7_vc1_surface) {
1291         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1292         assert((obj_surface->size & 0x3f) == 0);
1293         obj_surface->private_data = gen7_vc1_surface;
1294     }
1295
1296     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1297
1298     if (gen7_vc1_surface->dmv == NULL) {
1299         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1300                                              "direct mv w/r buffer",
1301                                              width_in_mbs * height_in_mbs * 64,
1302                                              0x1000);
1303     }
1304 }
1305
/*
 * Per-picture setup for VC-1 decoding: bind the render surface as decoder
 * output (pre- or post-deblocking depending on the loop filter flag),
 * allocate the per-row scratch buffers, and repack the VA bitplane buffer
 * into the hardware layout when one is present.
 */
static void
gen8_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Both outputs alias the render surface BO; exactly one is marked
     * valid, selected by the in-loop filter flag. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers sized from the picture width in MBs. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    /* Repack the VA bitplane (two macroblocks per byte, 4 bits each) into
     * the buffer layout the hardware reads: bitplane_width bytes per MB
     * row. */
    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2; /* output bytes per MB row */
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                /* Source packs two MBs per byte; the even-indexed MB sits
                 * in the high nibble (src_shift is 4 for even indices). */
                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* NOTE(review): bit 1 is forced on for skipped pictures --
                 * presumably the hardware "skip MB" bitplane bit; confirm
                 * against the PRM. */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
                    src_value |= 0x2;
                }

                /* Shift each new nibble into the high half of the output
                 * byte, pushing the previous one down. */
                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* NOTE(review): for odd widths the last output byte is only
             * half-filled; this presumably normalizes it into the low
             * nibble -- confirm the expected layout. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
1424
/*
 * Translate the VA-API VC-1 picture parameters into an
 * MFD_VC1_LONG_PIC_STATE command and emit it into the BCS batch.
 *
 * All derived values (ALTPQUANT config, unified MV mode, FCM, overlap
 * smoothing, interpolation mode) are computed here from the VA picture
 * parameter buffer; the function has no return value and its only side
 * effects are the batch writes plus a normalization of the transform
 * fields inside pic_param (see the 8.3.6.2.1 note below).
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Derive the alternative-quantizer configuration and edge mask from
     * the DQUANT-related syntax elements.  The edge mask is a 4-bit
     * per-edge selector; config 2/3 apply the alternative quantizer to
     * macroblocks rather than edges.
     */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* per-macroblock; binary level picks config 3 */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0: /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge, selected by dq_db_edge */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge, selected by dq_sb_edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation the effective MV mode is carried in
     * mv_mode2 instead of mv_mode. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* b_picture_fraction indexes the scale-factor table; values >= 21
     * leave scale_factor at 0. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced-profile I pictures are programmed as BI for this state. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /* The direct-mode MV surface is only usable when the backward
     * reference is a picture type that carries motion vectors (i.e. not
     * I/BI and not missing). */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* FCM: modes 0/1 pass through; field pictures (mode 2) additionally
     * encode the field order in the low bit. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* NOTE(review): brfd is computed here but never emitted into the
     * command below — verify whether it is still needed. */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Overlap smoothing: simple/main profile keys off PQUANT and picture
     * type; advanced profile additionally honours CONDOVER for I/BI. */
    overlap = 0;
    if (profile != GEN7_VC1_ADVANCED_PROFILE){
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
            overlap = 1;
        }
    }else {
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
              overlap = 1;
        }
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                 overlap = 1;
             }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture size in macroblocks minus one (height | width) */
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    /* DW2: misc control flags */
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    /* DW3: picture type / fcm / quantizer scale / B scale factor */
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    /* DW4: motion-vector and quantizer configuration */
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane presence (inverted "raw mode" flags) and VLC table
     * selectors */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
1682
1683 static void
1684 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1685                              struct decode_state *decode_state,
1686                              struct gen7_mfd_context *gen7_mfd_context)
1687 {
1688     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1689     VAPictureParameterBufferVC1 *pic_param;
1690     int intensitycomp_single;
1691
1692     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1693     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1694
1695     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1696     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1697     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1698
1699     BEGIN_BCS_BATCH(batch, 6);
1700     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1701     OUT_BCS_BATCH(batch,
1702                   0 << 14 | /* FIXME: double ??? */
1703                   0 << 12 |
1704                   intensitycomp_single << 10 |
1705                   intensitycomp_single << 8 |
1706                   0 << 4 | /* FIXME: interlace mode */
1707                   0);
1708     OUT_BCS_BATCH(batch,
1709                   pic_param->luma_shift << 16 |
1710                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1711     OUT_BCS_BATCH(batch, 0);
1712     OUT_BCS_BATCH(batch, 0);
1713     OUT_BCS_BATCH(batch, 0);
1714     ADVANCE_BCS_BATCH(batch);
1715 }
1716
1717 static void
1718 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1719                               struct decode_state *decode_state,
1720                               struct gen7_mfd_context *gen7_mfd_context)
1721 {
1722     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1723     struct object_surface *obj_surface;
1724     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1725
1726     obj_surface = decode_state->render_object;
1727
1728     if (obj_surface && obj_surface->private_data) {
1729         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1730     }
1731
1732     obj_surface = decode_state->reference_objects[1];
1733
1734     if (obj_surface && obj_surface->private_data) {
1735         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1736     }
1737
1738     BEGIN_BCS_BATCH(batch, 7);
1739     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1740
1741     if (dmv_write_buffer)
1742         OUT_BCS_RELOC(batch, dmv_write_buffer,
1743                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1744                       0);
1745     else
1746         OUT_BCS_BATCH(batch, 0);
1747
1748     OUT_BCS_BATCH(batch, 0);
1749     OUT_BCS_BATCH(batch, 0);
1750
1751     if (dmv_read_buffer)
1752         OUT_BCS_RELOC(batch, dmv_read_buffer,
1753                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1754                       0);
1755     else
1756         OUT_BCS_BATCH(batch, 0);
1757     
1758     OUT_BCS_BATCH(batch, 0);
1759     OUT_BCS_BATCH(batch, 0);
1760                   
1761     ADVANCE_BCS_BATCH(batch);
1762 }
1763
/*
 * Convert the macroblock bit offset reported by VA-API (counted over the
 * unescaped bitstream) into a bit offset into the raw slice data.  For
 * the advanced profile (3) the raw stream contains 00 00 03 emulation-
 * prevention byte sequences inside the slice header, which must be
 * counted as extra bytes; other profiles pass the offset through.
 */
static int
gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    int header_bytes = in_slice_data_bit_offset / 8;
    int pos = 0; /* byte position in the escaped (raw) stream */
    int n;

    if (profile != 3)
        return in_slice_data_bit_offset;

    for (n = 0; n < header_bytes; n++, pos++) {
        /* A 00 00 03 0x sequence (x < 4) carries one stuffing byte:
         * advance an extra raw byte without consuming a header byte. */
        if (buf[pos] == 0 && buf[pos + 1] == 0 && buf[pos + 2] == 3 && buf[pos + 3] < 4) {
            n++;
            pos += 2;
        }
    }

    return 8 * pos + in_slice_data_bit_offset % 8;
}
1785
1786 static void
1787 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1788                         VAPictureParameterBufferVC1 *pic_param,
1789                         VASliceParameterBufferVC1 *slice_param,
1790                         VASliceParameterBufferVC1 *next_slice_param,
1791                         dri_bo *slice_data_bo,
1792                         struct gen7_mfd_context *gen7_mfd_context)
1793 {
1794     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1795     int next_slice_start_vert_pos;
1796     int macroblock_offset;
1797     uint8_t *slice_data = NULL;
1798
1799     dri_bo_map(slice_data_bo, 0);
1800     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1801     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1802                                                                slice_param->macroblock_offset,
1803                                                                pic_param->sequence_fields.bits.profile);
1804     dri_bo_unmap(slice_data_bo);
1805
1806     if (next_slice_param)
1807         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1808     else
1809         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1810
1811     BEGIN_BCS_BATCH(batch, 5);
1812     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1813     OUT_BCS_BATCH(batch, 
1814                   slice_param->slice_data_size - (macroblock_offset >> 3));
1815     OUT_BCS_BATCH(batch, 
1816                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1817     OUT_BCS_BATCH(batch,
1818                   slice_param->slice_vertical_position << 16 |
1819                   next_slice_start_vert_pos << 0);
1820     OUT_BCS_BATCH(batch,
1821                   (macroblock_offset & 0x7));
1822     ADVANCE_BCS_BATCH(batch);
1823 }
1824
/*
 * Top-level VC-1 picture decode: (re)initializes the per-picture
 * buffers, programs the common MFX pipe and the VC-1 specific states,
 * then emits one BSD object per slice.  The whole batch is built
 * atomically and flushed at the end.
 */
static void
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* j iterates over slice parameter buffers (groups), i over the
     * slices within each buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        /* The first slice of the next group (if any) supplies the lower
         * bound for the last slice of this group. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
1877
1878 static void
1879 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1880                           struct decode_state *decode_state,
1881                           struct gen7_mfd_context *gen7_mfd_context)
1882 {
1883     struct object_surface *obj_surface;
1884     VAPictureParameterBufferJPEGBaseline *pic_param;
1885     int subsampling = SUBSAMPLE_YUV420;
1886
1887     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1888
1889     if (pic_param->num_components == 1)
1890         subsampling = SUBSAMPLE_YUV400;
1891     else if (pic_param->num_components == 3) {
1892         int h1 = pic_param->components[0].h_sampling_factor;
1893         int h2 = pic_param->components[1].h_sampling_factor;
1894         int h3 = pic_param->components[2].h_sampling_factor;
1895         int v1 = pic_param->components[0].v_sampling_factor;
1896         int v2 = pic_param->components[1].v_sampling_factor;
1897         int v3 = pic_param->components[2].v_sampling_factor;
1898
1899         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1900             v1 == 2 && v2 == 1 && v3 == 1)
1901             subsampling = SUBSAMPLE_YUV420;
1902         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1903                  v1 == 1 && v2 == 1 && v3 == 1)
1904             subsampling = SUBSAMPLE_YUV422H;
1905         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1906                  v1 == 1 && v2 == 1 && v3 == 1)
1907             subsampling = SUBSAMPLE_YUV444;
1908         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1909                  v1 == 1 && v2 == 1 && v3 == 1)
1910             subsampling = SUBSAMPLE_YUV411;
1911         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1912                  v1 == 2 && v2 == 1 && v3 == 1)
1913             subsampling = SUBSAMPLE_YUV422V;
1914         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1915                  v1 == 2 && v2 == 2 && v3 == 2)
1916             subsampling = SUBSAMPLE_YUV422H;
1917         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
1918                  v1 == 2 && v2 == 1 && v3 == 1)
1919             subsampling = SUBSAMPLE_YUV422V;
1920         else
1921             assert(0);
1922     } else {
1923         assert(0);
1924     }
1925
1926     /* Current decoded picture */
1927     obj_surface = decode_state->render_object;
1928     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
1929
1930     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1931     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1932     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1933     gen7_mfd_context->pre_deblocking_output.valid = 1;
1934
1935     gen7_mfd_context->post_deblocking_output.bo = NULL;
1936     gen7_mfd_context->post_deblocking_output.valid = 0;
1937
1938     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1939     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1940
1941     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1942     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1943
1944     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1945     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1946
1947     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1948     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1949
1950     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1951     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1952 }
1953
/* Maps the VA rotation index (0/90/180/270 degrees) to the GEN7 MFX
 * JPEG rotation code; only index 0 (no rotation) is used below. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
1960
1961 static void
1962 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1963                         struct decode_state *decode_state,
1964                         struct gen7_mfd_context *gen7_mfd_context)
1965 {
1966     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1967     VAPictureParameterBufferJPEGBaseline *pic_param;
1968     int chroma_type = GEN7_YUV420;
1969     int frame_width_in_blks;
1970     int frame_height_in_blks;
1971
1972     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1973     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1974
1975     if (pic_param->num_components == 1)
1976         chroma_type = GEN7_YUV400;
1977     else if (pic_param->num_components == 3) {
1978         int h1 = pic_param->components[0].h_sampling_factor;
1979         int h2 = pic_param->components[1].h_sampling_factor;
1980         int h3 = pic_param->components[2].h_sampling_factor;
1981         int v1 = pic_param->components[0].v_sampling_factor;
1982         int v2 = pic_param->components[1].v_sampling_factor;
1983         int v3 = pic_param->components[2].v_sampling_factor;
1984
1985         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1986             v1 == 2 && v2 == 1 && v3 == 1)
1987             chroma_type = GEN7_YUV420;
1988         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1989                  v1 == 1 && v2 == 1 && v3 == 1)
1990             chroma_type = GEN7_YUV422H_2Y;
1991         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1992                  v1 == 1 && v2 == 1 && v3 == 1)
1993             chroma_type = GEN7_YUV444;
1994         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1995                  v1 == 1 && v2 == 1 && v3 == 1)
1996             chroma_type = GEN7_YUV411;
1997         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1998                  v1 == 2 && v2 == 1 && v3 == 1)
1999             chroma_type = GEN7_YUV422V_2Y;
2000         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2001                  v1 == 2 && v2 == 2 && v3 == 2)
2002             chroma_type = GEN7_YUV422H_4Y;
2003         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2004                  v1 == 2 && v2 == 1 && v3 == 1)
2005             chroma_type = GEN7_YUV422V_4Y;
2006         else
2007             assert(0);
2008     }
2009
2010     if (chroma_type == GEN7_YUV400 ||
2011         chroma_type == GEN7_YUV444 ||
2012         chroma_type == GEN7_YUV422V_2Y) {
2013         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2014         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2015     } else if (chroma_type == GEN7_YUV411) {
2016         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2017         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2018     } else {
2019         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2020         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2021     }
2022
2023     BEGIN_BCS_BATCH(batch, 3);
2024     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2025     OUT_BCS_BATCH(batch,
2026                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2027                   (chroma_type << 0));
2028     OUT_BCS_BATCH(batch,
2029                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2030                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2031     ADVANCE_BCS_BATCH(batch);
2032 }
2033
/* Huffman table slot selectors: index 0 is the luma (Y) table id,
 * index 1 the chroma (UV) table id. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
2038
2039 static void
2040 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2041                                struct decode_state *decode_state,
2042                                struct gen7_mfd_context *gen7_mfd_context,
2043                                int num_tables)
2044 {
2045     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2046     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2047     int index;
2048
2049     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2050         return;
2051
2052     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2053
2054     for (index = 0; index < num_tables; index++) {
2055         int id = va_to_gen7_jpeg_hufftable[index];
2056         if (!huffman_table->load_huffman_table[index])
2057             continue;
2058         BEGIN_BCS_BATCH(batch, 53);
2059         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2060         OUT_BCS_BATCH(batch, id);
2061         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2062         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2063         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2064         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2065         ADVANCE_BCS_BATCH(batch);
2066     }
2067 }
2068
/* Maps a 1-based component id (Y=1, Cb=2, Cr=3, alpha=4) to the MFX
 * quantizer-matrix selector; index 0 is unused (-1). */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
2076
2077 static void
2078 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2079                        struct decode_state *decode_state,
2080                        struct gen7_mfd_context *gen7_mfd_context)
2081 {
2082     VAPictureParameterBufferJPEGBaseline *pic_param;
2083     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2084     int index;
2085
2086     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2087         return;
2088
2089     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2090     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2091
2092     assert(pic_param->num_components <= 3);
2093
2094     for (index = 0; index < pic_param->num_components; index++) {
2095         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2096         int qm_type;
2097         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2098         unsigned char raster_qm[64];
2099         int j;
2100
2101         if (id > 4 || id < 1)
2102             continue;
2103
2104         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2105             continue;
2106
2107         qm_type = va_to_gen7_jpeg_qm[id];
2108
2109         for (j = 0; j < 64; j++)
2110             raster_qm[zigzag_direct[j]] = qm[j];
2111
2112         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2113     }
2114 }
2115
2116 static void
2117 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2118                          VAPictureParameterBufferJPEGBaseline *pic_param,
2119                          VASliceParameterBufferJPEGBaseline *slice_param,
2120                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2121                          dri_bo *slice_data_bo,
2122                          struct gen7_mfd_context *gen7_mfd_context)
2123 {
2124     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2125     int scan_component_mask = 0;
2126     int i;
2127
2128     assert(slice_param->num_components > 0);
2129     assert(slice_param->num_components < 4);
2130     assert(slice_param->num_components <= pic_param->num_components);
2131
2132     for (i = 0; i < slice_param->num_components; i++) {
2133         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2134         case 1:
2135             scan_component_mask |= (1 << 0);
2136             break;
2137         case 2:
2138             scan_component_mask |= (1 << 1);
2139             break;
2140         case 3:
2141             scan_component_mask |= (1 << 2);
2142             break;
2143         default:
2144             assert(0);
2145             break;
2146         }
2147     }
2148
2149     BEGIN_BCS_BATCH(batch, 6);
2150     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2151     OUT_BCS_BATCH(batch, 
2152                   slice_param->slice_data_size);
2153     OUT_BCS_BATCH(batch, 
2154                   slice_param->slice_data_offset);
2155     OUT_BCS_BATCH(batch,
2156                   slice_param->slice_horizontal_position << 16 |
2157                   slice_param->slice_vertical_position << 0);
2158     OUT_BCS_BATCH(batch,
2159                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2160                   (scan_component_mask << 27) |                 /* scan components */
2161                   (0 << 26) |   /* disable interrupt allowed */
2162                   (slice_param->num_mcus << 0));                /* MCU count */
2163     OUT_BCS_BATCH(batch,
2164                   (slice_param->restart_interval << 0));    /* RestartInterval */
2165     ADVANCE_BCS_BATCH(batch);
2166 }
2167
2168 /* Workaround for JPEG decoding on Ivybridge */
2169 #ifdef JPEG_WA
2170
2171 VAStatus 
2172 i965_DestroySurfaces(VADriverContextP ctx,
2173                      VASurfaceID *surface_list,
2174                      int num_surfaces);
2175 VAStatus 
2176 i965_CreateSurfaces(VADriverContextP ctx,
2177                     int width,
2178                     int height,
2179                     int format,
2180                     int num_surfaces,
2181                     VASurfaceID *surfaces);
2182
/* A tiny hard-coded 16x16 bitstream clip decoded through the AVC path
 * as part of the JPEG decoding workaround (see gen8_mfd_jpeg_wa). */
static struct {
    int width;                  /* clip width in pixels */
    int height;                 /* clip height in pixels */
    unsigned char data[32];     /* raw slice data bytes */
    int data_size;              /* number of valid bytes in data[] */
    int data_bit_offset;        /* bit offset of the slice payload within data[] */
    int qp;                     /* quantization parameter programmed in the slice state */
} gen7_jpeg_wa_clip = {
    .width = 16,
    .height = 16,
    .data = {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    .data_size = 14,
    .data_bit_offset = 40,
    .qp = 28,
};
2201
/* Set up the resources needed by the JPEG decoding workaround: a scratch
 * NV12 surface to decode the embedded clip into, and a BO holding the
 * clip's bitstream data. */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop any workaround surface left over from a previous decode. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* Upload the hard-coded clip bitstream once; the BO is kept for the
     * lifetime of the context and released in gen8_mfd_context_destroy().
     * NOTE(review): the dri_bo_alloc() result is not checked — confirm
     * whether allocation failure needs handling here. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
2239
2240 static void
2241 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2242                               struct gen7_mfd_context *gen7_mfd_context)
2243 {
2244     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2245
2246     BEGIN_BCS_BATCH(batch, 5);
2247     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2248     OUT_BCS_BATCH(batch,
2249                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2250                   (MFD_MODE_VLD << 15) | /* VLD mode */
2251                   (0 << 10) | /* disable Stream-Out */
2252                   (0 << 9)  | /* Post Deblocking Output */
2253                   (1 << 8)  | /* Pre Deblocking Output */
2254                   (0 << 5)  | /* not in stitch mode */
2255                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2256                   (MFX_FORMAT_AVC << 0));
2257     OUT_BCS_BATCH(batch,
2258                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2259                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2260                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2261                   (0 << 1)  |
2262                   (0 << 0));
2263     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2264     OUT_BCS_BATCH(batch, 0); /* reserved */
2265     ADVANCE_BCS_BATCH(batch);
2266 }
2267
/* Describe the workaround NV12 surface (tiled, planar 4:2:0) to the MFX
 * engine as the decode target for the embedded clip. */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
2297
2298 static void
2299 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2300                                  struct gen7_mfd_context *gen7_mfd_context)
2301 {
2302     struct i965_driver_data *i965 = i965_driver_data(ctx);
2303     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2304     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2305     dri_bo *intra_bo;
2306     int i;
2307
2308     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2309                             "intra row store",
2310                             128 * 64,
2311                             0x1000);
2312
2313     BEGIN_BCS_BATCH(batch, 61);
2314     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2315     OUT_BCS_RELOC(batch,
2316                   obj_surface->bo,
2317                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2318                   0);
2319         OUT_BCS_BATCH(batch, 0);
2320         OUT_BCS_BATCH(batch, 0);
2321     
2322
2323     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2324         OUT_BCS_BATCH(batch, 0);
2325         OUT_BCS_BATCH(batch, 0);
2326
2327         /* uncompressed-video & stream out 7-12 */
2328     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2329     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2330         OUT_BCS_BATCH(batch, 0);
2331         OUT_BCS_BATCH(batch, 0);
2332         OUT_BCS_BATCH(batch, 0);
2333         OUT_BCS_BATCH(batch, 0);
2334
2335         /* the DW 13-15 is for intra row store scratch */
2336     OUT_BCS_RELOC(batch,
2337                   intra_bo,
2338                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2339                   0);
2340         OUT_BCS_BATCH(batch, 0);
2341         OUT_BCS_BATCH(batch, 0);
2342
2343         /* the DW 16-18 is for deblocking filter */ 
2344     OUT_BCS_BATCH(batch, 0);
2345         OUT_BCS_BATCH(batch, 0);
2346         OUT_BCS_BATCH(batch, 0);
2347
2348     /* DW 19..50 */
2349     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2350         OUT_BCS_BATCH(batch, 0);
2351         OUT_BCS_BATCH(batch, 0);
2352     }
2353     OUT_BCS_BATCH(batch, 0);
2354
2355         /* the DW52-54 is for mb status address */
2356     OUT_BCS_BATCH(batch, 0);
2357         OUT_BCS_BATCH(batch, 0);
2358         OUT_BCS_BATCH(batch, 0);
2359         /* the DW56-60 is for ILDB & second ILDB address */
2360     OUT_BCS_BATCH(batch, 0);
2361         OUT_BCS_BATCH(batch, 0);
2362         OUT_BCS_BATCH(batch, 0);
2363     OUT_BCS_BATCH(batch, 0);
2364         OUT_BCS_BATCH(batch, 0);
2365         OUT_BCS_BATCH(batch, 0);
2366
2367     ADVANCE_BCS_BATCH(batch);
2368
2369     dri_bo_unreference(intra_bo);
2370 }
2371
/* Program the BSD/MPC and MPR row store base addresses for the workaround
 * decode.  Both scratch BOs are only needed until the batch holds a
 * relocation to them, so the local references are dropped on exit.
 * NOTE(review): dri_bo_alloc() results are not checked — confirm whether
 * allocation failure needs handling here. */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
2417
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    /* Intentionally empty: no QM programming is emitted for the
     * workaround clip (presumably the hardware defaults suffice —
     * TODO confirm against the gen7 JPEG workaround implementation). */
}
2424
/* Minimal AVC image state for the workaround clip: a 1x1-macroblock
 * frame (width_in_mbs = height_in_mbs = 1), 4:2:0, CABAC. */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;         /* frame picture */
    int mbaff_frame_flag = 0;   /* no MBAFF */
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch, 
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch, 
                  ((height_in_mbs - 1) << 16) | 
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch, 
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* Remaining DWs are zero for the workaround decode. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2472
2473 static void
2474 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2475                                   struct gen7_mfd_context *gen7_mfd_context)
2476 {
2477     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2478     int i;
2479
2480     BEGIN_BCS_BATCH(batch, 71);
2481     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2482
2483     /* reference surfaces 0..15 */
2484     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2485         OUT_BCS_BATCH(batch, 0); /* top */
2486         OUT_BCS_BATCH(batch, 0); /* bottom */
2487     }
2488         
2489         OUT_BCS_BATCH(batch, 0);
2490
2491     /* the current decoding frame/field */
2492     OUT_BCS_BATCH(batch, 0); /* top */
2493     OUT_BCS_BATCH(batch, 0);
2494     OUT_BCS_BATCH(batch, 0);
2495
2496     /* POC List */
2497     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2498         OUT_BCS_BATCH(batch, 0);
2499         OUT_BCS_BATCH(batch, 0);
2500     }
2501
2502     OUT_BCS_BATCH(batch, 0);
2503     OUT_BCS_BATCH(batch, 0);
2504
2505     ADVANCE_BCS_BATCH(batch);
2506 }
2507
/* Point the MFX indirect bitstream object at the BO holding the uploaded
 * workaround clip data (see gen8_jpeg_wa_init). */
static void
gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2531
/* Emit the AVC BSD object for the workaround clip: the whole hard-coded
 * bitstream as a single, last slice, with the slice payload located via
 * data_bit_offset (byte part in bits 16.., bit part in bits 0..2). */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2558
/* Program a single I slice covering the whole 1x1-MB workaround picture,
 * with deblocking disabled and the clip's QP. */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;   /* I slice: no reference lists */
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch, 
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) | 
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
2598
/* JPEG decoding workaround: decode the embedded AVC clip before the real
 * JPEG decode.  Emits the full MFX state + BSD sequence for the clip into
 * the current batch (see the "Workaround for JPEG decoding" block above). */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
2618
2619 #endif
2620
2621 void
2622 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2623                              struct decode_state *decode_state,
2624                              struct gen7_mfd_context *gen7_mfd_context)
2625 {
2626     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2627     VAPictureParameterBufferJPEGBaseline *pic_param;
2628     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2629     dri_bo *slice_data_bo;
2630     int i, j, max_selector = 0;
2631
2632     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2633     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2634
2635     /* Currently only support Baseline DCT */
2636     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2637     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2638 #ifdef JPEG_WA
2639     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2640 #endif
2641     intel_batchbuffer_emit_mi_flush(batch);
2642     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2643     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2644     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2645     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2646     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2647
2648     for (j = 0; j < decode_state->num_slice_params; j++) {
2649         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2650         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2651         slice_data_bo = decode_state->slice_datas[j]->bo;
2652         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2653
2654         if (j == decode_state->num_slice_params - 1)
2655             next_slice_group_param = NULL;
2656         else
2657             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2658
2659         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2660             int component;
2661
2662             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2663
2664             if (i < decode_state->slice_params[j]->num_elements - 1)
2665                 next_slice_param = slice_param + 1;
2666             else
2667                 next_slice_param = next_slice_group_param;
2668
2669             for (component = 0; component < slice_param->num_components; component++) {
2670                 if (max_selector < slice_param->components[component].dc_table_selector)
2671                     max_selector = slice_param->components[component].dc_table_selector;
2672
2673                 if (max_selector < slice_param->components[component].ac_table_selector)
2674                     max_selector = slice_param->components[component].ac_table_selector;
2675             }
2676
2677             slice_param++;
2678         }
2679     }
2680
2681     assert(max_selector < 2);
2682     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2683
2684     for (j = 0; j < decode_state->num_slice_params; j++) {
2685         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2686         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2687         slice_data_bo = decode_state->slice_datas[j]->bo;
2688         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2689
2690         if (j == decode_state->num_slice_params - 1)
2691             next_slice_group_param = NULL;
2692         else
2693             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2694
2695         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2696             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2697
2698             if (i < decode_state->slice_params[j]->num_elements - 1)
2699                 next_slice_param = slice_param + 1;
2700             else
2701                 next_slice_param = next_slice_group_param;
2702
2703             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2704             slice_param++;
2705         }
2706     }
2707
2708     intel_batchbuffer_end_atomic(batch);
2709     intel_batchbuffer_flush(batch);
2710 }
2711
2712 static VAStatus
2713 gen8_mfd_decode_picture(VADriverContextP ctx, 
2714                         VAProfile profile, 
2715                         union codec_state *codec_state,
2716                         struct hw_context *hw_context)
2717
2718 {
2719     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2720     struct decode_state *decode_state = &codec_state->decode;
2721     VAStatus vaStatus;
2722
2723     assert(gen7_mfd_context);
2724
2725     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
2726
2727     if (vaStatus != VA_STATUS_SUCCESS)
2728         goto out;
2729
2730     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2731
2732     switch (profile) {
2733     case VAProfileMPEG2Simple:
2734     case VAProfileMPEG2Main:
2735         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
2736         break;
2737         
2738     case VAProfileH264Baseline:
2739     case VAProfileH264Main:
2740     case VAProfileH264High:
2741         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
2742         break;
2743
2744     case VAProfileVC1Simple:
2745     case VAProfileVC1Main:
2746     case VAProfileVC1Advanced:
2747         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
2748         break;
2749
2750     case VAProfileJPEGBaseline:
2751         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
2752         break;
2753
2754     default:
2755         assert(0);
2756         break;
2757     }
2758
2759     vaStatus = VA_STATUS_SUCCESS;
2760
2761 out:
2762     return vaStatus;
2763 }
2764
/* hw_context destroy hook: release every BO owned by the decoder context,
 * free its batch buffer, then the context itself.  (dri_bo_unreference()
 * accepts NULL, so unallocated buffers are safe to pass.) */
static void
gen8_mfd_context_destroy(void *hw_context)
{
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;

    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
    gen7_mfd_context->bitplane_read_buffer.bo = NULL;

    /* JPEG workaround bitstream BO (allocated lazily in gen8_jpeg_wa_init). */
    dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);

    intel_batchbuffer_free(gen7_mfd_context->base.batch);
    free(gen7_mfd_context);
}
2796
/* MPEG-2 specific context init: -1 marks each cached quantiser matrix as
 * not yet loaded (presumably forcing a (re)load on the first picture —
 * confirm against the MPEG-2 IQ matrix handling). */
static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
                                    struct gen7_mfd_context *gen7_mfd_context)
{
    gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
}
2805
2806 struct hw_context *
2807 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
2808 {
2809     struct intel_driver_data *intel = intel_driver_data(ctx);
2810     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
2811     int i;
2812
2813     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
2814     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
2815     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
2816
2817     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
2818         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2819         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
2820     }
2821
2822     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
2823
2824     switch (obj_config->profile) {
2825     case VAProfileMPEG2Simple:
2826     case VAProfileMPEG2Main:
2827         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
2828         break;
2829
2830     case VAProfileH264Baseline:
2831     case VAProfileH264Main:
2832     case VAProfileH264High:
2833         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
2834         break;
2835     default:
2836         break;
2837     }
2838     return (struct hw_context *)gen7_mfd_context;
2839 }