BDW doesn't support H.264 Baseline profile
[platform/upstream/libva-intel-driver.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
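/*
 * Zig-zag scan order; used below to reorder the MPEG-2 quantiser matrices
 * from zig-zag to raster order before they are loaded with MFX_QM_STATE
 * (see gen8_mfd_mpeg2_qm_state()).
 */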
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
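/*
 * Attach a GenAvcSurface to the decoded picture and lazily allocate its
 * direct-MV read/write buffers (128 bytes per macroblock). A separate
 * bottom-field buffer is only needed for field pictures decoded with
 * direct_8x8_inference disabled.
 */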
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         assert((obj_surface->size & 0x3f) == 0);
78         obj_surface->private_data = gen7_avc_surface;
79     }
80
81     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
82                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
83
84     if (gen7_avc_surface->dmv_top == NULL) {
85         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86                                                  "direct mv w/r buffer",
87                                                  width_in_mbs * height_in_mbs * 128,
88                                                  0x1000);
89         assert(gen7_avc_surface->dmv_top);
90     }
91
92     if (gen7_avc_surface->dmv_bottom_flag &&
93         gen7_avc_surface->dmv_bottom == NULL) {
94         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
95                                                     "direct mv w/r buffer",
96                                                     width_in_mbs * height_in_mbs * 128,
97                                                     0x1000);
98         assert(gen7_avc_surface->dmv_bottom);
99     }
100 }
101
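/*
 * MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decoding of the
 * selected standard, with stream-out disabled and the pre-/post-deblocking
 * output enabled according to what the caller marked valid.
 */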
102 static void
103 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
104                           struct decode_state *decode_state,
105                           int standard_select,
106                           struct gen7_mfd_context *gen7_mfd_context)
107 {
108     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
109
110     assert(standard_select == MFX_FORMAT_MPEG2 ||
111            standard_select == MFX_FORMAT_AVC ||
112            standard_select == MFX_FORMAT_VC1 ||
113            standard_select == MFX_FORMAT_JPEG ||
114            standard_select == MFX_FORMAT_VP8);
115
116     BEGIN_BCS_BATCH(batch, 5);
117     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
118     OUT_BCS_BATCH(batch,
119                   (MFX_LONG_MODE << 17) | /* Currently only the long format is supported */
120                   (MFD_MODE_VLD << 15) | /* VLD mode */
121                   (0 << 10) | /* disable Stream-Out */
122                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
123                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
124                   (0 << 5)  | /* not in stitch mode */
125                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
126                   (standard_select << 0));
127     OUT_BCS_BATCH(batch,
128                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
129                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
130                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
131                   (0 << 1)  |
132                   (0 << 0));
133     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
134     OUT_BCS_BATCH(batch, 0); /* reserved */
135     ADVANCE_BCS_BATCH(batch);
136 }
137
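/*
 * MFX_SURFACE_STATE: describe the destination surface as 8-bit planar 4:2:0,
 * Y-major tiled, and give its dimensions, pitch and Cb/Cr plane offsets.
 * JPEG is the only format decoded without interleaved chroma.
 */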
138 static void
139 gen8_mfd_surface_state(VADriverContextP ctx,
140                        struct decode_state *decode_state,
141                        int standard_select,
142                        struct gen7_mfd_context *gen7_mfd_context)
143 {
144     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
145     struct object_surface *obj_surface = decode_state->render_object;
146     unsigned int y_cb_offset;
147     unsigned int y_cr_offset;
148
149     assert(obj_surface);
150
151     y_cb_offset = obj_surface->y_cb_offset;
152     y_cr_offset = obj_surface->y_cr_offset;
153
154     BEGIN_BCS_BATCH(batch, 6);
155     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
156     OUT_BCS_BATCH(batch, 0);
157     OUT_BCS_BATCH(batch,
158                   ((obj_surface->orig_height - 1) << 18) |
159                   ((obj_surface->orig_width - 1) << 4));
160     OUT_BCS_BATCH(batch,
161                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
162                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
163                   (0 << 22) | /* surface object control state, ignored */
164                   ((obj_surface->width - 1) << 3) | /* pitch */
165                   (0 << 2)  | /* must be 0 */
166                   (1 << 1)  | /* must be tiled */
167                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
168     OUT_BCS_BATCH(batch,
169                   (0 << 16) | /* X offset for U(Cb), must be 0 */
170                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
171     OUT_BCS_BATCH(batch,
172                   (0 << 16) | /* X offset for V(Cr), must be 0 */
173                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
174     ADVANCE_BCS_BATCH(batch);
175 }
176
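/*
 * MFX_PIPE_BUF_ADDR_STATE (61 dwords): pre- and post-deblocking destination
 * surfaces, the intra and deblocking-filter row-store scratch buffers, and
 * one address per reference picture in DW 19..50. Entries not needed for
 * the current picture are simply zeroed.
 */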
177 static void
178 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
179                              struct decode_state *decode_state,
180                              int standard_select,
181                              struct gen7_mfd_context *gen7_mfd_context)
182 {
183     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
184     int i;
185
186     BEGIN_BCS_BATCH(batch, 61);
187     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188         /* Pre-deblock 1-3 */
189     if (gen7_mfd_context->pre_deblocking_output.valid)
190         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
191                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
192                       0);
193     else
194         OUT_BCS_BATCH(batch, 0);
195
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198         /* Post-deblocking 4-6 */
199     if (gen7_mfd_context->post_deblocking_output.valid)
200         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
201                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                       0);
203     else
204         OUT_BCS_BATCH(batch, 0);
205
206     OUT_BCS_BATCH(batch, 0);
207     OUT_BCS_BATCH(batch, 0);
208
209         /* uncompressed-video & stream out 7-12 */
210     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
212     OUT_BCS_BATCH(batch, 0);
213     OUT_BCS_BATCH(batch, 0);
214     OUT_BCS_BATCH(batch, 0);
215     OUT_BCS_BATCH(batch, 0);
216
217         /* intra row-store scratch 13-15 */
218     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
219         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
220                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
221                       0);
222     else
223         OUT_BCS_BATCH(batch, 0);
224
225     OUT_BCS_BATCH(batch, 0);
226     OUT_BCS_BATCH(batch, 0);
227         /* deblocking-filter-row-store 16-18 */
228     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
229         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
230                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
231                       0);
232     else
233         OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     OUT_BCS_BATCH(batch, 0);
236
237     /* DW 19..50 */
238     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
239         struct object_surface *obj_surface;
240
241         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
242             gen7_mfd_context->reference_surface[i].obj_surface &&
243             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
244             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
245
246             OUT_BCS_RELOC(batch, obj_surface->bo,
247                           I915_GEM_DOMAIN_INSTRUCTION, 0,
248                           0);
249         } else {
250             OUT_BCS_BATCH(batch, 0);
251         }
252         
253         OUT_BCS_BATCH(batch, 0);
254     }
255     
256     /* reference property 51 */
257     OUT_BCS_BATCH(batch, 0);  
258         
259     /* Macroblock status & ILDB 52-57 */
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266
267     /* the second Macroblock status 58-60 */    
268     OUT_BCS_BATCH(batch, 0);
269     OUT_BCS_BATCH(batch, 0);
270     OUT_BCS_BATCH(batch, 0);
271
272     ADVANCE_BCS_BATCH(batch);
273 }
274
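/*
 * MFX_IND_OBJ_BASE_ADDR_STATE: point the indirect bitstream object at the
 * slice data BO. The MV, IT-COEFF, IT-DBLK and PAK-BSE sections are unused
 * in VLD decode mode and left zero.
 */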
275 static void
276 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
277                                  dri_bo *slice_data_bo,
278                                  int standard_select,
279                                  struct gen7_mfd_context *gen7_mfd_context)
280 {
281     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
282
283     BEGIN_BCS_BATCH(batch, 26);
284     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
285         /* MFX In BS 1-5 */
286     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
287     OUT_BCS_BATCH(batch, 0);
288     OUT_BCS_BATCH(batch, 0);
289         /* Upper bound 4-5 */   
290     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
291     OUT_BCS_BATCH(batch, 0);
292
293         /* MFX indirect MV 6-10 */
294     OUT_BCS_BATCH(batch, 0);
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299         
300         /* MFX IT_COFF 11-15 */
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306
307         /* MFX IT_DBLK 16-20 */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313
314         /* MFX PAK_BSE object for encoder 21-25 */
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
319     OUT_BCS_BATCH(batch, 0);
320
321     ADVANCE_BCS_BATCH(batch);
322 }
323
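/*
 * MFX_BSP_BUF_BASE_ADDR_STATE: BSD/MPC and MPR row-store scratch buffers
 * plus the VC-1 bitplane read buffer; each address is emitted only when the
 * corresponding buffer is valid.
 */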
324 static void
325 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
326                                  struct decode_state *decode_state,
327                                  int standard_select,
328                                  struct gen7_mfd_context *gen7_mfd_context)
329 {
330     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
331
332     BEGIN_BCS_BATCH(batch, 10);
333     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
334
335     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
336         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
337                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
338                       0);
339     else
340         OUT_BCS_BATCH(batch, 0);
341
342     OUT_BCS_BATCH(batch, 0);
343     OUT_BCS_BATCH(batch, 0);
344         /* MPR Row Store Scratch buffer 4-6 */
345     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
346         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
347                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
348                       0);
349     else
350         OUT_BCS_BATCH(batch, 0);
351
352     OUT_BCS_BATCH(batch, 0);
353     OUT_BCS_BATCH(batch, 0);
354
355         /* Bitplane 7-9 */ 
356     if (gen7_mfd_context->bitplane_read_buffer.valid)
357         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
358                       I915_GEM_DOMAIN_INSTRUCTION, 0,
359                       0);
360     else
361         OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0);
364     ADVANCE_BCS_BATCH(batch);
365 }
366
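/*
 * MFX_QM_STATE: load one quantiser matrix of the given type. The command
 * always carries a 16-dword payload; qm_length (at most 64 bytes) of it is
 * taken from the caller's matrix.
 */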
367 static void
368 gen8_mfd_qm_state(VADriverContextP ctx,
369                   int qm_type,
370                   unsigned char *qm,
371                   int qm_length,
372                   struct gen7_mfd_context *gen7_mfd_context)
373 {
374     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
375     unsigned int qm_buffer[16];
376
377     assert(qm_length <= 16 * 4);
378     memcpy(qm_buffer, qm, qm_length);
379
380     BEGIN_BCS_BATCH(batch, 18);
381     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
382     OUT_BCS_BATCH(batch, qm_type << 0);
383     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
384     ADVANCE_BCS_BATCH(batch);
385 }
386
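/*
 * MFX_AVC_IMG_STATE: per-picture AVC parameters taken from the VA picture
 * parameter buffer: frame size in macroblocks, chroma QP offsets, entropy
 * coding mode, weighted prediction and the frame/field/MBAFF structure.
 * Only monochrome and 4:2:0 streams are accepted by the MFX unit.
 */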
387 static void
388 gen8_mfd_avc_img_state(VADriverContextP ctx,
389                        struct decode_state *decode_state,
390                        struct gen7_mfd_context *gen7_mfd_context)
391 {
392     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
393     int img_struct;
394     int mbaff_frame_flag;
395     unsigned int width_in_mbs, height_in_mbs;
396     VAPictureParameterBufferH264 *pic_param;
397
398     assert(decode_state->pic_param && decode_state->pic_param->buffer);
399     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
400     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
401
402     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
403         img_struct = 1;
404     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
405         img_struct = 3;
406     else
407         img_struct = 0;
408
409     if ((img_struct & 0x1) == 0x1) {
410         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
411     } else {
412         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
413     }
414
415     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
416         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
417         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
418     } else {
419         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
420     }
421
422     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
423                         !pic_param->pic_fields.bits.field_pic_flag);
424
425     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
426     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
427
428     /* MFX unit doesn't support 4:2:2 or 4:4:4 pictures */
429     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
430            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
431     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
432
433     BEGIN_BCS_BATCH(batch, 17);
434     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
435     OUT_BCS_BATCH(batch, 
436                   width_in_mbs * height_in_mbs);
437     OUT_BCS_BATCH(batch, 
438                   ((height_in_mbs - 1) << 16) | 
439                   ((width_in_mbs - 1) << 0));
440     OUT_BCS_BATCH(batch, 
441                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
442                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
443                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
444                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
445                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
446                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
447                   (img_struct << 8));
448     OUT_BCS_BATCH(batch,
449                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
450                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
451                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
452                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
453                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
454                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
455                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
456                   (mbaff_frame_flag << 1) |
457                   (pic_param->pic_fields.bits.field_pic_flag << 0));
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     OUT_BCS_BATCH(batch, 0);
466     OUT_BCS_BATCH(batch, 0);
467     OUT_BCS_BATCH(batch, 0);
468     OUT_BCS_BATCH(batch, 0);
469     OUT_BCS_BATCH(batch, 0);
470     ADVANCE_BCS_BATCH(batch);
471 }
472
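/*
 * Load the AVC scaling lists: the 4x4 intra/inter matrices always, the 8x8
 * matrices only when transform_8x8_mode_flag is set. If the application did
 * not pass a VAIQMatrixBufferH264, the context's default flat matrices are
 * used instead.
 */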
473 static void
474 gen8_mfd_avc_qm_state(VADriverContextP ctx,
475                       struct decode_state *decode_state,
476                       struct gen7_mfd_context *gen7_mfd_context)
477 {
478     VAIQMatrixBufferH264 *iq_matrix;
479     VAPictureParameterBufferH264 *pic_param;
480
481     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
482         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
483     else
484         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
485
486     assert(decode_state->pic_param && decode_state->pic_param->buffer);
487     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
488
489     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
490     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
491
492     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
493         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
494         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
495     }
496 }
497
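/* MFD_AVC_PICID_STATE with picture ID remapping disabled. */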
498 static void
499 gen8_mfd_avc_picid_state(VADriverContextP ctx,
500                       struct decode_state *decode_state,
501                       struct gen7_mfd_context *gen7_mfd_context)
502 {
503     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
504
505     BEGIN_BCS_BATCH(batch, 10);
506     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
507     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
508     OUT_BCS_BATCH(batch, 0);
509     OUT_BCS_BATCH(batch, 0);
510     OUT_BCS_BATCH(batch, 0);
511     OUT_BCS_BATCH(batch, 0);
512     OUT_BCS_BATCH(batch, 0);
513     OUT_BCS_BATCH(batch, 0);
514     OUT_BCS_BATCH(batch, 0);
515     OUT_BCS_BATCH(batch, 0);
516     ADVANCE_BCS_BATCH(batch);
517 }
518
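/*
 * MFX_AVC_DIRECTMODE_STATE: direct-MV buffer addresses for the reference
 * surfaces and the current picture, followed by the top/bottom field order
 * counts of every reference frame and of the current picture.
 */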
519 static void
520 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
521                               struct decode_state *decode_state,
522                               VAPictureParameterBufferH264 *pic_param,
523                               VASliceParameterBufferH264 *slice_param,
524                               struct gen7_mfd_context *gen7_mfd_context)
525 {
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527     struct object_surface *obj_surface;
528     GenAvcSurface *gen7_avc_surface;
529     VAPictureH264 *va_pic;
530     int i, j;
531
532     BEGIN_BCS_BATCH(batch, 71);
533     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
534
535     /* reference surfaces 0..15 */
536     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538             gen7_mfd_context->reference_surface[i].obj_surface &&
539             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
540
541             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542             gen7_avc_surface = obj_surface->private_data;
543
544             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545                           I915_GEM_DOMAIN_INSTRUCTION, 0,
546                           0);
547             OUT_BCS_BATCH(batch, 0);
548         } else {
549             OUT_BCS_BATCH(batch, 0);
550             OUT_BCS_BATCH(batch, 0);
551         }
552     }
553     
554     OUT_BCS_BATCH(batch, 0);
555
556     /* the current decoding frame/field */
557     va_pic = &pic_param->CurrPic;
558     obj_surface = decode_state->render_object;
559     assert(obj_surface->bo && obj_surface->private_data);
560     gen7_avc_surface = obj_surface->private_data;
561
562     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0);
565
566     OUT_BCS_BATCH(batch, 0);
567     OUT_BCS_BATCH(batch, 0);
568
569     /* POC List */
570     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
571         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
572             int found = 0;
573
574             assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
575
576             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
577                 va_pic = &pic_param->ReferenceFrames[j];
578                 
579                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
580                     continue;
581
582                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
583                     found = 1;
584                     break;
585                 }
586             }
587
588             assert(found == 1);
589             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
590             
591             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
592             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
593         } else {
594             OUT_BCS_BATCH(batch, 0);
595             OUT_BCS_BATCH(batch, 0);
596         }
597     }
598
599     va_pic = &pic_param->CurrPic;
600     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
601     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
602
603     ADVANCE_BCS_BATCH(batch);
604 }
605
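/*
 * MFX_AVC_SLICE_STATE: slice type, active reference counts, weight
 * denominators, QP and deblocking controls, plus the macroblock positions
 * where this slice starts and the next one begins (or the picture ends).
 */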
606 static void
607 gen8_mfd_avc_slice_state(VADriverContextP ctx,
608                          VAPictureParameterBufferH264 *pic_param,
609                          VASliceParameterBufferH264 *slice_param,
610                          VASliceParameterBufferH264 *next_slice_param,
611                          struct gen7_mfd_context *gen7_mfd_context)
612 {
613     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
614     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
615     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
616     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
617     int num_ref_idx_l0, num_ref_idx_l1;
618     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
619                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
620     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
621     int slice_type;
622
623     if (slice_param->slice_type == SLICE_TYPE_I ||
624         slice_param->slice_type == SLICE_TYPE_SI) {
625         slice_type = SLICE_TYPE_I;
626     } else if (slice_param->slice_type == SLICE_TYPE_P ||
627                slice_param->slice_type == SLICE_TYPE_SP) {
628         slice_type = SLICE_TYPE_P;
629     } else { 
630         assert(slice_param->slice_type == SLICE_TYPE_B);
631         slice_type = SLICE_TYPE_B;
632     }
633
634     if (slice_type == SLICE_TYPE_I) {
635         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
636         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
637         num_ref_idx_l0 = 0;
638         num_ref_idx_l1 = 0;
639     } else if (slice_type == SLICE_TYPE_P) {
640         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
641         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
642         num_ref_idx_l1 = 0;
643     } else {
644         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
645         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
646     }
647
648     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
649     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
650     slice_ver_pos = first_mb_in_slice / width_in_mbs;
651
652     if (next_slice_param) {
653         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
654         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
655         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
656     } else {
657         next_slice_hor_pos = 0;
658         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
659     }
660
661     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663     OUT_BCS_BATCH(batch, slice_type);
664     OUT_BCS_BATCH(batch, 
665                   (num_ref_idx_l1 << 24) |
666                   (num_ref_idx_l0 << 16) |
667                   (slice_param->chroma_log2_weight_denom << 8) |
668                   (slice_param->luma_log2_weight_denom << 0));
669     OUT_BCS_BATCH(batch, 
670                   (slice_param->direct_spatial_mv_pred_flag << 29) |
671                   (slice_param->disable_deblocking_filter_idc << 27) |
672                   (slice_param->cabac_init_idc << 24) |
673                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
676     OUT_BCS_BATCH(batch, 
677                   (slice_ver_pos << 24) |
678                   (slice_hor_pos << 16) | 
679                   (first_mb_in_slice << 0));
680     OUT_BCS_BATCH(batch,
681                   (next_slice_ver_pos << 16) |
682                   (next_slice_hor_pos << 0));
683     OUT_BCS_BATCH(batch, 
684                   (next_slice_param == NULL) << 19); /* last slice flag */
685     OUT_BCS_BATCH(batch, 0);
686     OUT_BCS_BATCH(batch, 0);
687     OUT_BCS_BATCH(batch, 0);
688     OUT_BCS_BATCH(batch, 0);
689     ADVANCE_BCS_BATCH(batch);
690 }
691
692 static inline void
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694                            VAPictureParameterBufferH264 *pic_param,
695                            VASliceParameterBufferH264 *slice_param,
696                            struct gen7_mfd_context *gen7_mfd_context)
697 {
698     gen6_send_avc_ref_idx_state(
699         gen7_mfd_context->base.batch,
700         slice_param,
701         gen7_mfd_context->reference_surface
702     );
703 }
704
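/*
 * MFX_AVC_WEIGHTOFFSET_STATE: explicit weighted-prediction tables. One L0
 * table is sent for weighted P/SP slices, L0 and L1 tables for B slices
 * with weighted_bipred_idc == 1; nothing is sent otherwise.
 */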
705 static void
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707                                 VAPictureParameterBufferH264 *pic_param,
708                                 VASliceParameterBufferH264 *slice_param,
709                                 struct gen7_mfd_context *gen7_mfd_context)
710 {
711     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712     int i, j, num_weight_offset_table = 0;
713     short weightoffsets[32 * 6];
714
715     if ((slice_param->slice_type == SLICE_TYPE_P ||
716          slice_param->slice_type == SLICE_TYPE_SP) &&
717         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718         num_weight_offset_table = 1;
719     }
720     
721     if ((slice_param->slice_type == SLICE_TYPE_B) &&
722         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723         num_weight_offset_table = 2;
724     }
725
726     for (i = 0; i < num_weight_offset_table; i++) {
727         BEGIN_BCS_BATCH(batch, 98);
728         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729         OUT_BCS_BATCH(batch, i);
730
731         if (i == 0) {
732             for (j = 0; j < 32; j++) {
733                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
739             }
740         } else {
741             for (j = 0; j < 32; j++) {
742                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
748             }
749         }
750
751         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752         ADVANCE_BCS_BATCH(batch);
753     }
754 }
755
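/*
 * MFD_AVC_BSD_OBJECT: kick off decoding of one slice, passing its size and
 * offset within the indirect bitstream buffer and the bit offset of the
 * first macroblock after the slice header.
 */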
756 static void
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758                         VAPictureParameterBufferH264 *pic_param,
759                         VASliceParameterBufferH264 *slice_param,
760                         dri_bo *slice_data_bo,
761                         VASliceParameterBufferH264 *next_slice_param,
762                         struct gen7_mfd_context *gen7_mfd_context)
763 {
764     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
766                                                             slice_param,
767                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
768
769     /* the input bitstream format on GEN7 differs from GEN6 */
770     BEGIN_BCS_BATCH(batch, 6);
771     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
772     OUT_BCS_BATCH(batch, 
773                   (slice_param->slice_data_size));
774     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
775     OUT_BCS_BATCH(batch,
776                   (0 << 31) |
777                   (0 << 14) |
778                   (0 << 12) |
779                   (0 << 10) |
780                   (0 << 8));
781     OUT_BCS_BATCH(batch,
782                   ((slice_data_bit_offset >> 3) << 16) |
783                   (1 << 7)  |
784                   (0 << 5)  |
785                   (0 << 4)  |
786                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787                   (slice_data_bit_offset & 0x7));
788     OUT_BCS_BATCH(batch, 0);
789     ADVANCE_BCS_BATCH(batch);
790 }
791
792 static inline void
793 gen8_mfd_avc_context_init(
794     VADriverContextP         ctx,
795     struct gen7_mfd_context *gen7_mfd_context
796 )
797 {
798     /* Initialize flat scaling lists */
799     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
800 }
801
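/*
 * Per-picture AVC setup: scan the slices to see whether in-loop deblocking
 * is enabled (which selects the post- vs pre-deblocking output), allocate
 * the render target and its direct-MV buffers, and (re)allocate the
 * row-store scratch buffers sized from the frame width.
 */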
802 static void
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804                          struct decode_state *decode_state,
805                          struct gen7_mfd_context *gen7_mfd_context)
806 {
807     VAPictureParameterBufferH264 *pic_param;
808     VASliceParameterBufferH264 *slice_param;
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810     struct object_surface *obj_surface;
811     dri_bo *bo;
812     int i, j, enable_avc_ildb = 0;
813     unsigned int width_in_mbs, height_in_mbs;
814
815     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
818
819         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821             assert((slice_param->slice_type == SLICE_TYPE_I) ||
822                    (slice_param->slice_type == SLICE_TYPE_SI) ||
823                    (slice_param->slice_type == SLICE_TYPE_P) ||
824                    (slice_param->slice_type == SLICE_TYPE_SP) ||
825                    (slice_param->slice_type == SLICE_TYPE_B));
826
827             if (slice_param->disable_deblocking_filter_idc != 1) {
828                 enable_avc_ildb = 1;
829                 break;
830             }
831
832             slice_param++;
833         }
834     }
835
836     assert(decode_state->pic_param && decode_state->pic_param->buffer);
837     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
839     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
840     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
841     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
842     assert(height_in_mbs > 0 && height_in_mbs <= 256);
843
844     /* Current decoded picture */
845     obj_surface = decode_state->render_object;
846     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
847     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
848     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
849
850     /* initialize the UV components for the YUV400 case */
851     if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
852          unsigned int uv_offset = obj_surface->width * obj_surface->height; 
853          unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 
854
855          drm_intel_gem_bo_map_gtt(obj_surface->bo);
856          memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
857          drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
858     }
859
860     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
861
862     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
863     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
864     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
865     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
866
867     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
868     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
869     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
870     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
871
872     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
873     bo = dri_bo_alloc(i965->intel.bufmgr,
874                       "intra row store",
875                       width_in_mbs * 64,
876                       0x1000);
877     assert(bo);
878     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
879     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
880
881     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
882     bo = dri_bo_alloc(i965->intel.bufmgr,
883                       "deblocking filter row store",
884                       width_in_mbs * 64 * 4,
885                       0x1000);
886     assert(bo);
887     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
888     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
889
890     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
891     bo = dri_bo_alloc(i965->intel.bufmgr,
892                       "bsd mpc row store",
893                       width_in_mbs * 64 * 2,
894                       0x1000);
895     assert(bo);
896     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
897     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
898
899     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
900     bo = dri_bo_alloc(i965->intel.bufmgr,
901                       "mpr row store",
902                       width_in_mbs * 64 * 2,
903                       0x1000);
904     assert(bo);
905     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
906     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
907
908     gen7_mfd_context->bitplane_read_buffer.valid = 0;
909 }
910
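/*
 * Top-level AVC decode: emit the common pipe/surface/buffer state once per
 * picture, then per slice the direct-mode, ref-idx, weight-offset and slice
 * state followed by the BSD object that triggers the actual decode.
 */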
911 static void
912 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
913                             struct decode_state *decode_state,
914                             struct gen7_mfd_context *gen7_mfd_context)
915 {
916     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
917     VAPictureParameterBufferH264 *pic_param;
918     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
919     dri_bo *slice_data_bo;
920     int i, j;
921
922     assert(decode_state->pic_param && decode_state->pic_param->buffer);
923     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
924     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
925
926     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
927     intel_batchbuffer_emit_mi_flush(batch);
928     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
929     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
930     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
931     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
932     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
933     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
934     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
935
936     for (j = 0; j < decode_state->num_slice_params; j++) {
937         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
938         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
939         slice_data_bo = decode_state->slice_datas[j]->bo;
940         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
941
942         if (j == decode_state->num_slice_params - 1)
943             next_slice_group_param = NULL;
944         else
945             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
946
947         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
948             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
949             assert((slice_param->slice_type == SLICE_TYPE_I) ||
950                    (slice_param->slice_type == SLICE_TYPE_SI) ||
951                    (slice_param->slice_type == SLICE_TYPE_P) ||
952                    (slice_param->slice_type == SLICE_TYPE_SP) ||
953                    (slice_param->slice_type == SLICE_TYPE_B));
954
955             if (i < decode_state->slice_params[j]->num_elements - 1)
956                 next_slice_param = slice_param + 1;
957             else
958                 next_slice_param = next_slice_group_param;
959
960             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
961             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
962             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
963             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
964             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
965             slice_param++;
966         }
967     }
968
969     intel_batchbuffer_end_atomic(batch);
970     intel_batchbuffer_flush(batch);
971 }
972
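/*
 * Per-picture MPEG-2 setup: update the reference frame store, decode into
 * the pre-deblocking output (MPEG-2 has no in-loop filter) and allocate the
 * BSD/MPC row-store scratch buffer.
 */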
973 static void
974 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
975                            struct decode_state *decode_state,
976                            struct gen7_mfd_context *gen7_mfd_context)
977 {
978     VAPictureParameterBufferMPEG2 *pic_param;
979     struct i965_driver_data *i965 = i965_driver_data(ctx);
980     struct object_surface *obj_surface;
981     dri_bo *bo;
982     unsigned int width_in_mbs;
983
984     assert(decode_state->pic_param && decode_state->pic_param->buffer);
985     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
986     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
987
988     mpeg2_set_reference_surfaces(
989         ctx,
990         gen7_mfd_context->reference_surface,
991         decode_state,
992         pic_param
993     );
994
995     /* Current decoded picture */
996     obj_surface = decode_state->render_object;
997     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
998
999     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1000     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1001     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1002     gen7_mfd_context->pre_deblocking_output.valid = 1;
1003
1004     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1005     bo = dri_bo_alloc(i965->intel.bufmgr,
1006                       "bsd mpc row store",
1007                       width_in_mbs * 96,
1008                       0x1000);
1009     assert(bo);
1010     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1011     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1012
1013     gen7_mfd_context->post_deblocking_output.valid = 0;
1014     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1015     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1016     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1017     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1018 }
1019
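/*
 * MFX_MPEG2_PIC_STATE: f_code values, picture coding extension flags,
 * picture coding type and the coded size in macroblocks, with slice
 * concealment disabled.
 */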
1020 static void
1021 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1022                          struct decode_state *decode_state,
1023                          struct gen7_mfd_context *gen7_mfd_context)
1024 {
1025     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1026     VAPictureParameterBufferMPEG2 *pic_param;
1027     unsigned int slice_concealment_disable_bit = 0;
1028
1029     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1030     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1031
1032     slice_concealment_disable_bit = 1;
1033
1034     BEGIN_BCS_BATCH(batch, 13);
1035     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1036     OUT_BCS_BATCH(batch,
1037                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1038                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1039                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1040                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1041                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1042                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1043                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1044                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1045                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1046                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1047                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1048                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1049     OUT_BCS_BATCH(batch,
1050                   pic_param->picture_coding_type << 9);
1051     OUT_BCS_BATCH(batch,
1052                   (slice_concealment_disable_bit << 31) |
1053                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1054                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1055     OUT_BCS_BATCH(batch, 0);
1056     OUT_BCS_BATCH(batch, 0);
1057     OUT_BCS_BATCH(batch, 0);
1058     OUT_BCS_BATCH(batch, 0);
1059     OUT_BCS_BATCH(batch, 0);
1060     OUT_BCS_BATCH(batch, 0);
1061     OUT_BCS_BATCH(batch, 0);
1062     OUT_BCS_BATCH(batch, 0);
1063     OUT_BCS_BATCH(batch, 0);
1064     ADVANCE_BCS_BATCH(batch);
1065 }
1066
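/*
 * Cache any application-supplied MPEG-2 quantiser matrices, reordering them
 * from zig-zag to raster order, then load whichever of the intra and
 * non-intra matrices are marked as present.
 */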
1067 static void
1068 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1069                         struct decode_state *decode_state,
1070                         struct gen7_mfd_context *gen7_mfd_context)
1071 {
1072     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1073     int i, j;
1074
1075     /* Update internal QM state */
1076     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1077         VAIQMatrixBufferMPEG2 * const iq_matrix =
1078             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1079
1080         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1081             iq_matrix->load_intra_quantiser_matrix) {
1082             gen_iq_matrix->load_intra_quantiser_matrix =
1083                 iq_matrix->load_intra_quantiser_matrix;
1084             if (iq_matrix->load_intra_quantiser_matrix) {
1085                 for (j = 0; j < 64; j++)
1086                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1087                         iq_matrix->intra_quantiser_matrix[j];
1088             }
1089         }
1090
1091         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1092             iq_matrix->load_non_intra_quantiser_matrix) {
1093             gen_iq_matrix->load_non_intra_quantiser_matrix =
1094                 iq_matrix->load_non_intra_quantiser_matrix;
1095             if (iq_matrix->load_non_intra_quantiser_matrix) {
1096                 for (j = 0; j < 64; j++)
1097                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1098                         iq_matrix->non_intra_quantiser_matrix[j];
1099             }
1100         }
1101     }
1102
1103     /* Commit QM state to HW */
1104     for (i = 0; i < 2; i++) {
1105         unsigned char *qm = NULL;
1106         int qm_type;
1107
1108         if (i == 0) {
1109             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1110                 qm = gen_iq_matrix->intra_quantiser_matrix;
1111                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1112             }
1113         } else {
1114             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1115                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1116                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1117             }
1118         }
1119
1120         if (!qm)
1121             continue;
1122
1123         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1124     }
1125 }
1126
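/*
 * MFD_MPEG2_BSD_OBJECT: decode one slice. The macroblock count is derived
 * from the start positions of this slice and the next; for field pictures
 * a workaround may halve the reported slice_vertical_position.
 */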
1127 static void
1128 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1129                           VAPictureParameterBufferMPEG2 *pic_param,
1130                           VASliceParameterBufferMPEG2 *slice_param,
1131                           VASliceParameterBufferMPEG2 *next_slice_param,
1132                           struct gen7_mfd_context *gen7_mfd_context)
1133 {
1134     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1135     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1136     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1137
1138     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1139         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1140         is_field_pic = 1;
1141     is_field_pic_wa = is_field_pic &&
1142         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1143
1144     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1145     hpos0 = slice_param->slice_horizontal_position;
1146
1147     if (next_slice_param == NULL) {
1148         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1149         hpos1 = 0;
1150     } else {
1151         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1152         hpos1 = next_slice_param->slice_horizontal_position;
1153     }
1154
1155     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1156
1157     BEGIN_BCS_BATCH(batch, 5);
1158     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1159     OUT_BCS_BATCH(batch, 
1160                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1161     OUT_BCS_BATCH(batch, 
1162                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1163     OUT_BCS_BATCH(batch,
1164                   hpos0 << 24 |
1165                   vpos0 << 16 |
1166                   mb_count << 8 |
1167                   (next_slice_param == NULL) << 5 |
1168                   (next_slice_param == NULL) << 3 |
1169                   (slice_param->macroblock_offset & 0x7));
1170     OUT_BCS_BATCH(batch,
1171                   (slice_param->quantiser_scale_code << 24) |
1172                   (vpos1 << 8 | hpos1));
1173     ADVANCE_BCS_BATCH(batch);
1174 }
1175
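/*
 * Top-level MPEG-2 decode: emit the common state once per picture, then one
 * BSD object per slice.
 */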
1176 static void
1177 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1178                               struct decode_state *decode_state,
1179                               struct gen7_mfd_context *gen7_mfd_context)
1180 {
1181     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1182     VAPictureParameterBufferMPEG2 *pic_param;
1183     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1184     dri_bo *slice_data_bo;
1185     int i, j;
1186
1187     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1188     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1189
1190     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1191     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1192     intel_batchbuffer_emit_mi_flush(batch);
1193     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1194     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1195     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1196     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1197     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1198     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1199
1200     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1201         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1202             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1203
1204     for (j = 0; j < decode_state->num_slice_params; j++) {
1205         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1206         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1207         slice_data_bo = decode_state->slice_datas[j]->bo;
1208         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1209
1210         if (j == decode_state->num_slice_params - 1)
1211             next_slice_group_param = NULL;
1212         else
1213             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1214
1215         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1216             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1217
1218             if (i < decode_state->slice_params[j]->num_elements - 1)
1219                 next_slice_param = slice_param + 1;
1220             else
1221                 next_slice_param = next_slice_group_param;
1222
1223             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1224             slice_param++;
1225         }
1226     }
1227
1228     intel_batchbuffer_end_atomic(batch);
1229     intel_batchbuffer_flush(batch);
1230 }
1231
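/*
 * Lookup tables used by the VC-1 path: VA-API picture type, MV mode,
 * condover and profile values mapped to their MFX encodings, plus the
 * BFRACTION scale factors for B pictures.
 */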
1232 static const int va_to_gen7_vc1_pic_type[5] = {
1233     GEN7_VC1_I_PICTURE,
1234     GEN7_VC1_P_PICTURE,
1235     GEN7_VC1_B_PICTURE,
1236     GEN7_VC1_BI_PICTURE,
1237     GEN7_VC1_P_PICTURE,
1238 };
1239
1240 static const int va_to_gen7_vc1_mv[4] = {
1241     1, /* 1-MV */
1242     2, /* 1-MV half-pel */
1243     3, /* 1-MV half-pel bilinear */
1244     0, /* Mixed MV */
1245 };
1246
1247 static const int b_picture_scale_factor[21] = {
1248     128, 85,  170, 64,  192,
1249     51,  102, 153, 204, 43,
1250     215, 37,  74,  111, 148,
1251     185, 222, 32,  96,  160, 
1252     224,
1253 };
1254
1255 static const int va_to_gen7_vc1_condover[3] = {
1256     0,
1257     2,
1258     3
1259 };
1260
1261 static const int va_to_gen7_vc1_profile[4] = {
1262     GEN7_VC1_SIMPLE_PROFILE,
1263     GEN7_VC1_MAIN_PROFILE,
1264     GEN7_VC1_RESERVED_PROFILE,
1265     GEN7_VC1_ADVANCED_PROFILE
1266 };
1267
1268 static void 
1269 gen8_mfd_free_vc1_surface(void **data)
1270 {
1271     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1272
1273     if (!gen7_vc1_surface)
1274         return;
1275
1276     dri_bo_unreference(gen7_vc1_surface->dmv);
1277     free(gen7_vc1_surface);
1278     *data = NULL;
1279 }
1280
1281 static void
1282 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1283                           VAPictureParameterBufferVC1 *pic_param,
1284                           struct object_surface *obj_surface)
1285 {
1286     struct i965_driver_data *i965 = i965_driver_data(ctx);
1287     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1288     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1289     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1290
1291     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1292
1293     if (!gen7_vc1_surface) {
1294         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1295         assert((obj_surface->size & 0x3f) == 0);
1296         obj_surface->private_data = gen7_vc1_surface;
1297     }
1298
1299     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1300
1301     if (gen7_vc1_surface->dmv == NULL) {
1302         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303                                              "direct mv w/r buffer",
1304                                              width_in_mbs * height_in_mbs * 64,
1305                                              0x1000);
1306     }
1307 }
1308
1309 static void
1310 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1311                          struct decode_state *decode_state,
1312                          struct gen7_mfd_context *gen7_mfd_context)
1313 {
1314     VAPictureParameterBufferVC1 *pic_param;
1315     struct i965_driver_data *i965 = i965_driver_data(ctx);
1316     struct object_surface *obj_surface;
1317     dri_bo *bo;
1318     int width_in_mbs;
1319     int picture_type;
1320
1321     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1322     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1323     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1324     picture_type = pic_param->picture_fields.bits.picture_type;
1325  
1326     intel_update_vc1_frame_store_index(ctx,
1327                                        decode_state,
1328                                        pic_param,
1329                                        gen7_mfd_context->reference_surface);
1330
1331     /* Current decoded picture */
1332     obj_surface = decode_state->render_object;
1333     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1334     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1335
1336     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1337     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1338     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1339     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1340
1341     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1342     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1343     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1344     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1345
1346     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1347     bo = dri_bo_alloc(i965->intel.bufmgr,
1348                       "intra row store",
1349                       width_in_mbs * 64,
1350                       0x1000);
1351     assert(bo);
1352     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1353     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1354
1355     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1356     bo = dri_bo_alloc(i965->intel.bufmgr,
1357                       "deblocking filter row store",
1358                       width_in_mbs * 7 * 64,
1359                       0x1000);
1360     assert(bo);
1361     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1362     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1363
1364     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1365     bo = dri_bo_alloc(i965->intel.bufmgr,
1366                       "bsd mpc row store",
1367                       width_in_mbs * 96,
1368                       0x1000);
1369     assert(bo);
1370     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1371     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1372
1373     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1374
1375     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1376     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1377     
1378     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1379         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1380         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1381         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1382         int src_w, src_h;
1383         uint8_t *src = NULL, *dst = NULL;
1384
1385         assert(decode_state->bit_plane->buffer);
1386         src = decode_state->bit_plane->buffer;
1387
1388         bo = dri_bo_alloc(i965->intel.bufmgr,
1389                           "VC-1 Bitplane",
1390                           bitplane_width * height_in_mbs,
1391                           0x1000);
1392         assert(bo);
1393         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1394
1395         dri_bo_map(bo, True);
1396         assert(bo->virtual);
1397         dst = bo->virtual;
1398
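        /*
         * Repack the VA-API bitplane (one 4-bit value per macroblock, two
         * macroblocks per byte in raster order) into the nibble order the MFX
         * engine reads, one bitplane_width-byte row per macroblock row; for
         * skipped pictures bit 1 (0x2) of every packed value is forced on,
         * presumably to mark the macroblock as skipped.
         */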
1399         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1400             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1401                 int src_index, dst_index;
1402                 int src_shift;
1403                 uint8_t src_value;
1404
1405                 src_index = (src_h * width_in_mbs + src_w) / 2;
1406                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1407                 src_value = ((src[src_index] >> src_shift) & 0xf);
1408
1409                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1410                     src_value |= 0x2;
1411                 }
1412
1413                 dst_index = src_w / 2;
1414                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1415             }
1416
1417             if (src_w & 1)
1418                 dst[src_w / 2] >>= 4;
1419
1420             dst += bitplane_width;
1421         }
1422
1423         dri_bo_unmap(bo);
1424     } else
1425         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1426 }
1427
1428 static void
1429 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1430                        struct decode_state *decode_state,
1431                        struct gen7_mfd_context *gen7_mfd_context)
1432 {
1433     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1434     VAPictureParameterBufferVC1 *pic_param;
1435     struct object_surface *obj_surface;
1436     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1437     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1438     int unified_mv_mode;
1439     int ref_field_pic_polarity = 0;
1440     int scale_factor = 0;
1441     int trans_ac_y = 0;
1442     int dmv_surface_valid = 0;
1443     int brfd = 0;
1444     int fcm = 0;
1445     int picture_type;
1446     int profile;
1447     int overlap;
1448     int interpolation_mode = 0;
1449
1450     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1451     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1452
1453     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1454     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1455     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1456     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1457     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1458     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1459     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1460     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1461
1462     if (dquant == 0) {
1463         alt_pquant_config = 0;
1464         alt_pquant_edge_mask = 0;
1465     } else if (dquant == 2) {
1466         alt_pquant_config = 1;
1467         alt_pquant_edge_mask = 0xf;
1468     } else {
1469         assert(dquant == 1);
1470         if (dquantfrm == 0) {
1471             alt_pquant_config = 0;
1472             alt_pquant_edge_mask = 0;
1473             alt_pq = 0;
1474         } else {
1475             assert(dquantfrm == 1);
1476             alt_pquant_config = 1;
1477
1478             switch (dqprofile) {
1479             case 3:
1480                 if (dqbilevel == 0) {
1481                     alt_pquant_config = 2;
1482                     alt_pquant_edge_mask = 0;
1483                 } else {
1484                     assert(dqbilevel == 1);
1485                     alt_pquant_config = 3;
1486                     alt_pquant_edge_mask = 0;
1487                 }
1488                 break;
1489                 
1490             case 0:
1491                 alt_pquant_edge_mask = 0xf;
1492                 break;
1493
1494             case 1:
1495                 if (dqdbedge == 3)
1496                     alt_pquant_edge_mask = 0x9;
1497                 else
1498                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1499
1500                 break;
1501
1502             case 2:
1503                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1504                 break;
1505
1506             default:
1507                 assert(0);
1508             }
1509         }
1510     }
1511
1512     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1513         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1514         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1515     } else {
1516         assert(pic_param->mv_fields.bits.mv_mode < 4);
1517         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1518     }
1519
1520     if (pic_param->sequence_fields.bits.interlace == 1 &&
1521         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1522         /* FIXME: calculate reference field picture polarity */
1523         assert(0);
1524         ref_field_pic_polarity = 0;
1525     }
1526
1527     if (pic_param->b_picture_fraction < 21)
1528         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1529
1530     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1531     
1532     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1533         picture_type == GEN7_VC1_I_PICTURE)
1534         picture_type = GEN7_VC1_BI_PICTURE;
1535
1536     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1537         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1538     else {
1539         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1540
1541         /*
1542          * 8.3.6.2.1 Transform Type Selection
1543          * If variable-sized transform coding is not enabled,
1544          * then the 8x8 transform shall be used for all blocks.
1545          * This is also a requirement of MFX_VC1_PIC_STATE.
1546          */
1547         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1548             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1549             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1550         }
1551     }
1552
1553     if (picture_type == GEN7_VC1_B_PICTURE) {
1554         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1555
1556         obj_surface = decode_state->reference_objects[1];
1557
1558         if (obj_surface)
1559             gen7_vc1_surface = obj_surface->private_data;
1560
1561         if (!gen7_vc1_surface || 
1562             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1563              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1564             dmv_surface_valid = 0;
1565         else
1566             dmv_surface_valid = 1;
1567     }
1568
1569     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1570
1571     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1572         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1573     else {
1574         if (pic_param->picture_fields.bits.top_field_first)
1575             fcm = 2;
1576         else
1577             fcm = 3;
1578     }
1579
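    /*
     * For B pictures, derive the backward reference frame distance from the
     * forward reference distance and the B-fraction scale factor; e.g. with
     * illustrative values reference_distance = 2 and scale_factor = 128
     * (BFRACTION 1/2), brfd = 2 - ((128 * 2) >> 8) - 1 = 0.
     */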
1580     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1581         brfd = pic_param->reference_fields.bits.reference_distance;
1582         brfd = (scale_factor * brfd) >> 8;
1583         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1584
1585         if (brfd < 0)
1586             brfd = 0;
1587     }
1588
1589     overlap = 0;
1590     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1591         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1592             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1593             overlap = 1;
1594         }
1595     } else {
1596         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1597             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1598             overlap = 1;
1599         }
1600         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1601             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1602             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1603                 overlap = 1;
1604             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1605                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1606                 overlap = 1;
1607             }
1608         }
1609     }
1610
1611     assert(pic_param->conditional_overlap_flag < 3);
1612     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1613
1614     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1615         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1616          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1617         interpolation_mode = 9; /* Half-pel bilinear */
1618     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1619              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1620               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1621         interpolation_mode = 1; /* Half-pel bicubic */
1622     else
1623         interpolation_mode = 0; /* Quarter-pel bicubic */
1624
1625     BEGIN_BCS_BATCH(batch, 6);
1626     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1627     OUT_BCS_BATCH(batch,
1628                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1629                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1630     OUT_BCS_BATCH(batch,
1631                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1632                   dmv_surface_valid << 15 |
1633                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1634                   pic_param->rounding_control << 13 |
1635                   pic_param->sequence_fields.bits.syncmarker << 12 |
1636                   interpolation_mode << 8 |
1637                   0 << 7 | /* FIXME: scale up or down ??? */
1638                   pic_param->range_reduction_frame << 6 |
1639                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1640                   overlap << 4 |
1641                   !pic_param->picture_fields.bits.is_first_field << 3 |
1642                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1643     OUT_BCS_BATCH(batch,
1644                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1645                   picture_type << 26 |
1646                   fcm << 24 |
1647                   alt_pq << 16 |
1648                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1649                   scale_factor << 0);
1650     OUT_BCS_BATCH(batch,
1651                   unified_mv_mode << 28 |
1652                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1653                   pic_param->fast_uvmc_flag << 26 |
1654                   ref_field_pic_polarity << 25 |
1655                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1656                   pic_param->reference_fields.bits.reference_distance << 20 |
1657                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1658                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1659                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1660                   alt_pquant_edge_mask << 4 |
1661                   alt_pquant_config << 2 |
1662                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1663                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1664     OUT_BCS_BATCH(batch,
1665                   !!pic_param->bitplane_present.value << 31 |
1666                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1667                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1668                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1669                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1670                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1671                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1672                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1673                   pic_param->mv_fields.bits.mv_table << 20 |
1674                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1675                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1676                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1677                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1678                   pic_param->mb_mode_table << 8 |
1679                   trans_ac_y << 6 |
1680                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1681                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1682                   pic_param->cbp_table << 0);
1683     ADVANCE_BCS_BATCH(batch);
1684 }
1685
1686 static void
1687 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1688                              struct decode_state *decode_state,
1689                              struct gen7_mfd_context *gen7_mfd_context)
1690 {
1691     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1692     VAPictureParameterBufferVC1 *pic_param;
1693     int intensitycomp_single;
1694
1695     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1696     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1697
1700     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1701
1702     BEGIN_BCS_BATCH(batch, 6);
1703     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1704     OUT_BCS_BATCH(batch,
1705                   0 << 14 | /* FIXME: double ??? */
1706                   0 << 12 |
1707                   intensitycomp_single << 10 |
1708                   intensitycomp_single << 8 |
1709                   0 << 4 | /* FIXME: interlace mode */
1710                   0);
1711     OUT_BCS_BATCH(batch,
1712                   pic_param->luma_shift << 16 |
1713                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1714     OUT_BCS_BATCH(batch, 0);
1715     OUT_BCS_BATCH(batch, 0);
1716     OUT_BCS_BATCH(batch, 0);
1717     ADVANCE_BCS_BATCH(batch);
1718 }
1719
1720 static void
1721 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1722                               struct decode_state *decode_state,
1723                               struct gen7_mfd_context *gen7_mfd_context)
1724 {
1725     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1726     struct object_surface *obj_surface;
1727     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1728
1729     obj_surface = decode_state->render_object;
1730
1731     if (obj_surface && obj_surface->private_data) {
1732         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1733     }
1734
1735     obj_surface = decode_state->reference_objects[1];
1736
1737     if (obj_surface && obj_surface->private_data) {
1738         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1739     }
1740
1741     BEGIN_BCS_BATCH(batch, 7);
1742     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1743
1744     if (dmv_write_buffer)
1745         OUT_BCS_RELOC(batch, dmv_write_buffer,
1746                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1747                       0);
1748     else
1749         OUT_BCS_BATCH(batch, 0);
1750
1751     OUT_BCS_BATCH(batch, 0);
1752     OUT_BCS_BATCH(batch, 0);
1753
1754     if (dmv_read_buffer)
1755         OUT_BCS_RELOC(batch, dmv_read_buffer,
1756                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1757                       0);
1758     else
1759         OUT_BCS_BATCH(batch, 0);
1760     
1761     OUT_BCS_BATCH(batch, 0);
1762     OUT_BCS_BATCH(batch, 0);
1763                   
1764     ADVANCE_BCS_BATCH(batch);
1765 }
1766
1767 static int
1768 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1769 {
1770     int out_slice_data_bit_offset;
1771     int slice_header_size = in_slice_data_bit_offset / 8;
1772     int i, j;
1773
1774     if (profile != 3)
1775         out_slice_data_bit_offset = in_slice_data_bit_offset;
1776     else {
1777         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1778             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1779                 i++, j += 2;
1780             }
1781         }
1782
1783         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1784     }
1785
1786     return out_slice_data_bit_offset;
1787 }
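
/*
 * The loop above walks the raw slice header of an Advanced profile
 * (profile == 3) bitstream and skips over the 0x00 0x00 0x03 emulation
 * prevention pattern, so the returned offset addresses the undecoded buffer
 * rather than the de-emulated one.  Illustrative example: an
 * in_slice_data_bit_offset of 40 over a header containing one emulation
 * prevention byte yields an out_slice_data_bit_offset of 48.
 */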
1788
1789 static void
1790 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1791                         VAPictureParameterBufferVC1 *pic_param,
1792                         VASliceParameterBufferVC1 *slice_param,
1793                         VASliceParameterBufferVC1 *next_slice_param,
1794                         dri_bo *slice_data_bo,
1795                         struct gen7_mfd_context *gen7_mfd_context)
1796 {
1797     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1798     int next_slice_start_vert_pos;
1799     int macroblock_offset;
1800     uint8_t *slice_data = NULL;
1801
1802     dri_bo_map(slice_data_bo, 0);
1803     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1804     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1805                                                                slice_param->macroblock_offset,
1806                                                                pic_param->sequence_fields.bits.profile);
1807     dri_bo_unmap(slice_data_bo);
1808
1809     if (next_slice_param)
1810         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1811     else
1812         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1813
1814     BEGIN_BCS_BATCH(batch, 5);
1815     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1816     OUT_BCS_BATCH(batch, 
1817                   slice_param->slice_data_size - (macroblock_offset >> 3));
1818     OUT_BCS_BATCH(batch, 
1819                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1820     OUT_BCS_BATCH(batch,
1821                   slice_param->slice_vertical_position << 16 |
1822                   next_slice_start_vert_pos << 0);
1823     OUT_BCS_BATCH(batch,
1824                   (macroblock_offset & 0x7));
1825     ADVANCE_BCS_BATCH(batch);
1826 }
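
/*
 * Note that macroblock_offset is split above: its whole bytes
 * (macroblock_offset >> 3) are folded into the slice data size/offset, while
 * the remaining bit position (macroblock_offset & 0x7) goes into the last
 * DWORD of MFD_VC1_BSD_OBJECT.
 */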
1827
1828 static void
1829 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1830                             struct decode_state *decode_state,
1831                             struct gen7_mfd_context *gen7_mfd_context)
1832 {
1833     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1834     VAPictureParameterBufferVC1 *pic_param;
1835     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1836     dri_bo *slice_data_bo;
1837     int i, j;
1838
1839     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1840     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1841
1842     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1843     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1844     intel_batchbuffer_emit_mi_flush(batch);
1845     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1846     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1847     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1848     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1849     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1850     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1851     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1852
1853     for (j = 0; j < decode_state->num_slice_params; j++) {
1854         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1855         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1856         slice_data_bo = decode_state->slice_datas[j]->bo;
1857         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1858
1859         if (j == decode_state->num_slice_params - 1)
1860             next_slice_group_param = NULL;
1861         else
1862             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1863
1864         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1865             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1866
1867             if (i < decode_state->slice_params[j]->num_elements - 1)
1868                 next_slice_param = slice_param + 1;
1869             else
1870                 next_slice_param = next_slice_group_param;
1871
1872             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1873             slice_param++;
1874         }
1875     }
1876
1877     intel_batchbuffer_end_atomic(batch);
1878     intel_batchbuffer_flush(batch);
1879 }
1880
1881 static void
1882 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1883                           struct decode_state *decode_state,
1884                           struct gen7_mfd_context *gen7_mfd_context)
1885 {
1886     struct object_surface *obj_surface;
1887     VAPictureParameterBufferJPEGBaseline *pic_param;
1888     int subsampling = SUBSAMPLE_YUV420;
1889
1890     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1891
1892     if (pic_param->num_components == 1)
1893         subsampling = SUBSAMPLE_YUV400;
1894     else if (pic_param->num_components == 3) {
1895         int h1 = pic_param->components[0].h_sampling_factor;
1896         int h2 = pic_param->components[1].h_sampling_factor;
1897         int h3 = pic_param->components[2].h_sampling_factor;
1898         int v1 = pic_param->components[0].v_sampling_factor;
1899         int v2 = pic_param->components[1].v_sampling_factor;
1900         int v3 = pic_param->components[2].v_sampling_factor;
1901
1902         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1903             v1 == 2 && v2 == 1 && v3 == 1)
1904             subsampling = SUBSAMPLE_YUV420;
1905         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1906                  v1 == 1 && v2 == 1 && v3 == 1)
1907             subsampling = SUBSAMPLE_YUV422H;
1908         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1909                  v1 == 1 && v2 == 1 && v3 == 1)
1910             subsampling = SUBSAMPLE_YUV444;
1911         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1912                  v1 == 1 && v2 == 1 && v3 == 1)
1913             subsampling = SUBSAMPLE_YUV411;
1914         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1915                  v1 == 2 && v2 == 1 && v3 == 1)
1916             subsampling = SUBSAMPLE_YUV422V;
1917         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1918                  v1 == 2 && v2 == 2 && v3 == 2)
1919             subsampling = SUBSAMPLE_YUV422H;
1920         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1921                  v1 == 2 && v2 == 1 && v3 == 1)
1922             subsampling = SUBSAMPLE_YUV422V;
1923         else
1924             assert(0);
1925     } else {
1926         assert(0);
1927     }
1928
1929     /* Current decoded picture */
1930     obj_surface = decode_state->render_object;
1931     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
1932
1933     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1934     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1935     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1936     gen7_mfd_context->pre_deblocking_output.valid = 1;
1937
1938     gen7_mfd_context->post_deblocking_output.bo = NULL;
1939     gen7_mfd_context->post_deblocking_output.valid = 0;
1940
1941     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1942     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1943
1944     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1945     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1946
1947     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1948     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1949
1950     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1951     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1952
1953     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1954     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1955 }
1956
1957 static const int va_to_gen7_jpeg_rotation[4] = {
1958     GEN7_JPEG_ROTATION_0,
1959     GEN7_JPEG_ROTATION_90,
1960     GEN7_JPEG_ROTATION_180,
1961     GEN7_JPEG_ROTATION_270
1962 };
1963
1964 static void
1965 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1966                         struct decode_state *decode_state,
1967                         struct gen7_mfd_context *gen7_mfd_context)
1968 {
1969     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1970     VAPictureParameterBufferJPEGBaseline *pic_param;
1971     int chroma_type = GEN7_YUV420;
1972     int frame_width_in_blks;
1973     int frame_height_in_blks;
1974
1975     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1976     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1977
1978     if (pic_param->num_components == 1)
1979         chroma_type = GEN7_YUV400;
1980     else if (pic_param->num_components == 3) {
1981         int h1 = pic_param->components[0].h_sampling_factor;
1982         int h2 = pic_param->components[1].h_sampling_factor;
1983         int h3 = pic_param->components[2].h_sampling_factor;
1984         int v1 = pic_param->components[0].v_sampling_factor;
1985         int v2 = pic_param->components[1].v_sampling_factor;
1986         int v3 = pic_param->components[2].v_sampling_factor;
1987
1988         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1989             v1 == 2 && v2 == 1 && v3 == 1)
1990             chroma_type = GEN7_YUV420;
1991         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1992                  v1 == 1 && v2 == 1 && v3 == 1)
1993             chroma_type = GEN7_YUV422H_2Y;
1994         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1995                  v1 == 1 && v2 == 1 && v3 == 1)
1996             chroma_type = GEN7_YUV444;
1997         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1998                  v1 == 1 && v2 == 1 && v3 == 1)
1999             chroma_type = GEN7_YUV411;
2000         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2001                  v1 == 2 && v2 == 1 && v3 == 1)
2002             chroma_type = GEN7_YUV422V_2Y;
2003         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2004                  v1 == 2 && v2 == 2 && v3 == 2)
2005             chroma_type = GEN7_YUV422H_4Y;
2006         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2007                  v1 == 2 && v2 == 1 && v3 == 1)
2008             chroma_type = GEN7_YUV422V_4Y;
2009         else
2010             assert(0);
2011     }
2012
2013     if (chroma_type == GEN7_YUV400 ||
2014         chroma_type == GEN7_YUV444 ||
2015         chroma_type == GEN7_YUV422V_2Y) {
2016         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2017         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2018     } else if (chroma_type == GEN7_YUV411) {
2019         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2020         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2021     } else {
2022         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2023         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2024     }
2025
2026     BEGIN_BCS_BATCH(batch, 3);
2027     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2028     OUT_BCS_BATCH(batch,
2029                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2030                   (chroma_type << 0));
2031     OUT_BCS_BATCH(batch,
2032                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2033                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2034     ADVANCE_BCS_BATCH(batch);
2035 }
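
/*
 * Illustrative example (values assumed, not from a spec table): a 1920x1080
 * 4:2:0 picture takes the last branch above, giving
 * frame_width_in_blks = ((1920 + 15) / 16) * 2 = 240 and
 * frame_height_in_blks = ((1080 + 15) / 16) * 2 = 136, i.e. the picture size
 * in 8x8 blocks rounded up to whole 16x16 MCUs.
 */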
2036
2037 static const int va_to_gen7_jpeg_hufftable[2] = {
2038     MFX_HUFFTABLE_ID_Y,
2039     MFX_HUFFTABLE_ID_UV
2040 };
2041
2042 static void
2043 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2044                                struct decode_state *decode_state,
2045                                struct gen7_mfd_context *gen7_mfd_context,
2046                                int num_tables)
2047 {
2048     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2049     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2050     int index;
2051
2052     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2053         return;
2054
2055     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2056
2057     for (index = 0; index < num_tables; index++) {
2058         int id = va_to_gen7_jpeg_hufftable[index];
2059         if (!huffman_table->load_huffman_table[index])
2060             continue;
2061         BEGIN_BCS_BATCH(batch, 53);
2062         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2063         OUT_BCS_BATCH(batch, id);
2064         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2065         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2066         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2067         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2068         ADVANCE_BCS_BATCH(batch);
2069     }
2070 }
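
/*
 * The payload above (12 + 12 + 16 + 164 bytes = 204 bytes = 51 DWORDs) plus
 * the command header and table id make up the 53-DWORD
 * MFX_JPEG_HUFF_TABLE_STATE command; 164 bytes of AC values are emitted even
 * though the VA-API table defines 162, the trailing bytes being padding.
 */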
2071
2072 static const int va_to_gen7_jpeg_qm[5] = {
2073     -1,
2074     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2075     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2076     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2077     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2078 };
2079
2080 static void
2081 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2082                        struct decode_state *decode_state,
2083                        struct gen7_mfd_context *gen7_mfd_context)
2084 {
2085     VAPictureParameterBufferJPEGBaseline *pic_param;
2086     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2087     int index;
2088
2089     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2090         return;
2091
2092     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2093     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2094
2095     assert(pic_param->num_components <= 3);
2096
2097     for (index = 0; index < pic_param->num_components; index++) {
2098         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2099         int qm_type;
2100         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2101         unsigned char raster_qm[64];
2102         int j;
2103
2104         if (id > 4 || id < 1)
2105             continue;
2106
2107         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2108             continue;
2109
2110         qm_type = va_to_gen7_jpeg_qm[id];
2111
2112         for (j = 0; j < 64; j++)
2113             raster_qm[zigzag_direct[j]] = qm[j];
2114
2115         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2116     }
2117 }
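
/*
 * The quantiser tables arrive in the zig-zag order of the JPEG DQT segment,
 * while gen8_mfd_qm_state() expects raster order, hence the
 * raster_qm[zigzag_direct[j]] = qm[j] remapping above.
 */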
2118
2119 static void
2120 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2121                          VAPictureParameterBufferJPEGBaseline *pic_param,
2122                          VASliceParameterBufferJPEGBaseline *slice_param,
2123                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2124                          dri_bo *slice_data_bo,
2125                          struct gen7_mfd_context *gen7_mfd_context)
2126 {
2127     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2128     int scan_component_mask = 0;
2129     int i;
2130
2131     assert(slice_param->num_components > 0);
2132     assert(slice_param->num_components < 4);
2133     assert(slice_param->num_components <= pic_param->num_components);
2134
2135     for (i = 0; i < slice_param->num_components; i++) {
2136         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2137         case 1:
2138             scan_component_mask |= (1 << 0);
2139             break;
2140         case 2:
2141             scan_component_mask |= (1 << 1);
2142             break;
2143         case 3:
2144             scan_component_mask |= (1 << 2);
2145             break;
2146         default:
2147             assert(0);
2148             break;
2149         }
2150     }
2151
2152     BEGIN_BCS_BATCH(batch, 6);
2153     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2154     OUT_BCS_BATCH(batch, 
2155                   slice_param->slice_data_size);
2156     OUT_BCS_BATCH(batch, 
2157                   slice_param->slice_data_offset);
2158     OUT_BCS_BATCH(batch,
2159                   slice_param->slice_horizontal_position << 16 |
2160                   slice_param->slice_vertical_position << 0);
2161     OUT_BCS_BATCH(batch,
2162                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2163                   (scan_component_mask << 27) |                 /* scan components */
2164                   (0 << 26) |   /* disable interrupt allowed */
2165                   (slice_param->num_mcus << 0));                /* MCU count */
2166     OUT_BCS_BATCH(batch,
2167                   (slice_param->restart_interval << 0));    /* RestartInterval */
2168     ADVANCE_BCS_BATCH(batch);
2169 }
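
/*
 * scan_component_mask ends up with bit 0/1/2 set for each component present
 * in this scan (presumably Y, Cb and Cr), and bit 30 of DW4 marks the scan as
 * interleaved whenever it carries more than one component.
 */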
2170
2171 /* Workaround for JPEG decoding on Ivybridge */
2172 #ifdef JPEG_WA
2173
2174 VAStatus 
2175 i965_DestroySurfaces(VADriverContextP ctx,
2176                      VASurfaceID *surface_list,
2177                      int num_surfaces);
2178 VAStatus 
2179 i965_CreateSurfaces(VADriverContextP ctx,
2180                     int width,
2181                     int height,
2182                     int format,
2183                     int num_surfaces,
2184                     VASurfaceID *surfaces);
2185
2186 static struct {
2187     int width;
2188     int height;
2189     unsigned char data[32];
2190     int data_size;
2191     int data_bit_offset;
2192     int qp;
2193 } gen7_jpeg_wa_clip = {
2194     16,
2195     16,
2196     {
2197         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2198         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2199     },
2200     14,
2201     40,
2202     28,
2203 };
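
/*
 * This appears to be a small pre-encoded AVC clip (a single 16x16 intra
 * slice: 14 bytes of bitstream, slice data starting at bit 40, QP 28) that
 * the JPEG workaround decodes through the AVC path, presumably to bring the
 * MFX pipeline into a known state before the real JPEG decode is submitted.
 */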
2204
2205 static void
2206 gen8_jpeg_wa_init(VADriverContextP ctx,
2207                   struct gen7_mfd_context *gen7_mfd_context)
2208 {
2209     struct i965_driver_data *i965 = i965_driver_data(ctx);
2210     VAStatus status;
2211     struct object_surface *obj_surface;
2212
2213     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2214         i965_DestroySurfaces(ctx,
2215                              &gen7_mfd_context->jpeg_wa_surface_id,
2216                              1);
2217
2218     status = i965_CreateSurfaces(ctx,
2219                                  gen7_jpeg_wa_clip.width,
2220                                  gen7_jpeg_wa_clip.height,
2221                                  VA_RT_FORMAT_YUV420,
2222                                  1,
2223                                  &gen7_mfd_context->jpeg_wa_surface_id);
2224     assert(status == VA_STATUS_SUCCESS);
2225
2226     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2227     assert(obj_surface);
2228     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2229     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2230
2231     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2232         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2233                                                                "JPEG WA data",
2234                                                                0x1000,
2235                                                                0x1000);
2236         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2237                        0,
2238                        gen7_jpeg_wa_clip.data_size,
2239                        gen7_jpeg_wa_clip.data);
2240     }
2241 }
2242
2243 static void
2244 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2245                               struct gen7_mfd_context *gen7_mfd_context)
2246 {
2247     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2248
2249     BEGIN_BCS_BATCH(batch, 5);
2250     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2251     OUT_BCS_BATCH(batch,
2252                   (MFX_LONG_MODE << 17) | /* Currently only the long format is supported */
2253                   (MFD_MODE_VLD << 15) | /* VLD mode */
2254                   (0 << 10) | /* disable Stream-Out */
2255                   (0 << 9)  | /* Post Deblocking Output */
2256                   (1 << 8)  | /* Pre Deblocking Output */
2257                   (0 << 5)  | /* not in stitch mode */
2258                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2259                   (MFX_FORMAT_AVC << 0));
2260     OUT_BCS_BATCH(batch,
2261                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2262                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2263                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2264                   (0 << 1)  |
2265                   (0 << 0));
2266     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2267     OUT_BCS_BATCH(batch, 0); /* reserved */
2268     ADVANCE_BCS_BATCH(batch);
2269 }
2270
2271 static void
2272 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2273                            struct gen7_mfd_context *gen7_mfd_context)
2274 {
2275     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2277
2278     BEGIN_BCS_BATCH(batch, 6);
2279     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2280     OUT_BCS_BATCH(batch, 0);
2281     OUT_BCS_BATCH(batch,
2282                   ((obj_surface->orig_width - 1) << 18) |
2283                   ((obj_surface->orig_height - 1) << 4));
2284     OUT_BCS_BATCH(batch,
2285                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2286                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2287                   (0 << 22) | /* surface object control state, ignored */
2288                   ((obj_surface->width - 1) << 3) | /* pitch */
2289                   (0 << 2)  | /* must be 0 */
2290                   (1 << 1)  | /* must be tiled */
2291                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2292     OUT_BCS_BATCH(batch,
2293                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2294                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2295     OUT_BCS_BATCH(batch,
2296                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2297                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2298     ADVANCE_BCS_BATCH(batch);
2299 }
2300
2301 static void
2302 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2303                                  struct gen7_mfd_context *gen7_mfd_context)
2304 {
2305     struct i965_driver_data *i965 = i965_driver_data(ctx);
2306     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2307     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2308     dri_bo *intra_bo;
2309     int i;
2310
2311     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2312                             "intra row store",
2313                             128 * 64,
2314                             0x1000);
2315
2316     BEGIN_BCS_BATCH(batch, 61);
2317     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2318     OUT_BCS_RELOC(batch,
2319                   obj_surface->bo,
2320                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2321                   0);
2322     OUT_BCS_BATCH(batch, 0);
2323     OUT_BCS_BATCH(batch, 0);
2324
2325
2326     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2327     OUT_BCS_BATCH(batch, 0);
2328     OUT_BCS_BATCH(batch, 0);
2329
2330     /* uncompressed-video & stream out 7-12 */
2331     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2332     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2333     OUT_BCS_BATCH(batch, 0);
2334     OUT_BCS_BATCH(batch, 0);
2335     OUT_BCS_BATCH(batch, 0);
2336     OUT_BCS_BATCH(batch, 0);
2337
2338     /* the DW 13-15 is for intra row store scratch */
2339     OUT_BCS_RELOC(batch,
2340                   intra_bo,
2341                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2342                   0);
2343     OUT_BCS_BATCH(batch, 0);
2344     OUT_BCS_BATCH(batch, 0);
2345
2346     /* the DW 16-18 is for deblocking filter */
2347     OUT_BCS_BATCH(batch, 0);
2348     OUT_BCS_BATCH(batch, 0);
2349     OUT_BCS_BATCH(batch, 0);
2350
2351     /* DW 19..50 */
2352     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2353         OUT_BCS_BATCH(batch, 0);
2354         OUT_BCS_BATCH(batch, 0);
2355     }
2356     OUT_BCS_BATCH(batch, 0);
2357
2358     /* the DW52-54 is for mb status address */
2359     OUT_BCS_BATCH(batch, 0);
2360     OUT_BCS_BATCH(batch, 0);
2361     OUT_BCS_BATCH(batch, 0);
2362     /* the DW56-60 is for ILDB & second ILDB address */
2363     OUT_BCS_BATCH(batch, 0);
2364     OUT_BCS_BATCH(batch, 0);
2365     OUT_BCS_BATCH(batch, 0);
2366     OUT_BCS_BATCH(batch, 0);
2367     OUT_BCS_BATCH(batch, 0);
2368     OUT_BCS_BATCH(batch, 0);
2369
2370     ADVANCE_BCS_BATCH(batch);
2371
2372     dri_bo_unreference(intra_bo);
2373 }
2374
2375 static void
2376 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2377                                      struct gen7_mfd_context *gen7_mfd_context)
2378 {
2379     struct i965_driver_data *i965 = i965_driver_data(ctx);
2380     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2381     dri_bo *bsd_mpc_bo, *mpr_bo;
2382
2383     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2384                               "bsd mpc row store",
2385                               11520, /* 1.5 * 120 * 64 */
2386                               0x1000);
2387
2388     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2389                           "mpr row store",
2390                           7680, /* 1.0 * 120 * 64 */
2391                           0x1000);
2392
2393     BEGIN_BCS_BATCH(batch, 10);
2394     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2395
2396     OUT_BCS_RELOC(batch,
2397                   bsd_mpc_bo,
2398                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2399                   0);
2400
2401     OUT_BCS_BATCH(batch, 0);
2402     OUT_BCS_BATCH(batch, 0);
2403
2404     OUT_BCS_RELOC(batch,
2405                   mpr_bo,
2406                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2407                   0);
2408     OUT_BCS_BATCH(batch, 0);
2409     OUT_BCS_BATCH(batch, 0);
2410
2411     OUT_BCS_BATCH(batch, 0);
2412     OUT_BCS_BATCH(batch, 0);
2413     OUT_BCS_BATCH(batch, 0);
2414
2415     ADVANCE_BCS_BATCH(batch);
2416
2417     dri_bo_unreference(bsd_mpc_bo);
2418     dri_bo_unreference(mpr_bo);
2419 }
2420
2421 static void
2422 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2423                           struct gen7_mfd_context *gen7_mfd_context)
2424 {
2425
2426 }
2427
2428 static void
2429 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2430                            struct gen7_mfd_context *gen7_mfd_context)
2431 {
2432     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2433     int img_struct = 0;
2434     int mbaff_frame_flag = 0;
2435     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2436
2437     BEGIN_BCS_BATCH(batch, 16);
2438     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2439     OUT_BCS_BATCH(batch, 
2440                   width_in_mbs * height_in_mbs);
2441     OUT_BCS_BATCH(batch, 
2442                   ((height_in_mbs - 1) << 16) | 
2443                   ((width_in_mbs - 1) << 0));
2444     OUT_BCS_BATCH(batch, 
2445                   (0 << 24) |
2446                   (0 << 16) |
2447                   (0 << 14) |
2448                   (0 << 13) |
2449                   (0 << 12) | /* differ from GEN6 */
2450                   (0 << 10) |
2451                   (img_struct << 8));
2452     OUT_BCS_BATCH(batch,
2453                   (1 << 10) | /* 4:2:0 */
2454                   (1 << 7) |  /* CABAC */
2455                   (0 << 6) |
2456                   (0 << 5) |
2457                   (0 << 4) |
2458                   (0 << 3) |
2459                   (1 << 2) |
2460                   (mbaff_frame_flag << 1) |
2461                   (0 << 0));
2462     OUT_BCS_BATCH(batch, 0);
2463     OUT_BCS_BATCH(batch, 0);
2464     OUT_BCS_BATCH(batch, 0);
2465     OUT_BCS_BATCH(batch, 0);
2466     OUT_BCS_BATCH(batch, 0);
2467     OUT_BCS_BATCH(batch, 0);
2468     OUT_BCS_BATCH(batch, 0);
2469     OUT_BCS_BATCH(batch, 0);
2470     OUT_BCS_BATCH(batch, 0);
2471     OUT_BCS_BATCH(batch, 0);
2472     OUT_BCS_BATCH(batch, 0);
2473     ADVANCE_BCS_BATCH(batch);
2474 }
2475
2476 static void
2477 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2478                                   struct gen7_mfd_context *gen7_mfd_context)
2479 {
2480     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2481     int i;
2482
2483     BEGIN_BCS_BATCH(batch, 71);
2484     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2485
2486     /* reference surfaces 0..15 */
2487     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2488         OUT_BCS_BATCH(batch, 0); /* top */
2489         OUT_BCS_BATCH(batch, 0); /* bottom */
2490     }
2491
2492     OUT_BCS_BATCH(batch, 0);
2493
2494     /* the current decoding frame/field */
2495     OUT_BCS_BATCH(batch, 0); /* top */
2496     OUT_BCS_BATCH(batch, 0);
2497     OUT_BCS_BATCH(batch, 0);
2498
2499     /* POC List */
2500     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2501         OUT_BCS_BATCH(batch, 0);
2502         OUT_BCS_BATCH(batch, 0);
2503     }
2504
2505     OUT_BCS_BATCH(batch, 0);
2506     OUT_BCS_BATCH(batch, 0);
2507
2508     ADVANCE_BCS_BATCH(batch);
2509 }
2510
2511 static void
2512 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2513                                      struct gen7_mfd_context *gen7_mfd_context)
2514 {
2515     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2516
2517     BEGIN_BCS_BATCH(batch, 11);
2518     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2519     OUT_BCS_RELOC(batch,
2520                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2521                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2522                   0);
2523     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2524     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2525     OUT_BCS_BATCH(batch, 0);
2526     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2527     OUT_BCS_BATCH(batch, 0);
2528     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2529     OUT_BCS_BATCH(batch, 0);
2530     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2531     OUT_BCS_BATCH(batch, 0);
2532     ADVANCE_BCS_BATCH(batch);
2533 }
2534
2535 static void
2536 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2537                             struct gen7_mfd_context *gen7_mfd_context)
2538 {
2539     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2540
2541     /* the input bitstream format on GEN7 differs from GEN6 */
2542     BEGIN_BCS_BATCH(batch, 6);
2543     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2544     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2545     OUT_BCS_BATCH(batch, 0);
2546     OUT_BCS_BATCH(batch,
2547                   (0 << 31) |
2548                   (0 << 14) |
2549                   (0 << 12) |
2550                   (0 << 10) |
2551                   (0 << 8));
2552     OUT_BCS_BATCH(batch,
2553                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2554                   (0 << 5)  |
2555                   (0 << 4)  |
2556                   (1 << 3) | /* LastSlice Flag */
2557                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2558     OUT_BCS_BATCH(batch, 0);
2559     ADVANCE_BCS_BATCH(batch);
2560 }
2561
2562 static void
2563 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2564                              struct gen7_mfd_context *gen7_mfd_context)
2565 {
2566     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2567     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2568     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2569     int first_mb_in_slice = 0;
2570     int slice_type = SLICE_TYPE_I;
2571
2572     BEGIN_BCS_BATCH(batch, 11);
2573     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2574     OUT_BCS_BATCH(batch, slice_type);
2575     OUT_BCS_BATCH(batch, 
2576                   (num_ref_idx_l1 << 24) |
2577                   (num_ref_idx_l0 << 16) |
2578                   (0 << 8) |
2579                   (0 << 0));
2580     OUT_BCS_BATCH(batch, 
2581                   (0 << 29) |
2582                   (1 << 27) |   /* disable Deblocking */
2583                   (0 << 24) |
2584                   (gen7_jpeg_wa_clip.qp << 16) |
2585                   (0 << 8) |
2586                   (0 << 0));
2587     OUT_BCS_BATCH(batch, 
2588                   (slice_ver_pos << 24) |
2589                   (slice_hor_pos << 16) | 
2590                   (first_mb_in_slice << 0));
2591     OUT_BCS_BATCH(batch,
2592                   (next_slice_ver_pos << 16) |
2593                   (next_slice_hor_pos << 0));
2594     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2595     OUT_BCS_BATCH(batch, 0);
2596     OUT_BCS_BATCH(batch, 0);
2597     OUT_BCS_BATCH(batch, 0);
2598     OUT_BCS_BATCH(batch, 0);
2599     ADVANCE_BCS_BATCH(batch);
2600 }
2601
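/*
 * JPEG decode workaround: run a minimal AVC decode of the canned
 * gen7_jpeg_wa_clip bitstream (pipe mode select, surface, buffer and QM
 * state, image/slice state and a BSD object) before the real JPEG decode,
 * presumably to bring the MFX pipeline into a known state.
 */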
2602 static void
2603 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2604                  struct gen7_mfd_context *gen7_mfd_context)
2605 {
2606     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2607     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2608     intel_batchbuffer_emit_mi_flush(batch);
2609     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2610     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2611     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2612     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2613     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2614     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2615     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2616
2617     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2618     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2619     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2620 }
2621
2622 #endif
2623
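/*
 * JPEG baseline decode: the first pass over the slice parameters only
 * determines the largest DC/AC Huffman table selector, so that the right
 * number of Huffman tables can be loaded; the second pass then emits one
 * BSD object per scan.
 */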
2624 void
2625 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2626                              struct decode_state *decode_state,
2627                              struct gen7_mfd_context *gen7_mfd_context)
2628 {
2629     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2630     VAPictureParameterBufferJPEGBaseline *pic_param;
2631     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2632     dri_bo *slice_data_bo;
2633     int i, j, max_selector = 0;
2634
2635     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2636     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2637
2638     /* Currently only Baseline DCT is supported */
2639     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2640     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2641 #ifdef JPEG_WA
2642     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2643 #endif
2644     intel_batchbuffer_emit_mi_flush(batch);
2645     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2646     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2647     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2648     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2649     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2650
2651     for (j = 0; j < decode_state->num_slice_params; j++) {
2652         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2653         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2654         slice_data_bo = decode_state->slice_datas[j]->bo;
2655         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2656
2657         if (j == decode_state->num_slice_params - 1)
2658             next_slice_group_param = NULL;
2659         else
2660             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2661
2662         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2663             int component;
2664
2665             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2666
2667             if (i < decode_state->slice_params[j]->num_elements - 1)
2668                 next_slice_param = slice_param + 1;
2669             else
2670                 next_slice_param = next_slice_group_param;
2671
2672             for (component = 0; component < slice_param->num_components; component++) {
2673                 if (max_selector < slice_param->components[component].dc_table_selector)
2674                     max_selector = slice_param->components[component].dc_table_selector;
2675
2676                 if (max_selector < slice_param->components[component].ac_table_selector)
2677                     max_selector = slice_param->components[component].ac_table_selector;
2678             }
2679
2680             slice_param++;
2681         }
2682     }
2683
2684     assert(max_selector < 2);
2685     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2686
2687     for (j = 0; j < decode_state->num_slice_params; j++) {
2688         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2689         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2690         slice_data_bo = decode_state->slice_datas[j]->bo;
2691         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2692
2693         if (j == decode_state->num_slice_params - 1)
2694             next_slice_group_param = NULL;
2695         else
2696             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2697
2698         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2699             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2700
2701             if (i < decode_state->slice_params[j]->num_elements - 1)
2702                 next_slice_param = slice_param + 1;
2703             else
2704                 next_slice_param = next_slice_group_param;
2705
2706             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2707             slice_param++;
2708         }
2709     }
2710
2711     intel_batchbuffer_end_atomic(batch);
2712     intel_batchbuffer_flush(batch);
2713 }
2714
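/*
 * VP8 dequantization lookup tables (per the VP8 specification): they map
 * a quantization index in the 0..127 range to the DC and AC quantizer
 * step sizes used below when programming MFX_VP8_PIC_STATE.
 */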
2715 static const int vp8_dc_qlookup[128] =
2716 {
2717       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2718      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2719      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2720      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2721      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2722      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2723      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2724     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2725 };
2726
2727 static const int vp8_ac_qlookup[128] =
2728 {
2729       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2730      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2731      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2732      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2733      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2734     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2735     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2736     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2737 };
2738
2739 static inline unsigned int vp8_clip_quantization_index(unsigned int index)
2740 {
2741     if (index > 127)
2742         return 127;
2743
2744     return index;
2745 }
2746
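/*
 * Allocate the per-context scratch buffers for VP8 decoding (intra row
 * store, deblocking filter row store, BSD/MPC row store and MPR row
 * store, all sized from the frame width in macroblocks) and use the
 * render target as the post-deblocking output when the loop filter is
 * enabled, or as the pre-deblocking output when it is disabled.
 */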
2747 static void
2748 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2749                           struct decode_state *decode_state,
2750                           struct gen7_mfd_context *gen7_mfd_context)
2751 {
2752     struct object_surface *obj_surface;
2753     struct i965_driver_data *i965 = i965_driver_data(ctx);
2754     dri_bo *bo;
2755     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2756     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2757     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2758
2759     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2760     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2761
2762     /* Current decoded picture */
2763     obj_surface = decode_state->render_object;
2764     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2765
2766     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2767     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2768     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2769     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2770
2771     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2772     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2773     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2774     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2775
2776     /* The same as AVC */
2777     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2778     bo = dri_bo_alloc(i965->intel.bufmgr,
2779                       "intra row store",
2780                       width_in_mbs * 64,
2781                       0x1000);
2782     assert(bo);
2783     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2784     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2785
2786     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2787     bo = dri_bo_alloc(i965->intel.bufmgr,
2788                       "deblocking filter row store",
2789                       width_in_mbs * 64 * 4,
2790                       0x1000);
2791     assert(bo);
2792     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2793     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2794
2795     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2796     bo = dri_bo_alloc(i965->intel.bufmgr,
2797                       "bsd mpc row store",
2798                       width_in_mbs * 64 * 2,
2799                       0x1000);
2800     assert(bo);
2801     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2802     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2803
2804     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2805     bo = dri_bo_alloc(i965->intel.bufmgr,
2806                       "mpr row store",
2807                       width_in_mbs * 64 * 2,
2808                       0x1000);
2809     assert(bo);
2810     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2811     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2812
2813     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2814 }
2815
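/*
 * Program MFX_VP8_PIC_STATE (38 DWords): frame size in macroblocks,
 * frame-level flags, per-segment quantizer values derived from the
 * lookup tables above, the coefficient probability buffer, mode/MV
 * probabilities and loop filter deltas. Segmentation id streamin/streamout
 * is left disabled.
 */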
2816 static void
2817 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2818                        struct decode_state *decode_state,
2819                        struct gen7_mfd_context *gen7_mfd_context)
2820 {
2821     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2822     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2823     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2824     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2825     dri_bo *probs_bo = decode_state->probability_data->bo;
2826     int i, j, log2num;
2827     unsigned int quantization_value[4][6];
2828
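    /*
     * num_of_partitions counts the control partition plus the token
     * partitions, so num_of_partitions - 1 (1, 2, 4 or 8) is the number
     * of token partitions and its log2 goes into the field below.
     */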
2829     log2num = (int)log2(slice_param->num_of_partitions - 1);
2830
2831     BEGIN_BCS_BATCH(batch, 38);
2832     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2833     OUT_BCS_BATCH(batch,
2834                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2835                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2836     OUT_BCS_BATCH(batch,
2837                   log2num << 24 |
2838                   pic_param->pic_fields.bits.sharpness_level << 16 |
2839                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2840                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2841                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2842                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2843                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2844                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2845                   0 << 7 | /* segmentation id streamin disabled */
2846                   0 << 6 | /* segmentation id streamout disabled */
2847                   pic_param->pic_fields.bits.key_frame << 5 |
2848                   pic_param->pic_fields.bits.filter_type << 4 |
2849                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2850                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2851
2852     OUT_BCS_BATCH(batch,
2853                   pic_param->loop_filter_level[3] << 24 |
2854                   pic_param->loop_filter_level[2] << 16 |
2855                   pic_param->loop_filter_level[1] <<  8 |
2856                   pic_param->loop_filter_level[0] <<  0);
2857
2858     /* Quantizer values for the 4 segments, DW4-DW15 */
2859     for (i = 0; i < 4; i++) {
2860         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])]; /* yac */
2861         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])]; /* ydc */
2862         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2863         /* 101581 >> 16 is equivalent to 155/100 */
2864         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2865         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])]; /* uvdc */
2866         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])]; /* uvac */
2867
2868         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2869         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2870
2871         OUT_BCS_BATCH(batch,
2872                       quantization_value[i][0] << 16 | /* Y1AC */
2873                       quantization_value[i][1] <<  0); /* Y1DC */
2874         OUT_BCS_BATCH(batch,
2875                       quantization_value[i][5] << 16 | /* UVAC */
2876                       quantization_value[i][4] <<  0); /* UVDC */
2877         OUT_BCS_BATCH(batch,
2878                       quantization_value[i][3] << 16 | /* Y2AC */
2879                       quantization_value[i][2] <<  0); /* Y2DC */
2880     }
2881
2882     /* CoeffProbability table for non-key frame, DW16-DW18 */
2883     if (probs_bo) {
2884         OUT_BCS_RELOC(batch, probs_bo,
2885                       0, I915_GEM_DOMAIN_INSTRUCTION,
2886                       0);
2887         OUT_BCS_BATCH(batch, 0);
2888         OUT_BCS_BATCH(batch, 0);
2889     } else {
2890         OUT_BCS_BATCH(batch, 0);
2891         OUT_BCS_BATCH(batch, 0);
2892         OUT_BCS_BATCH(batch, 0);
2893     }
2894
2895     OUT_BCS_BATCH(batch,
2896                   pic_param->mb_segment_tree_probs[2] << 16 |
2897                   pic_param->mb_segment_tree_probs[1] <<  8 |
2898                   pic_param->mb_segment_tree_probs[0] <<  0);
2899
2900     OUT_BCS_BATCH(batch,
2901                   pic_param->prob_skip_false << 24 |
2902                   pic_param->prob_intra      << 16 |
2903                   pic_param->prob_last       <<  8 |
2904                   pic_param->prob_gf         <<  0);
2905
2906     OUT_BCS_BATCH(batch,
2907                   pic_param->y_mode_probs[3] << 24 |
2908                   pic_param->y_mode_probs[2] << 16 |
2909                   pic_param->y_mode_probs[1] <<  8 |
2910                   pic_param->y_mode_probs[0] <<  0);
2911
2912     OUT_BCS_BATCH(batch,
2913                   pic_param->uv_mode_probs[2] << 16 |
2914                   pic_param->uv_mode_probs[1] <<  8 |
2915                   pic_param->uv_mode_probs[0] <<  0);
2916     
2917     /* MV update value, DW23-DW32 */
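    /* mv_probs[][] holds 19 probabilities per component, so the last byte
     * of the final DWord for each component is zero padding.
     */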
2918     for (i = 0; i < 2; i++) {
2919         for (j = 0; j < 20; j += 4) {
2920             OUT_BCS_BATCH(batch,
2921                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2922                           pic_param->mv_probs[i][j + 2] << 16 |
2923                           pic_param->mv_probs[i][j + 1] <<  8 |
2924                           pic_param->mv_probs[i][j + 0] <<  0);
2925         }
2926     }
2927
2928     OUT_BCS_BATCH(batch,
2929                   pic_param->loop_filter_deltas_ref_frame[3] << 24 |
2930                   pic_param->loop_filter_deltas_ref_frame[2] << 16 |
2931                   pic_param->loop_filter_deltas_ref_frame[1] <<  8 |
2932                   pic_param->loop_filter_deltas_ref_frame[0] <<  0);
2933
2934     OUT_BCS_BATCH(batch,
2935                   pic_param->loop_filter_deltas_mode[3] << 24 |
2936                   pic_param->loop_filter_deltas_mode[2] << 16 |
2937                   pic_param->loop_filter_deltas_mode[1] <<  8 |
2938                   pic_param->loop_filter_deltas_mode[0] <<  0);
2939
2940     /* segmentation id stream base address, DW35-DW37 */
2941     OUT_BCS_BATCH(batch, 0);
2942     OUT_BCS_BATCH(batch, 0);
2943     OUT_BCS_BATCH(batch, 0);
2944     ADVANCE_BCS_BATCH(batch);
2945 }
2946
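/*
 * Emit MFD_VP8_BSD_OBJECT: partition 0 bool-decoder state (count, range,
 * value) and the macroblock_offset bit position, followed by nine
 * size/offset slots (control partition plus up to eight token
 * partitions); unused partition slots are zeroed.
 */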
2947 static void
2948 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2949                         VAPictureParameterBufferVP8 *pic_param,
2950                         VASliceParameterBufferVP8 *slice_param,
2951                         dri_bo *slice_data_bo,
2952                         struct gen7_mfd_context *gen7_mfd_context)
2953 {
2954     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2955     int i, log2num;
2956     unsigned int offset = slice_param->slice_data_offset;
2957
2958     assert(slice_param->num_of_partitions >= 2);
2959     assert(slice_param->num_of_partitions <= 9);
2960
2961     log2num = (int)log2(slice_param->num_of_partitions - 1);
2962
2963     BEGIN_BCS_BATCH(batch, 22);
2964     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2965     OUT_BCS_BATCH(batch,
2966                   pic_param->bool_coder_ctx.count << 16 | /* Partition 0 CPBAC Entropy Count */
2967                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2968                   log2num << 4 |
2969                   (slice_param->macroblock_offset & 0x7));
2970     OUT_BCS_BATCH(batch,
2971                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2972                   0);
2973
2974     for (i = 0; i < 9; i++) {
2975         if (i < slice_param->num_of_partitions) {
2976             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2977             OUT_BCS_BATCH(batch, offset);
2978         } else {
2979             OUT_BCS_BATCH(batch, 0);
2980             OUT_BCS_BATCH(batch, 0);
2981         }
2982
2983         offset += slice_param->partition_size[i];
2984     }
2985
2986     OUT_BCS_BATCH(batch,
2987                   1 << 31 | /* concealment method */
2988                   0);
2989
2990     ADVANCE_BCS_BATCH(batch);
2991 }
2992
2993 void
2994 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
2995                             struct decode_state *decode_state,
2996                             struct gen7_mfd_context *gen7_mfd_context)
2997 {
2998     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2999     VAPictureParameterBufferVP8 *pic_param;
3000     VASliceParameterBufferVP8 *slice_param;
3001     dri_bo *slice_data_bo;
3002
3003     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3004     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3005
3006     /* one slice per frame */
3007     assert(decode_state->num_slice_params == 1);
3008     assert(decode_state->slice_params[0]->num_elements == 1);
3009     assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
3010     assert(decode_state->slice_datas[0]->bo);
3011
3012     assert(decode_state->probability_data);
3013
3014     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3015     slice_data_bo = decode_state->slice_datas[0]->bo;
3016
3017     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3018     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3019     intel_batchbuffer_emit_mi_flush(batch);
3020     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3021     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3022     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3023     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3024     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3025     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3026     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3027     intel_batchbuffer_end_atomic(batch);
3028     intel_batchbuffer_flush(batch);
3029 }
3030
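/*
 * Top-level decode entry point (hw_context->run): sanity-check the input
 * buffers for the profile, then dispatch to the per-codec decode routine.
 */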
3031 static VAStatus
3032 gen8_mfd_decode_picture(VADriverContextP ctx, 
3033                         VAProfile profile, 
3034                         union codec_state *codec_state,
3035                         struct hw_context *hw_context)
3036 {
3037
3038     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3039     struct decode_state *decode_state = &codec_state->decode;
3040     VAStatus vaStatus;
3041
3042     assert(gen7_mfd_context);
3043
3044     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3045
3046     if (vaStatus != VA_STATUS_SUCCESS)
3047         goto out;
3048
3049     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3050
3051     switch (profile) {
3052     case VAProfileMPEG2Simple:
3053     case VAProfileMPEG2Main:
3054         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3055         break;
3056         
3057     case VAProfileH264ConstrainedBaseline:
3058     case VAProfileH264Main:
3059     case VAProfileH264High:
3060         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3061         break;
3062
3063     case VAProfileVC1Simple:
3064     case VAProfileVC1Main:
3065     case VAProfileVC1Advanced:
3066         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3067         break;
3068
3069     case VAProfileJPEGBaseline:
3070         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3071         break;
3072
3073     case VAProfileVP8Version0_3:
3074         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3075         break;
3076
3077     default:
3078         assert(0);
3079         break;
3080     }
3081
3082     vaStatus = VA_STATUS_SUCCESS;
3083
3084 out:
3085     return vaStatus;
3086 }
3087
3088 static void
3089 gen8_mfd_context_destroy(void *hw_context)
3090 {
3091     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3092
3093     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3094     gen7_mfd_context->post_deblocking_output.bo = NULL;
3095
3096     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3097     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3098
3099     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3100     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3101
3102     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3103     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3104
3105     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3106     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3107
3108     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3109     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3110
3111     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3112     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3113
3114     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3115
3116     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3117     free(gen7_mfd_context);
3118 }
3119
3120 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3121                                     struct gen7_mfd_context *gen7_mfd_context)
3122 {
3123     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3124     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3125     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3126     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3127 }
3128
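/*
 * Create the gen8 decoder hw_context: install the run/destroy hooks,
 * allocate the batch buffer, invalidate all reference surface slots and
 * run the codec-specific context initialization for MPEG-2 and H.264.
 */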
3129 struct hw_context *
3130 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3131 {
3132     struct intel_driver_data *intel = intel_driver_data(ctx);
3133     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3134     int i;
3135
3136     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3137     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3138     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3139
3140     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3141         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3142         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3143     }
3144
3145     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3146
3147     switch (obj_config->profile) {
3148     case VAProfileMPEG2Simple:
3149     case VAProfileMPEG2Main:
3150         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3151         break;
3152
3153     case VAProfileH264ConstrainedBaseline:
3154     case VAProfileH264Main:
3155     case VAProfileH264High:
3156         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3157         break;
3158     default:
3159         break;
3160     }
3161     return (struct hw_context *)gen7_mfd_context;
3162 }