Use the right parameters to initialize bit rate context
src/gen8_mfd.c (platform/upstream/libva-intel-driver.git)
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
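/*
 * Zig-zag scan order for an 8x8 block.  gen8_mfd_mpeg2_qm_state() below uses
 * this table to convert MPEG-2 quantiser matrices supplied in scan order back
 * into the raster order expected by MFX_QM_STATE.
 */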
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
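/*
 * Lazily attach a GenAvcSurface to the decoded surface and allocate the
 * direct-MV read/write buffers consumed by MFX_AVC_DIRECTMODE_STATE.  The
 * bottom-field buffer is only needed for field pictures decoded without
 * direct_8x8_inference (see dmv_bottom_flag below).
 */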
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(1, sizeof(GenAvcSurface));
77         assert((obj_surface->size & 0x3f) == 0);
78         obj_surface->private_data = gen7_avc_surface;
79     }
80
81     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
82                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
83
84     if (gen7_avc_surface->dmv_top == NULL) {
85         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86                                                  "direct mv w/r buffer",
87                                                  width_in_mbs * height_in_mbs * 128,
88                                                  0x1000);
89         assert(gen7_avc_surface->dmv_top);
90     }
91
92     if (gen7_avc_surface->dmv_bottom_flag &&
93         gen7_avc_surface->dmv_bottom == NULL) {
94         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
95                                                     "direct mv w/r buffer",
96                                                     width_in_mbs * height_in_mbs * 128,
97                                                     0x1000);
98         assert(gen7_avc_surface->dmv_bottom);
99     }
100 }
101
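/*
 * MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode for the
 * selected codec (standard_select), with stream-out disabled and the output
 * routed through the pre- or post-deblocking path, whichever the caller has
 * marked valid in the context.
 */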
102 static void
103 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
104                           struct decode_state *decode_state,
105                           int standard_select,
106                           struct gen7_mfd_context *gen7_mfd_context)
107 {
108     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
109
110     assert(standard_select == MFX_FORMAT_MPEG2 ||
111            standard_select == MFX_FORMAT_AVC ||
112            standard_select == MFX_FORMAT_VC1 ||
113            standard_select == MFX_FORMAT_JPEG ||
114            standard_select == MFX_FORMAT_VP8);
115
116     BEGIN_BCS_BATCH(batch, 5);
117     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
118     OUT_BCS_BATCH(batch,
119                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
120                   (MFD_MODE_VLD << 15) | /* VLD mode */
121                   (0 << 10) | /* disable Stream-Out */
122                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
123                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
124                   (0 << 5)  | /* not in stitch mode */
125                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
126                   (standard_select << 0));
127     OUT_BCS_BATCH(batch,
128                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
129                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
130                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
131                   (0 << 1)  |
132                   (0 << 0));
133     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
134     OUT_BCS_BATCH(batch, 0); /* reserved */
135     ADVANCE_BCS_BATCH(batch);
136 }
137
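/*
 * MFX_SURFACE_STATE for the render target: a Y-major tiled planar 4:2:0
 * surface whose chroma plane is interleaved (NV12) for every codec except
 * JPEG, with the Cb/Cr Y offsets taken from the surface object.
 */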
138 static void
139 gen8_mfd_surface_state(VADriverContextP ctx,
140                        struct decode_state *decode_state,
141                        int standard_select,
142                        struct gen7_mfd_context *gen7_mfd_context)
143 {
144     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
145     struct object_surface *obj_surface = decode_state->render_object;
146     unsigned int y_cb_offset;
147     unsigned int y_cr_offset;
148
149     assert(obj_surface);
150
151     y_cb_offset = obj_surface->y_cb_offset;
152     y_cr_offset = obj_surface->y_cr_offset;
153
154     BEGIN_BCS_BATCH(batch, 6);
155     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
156     OUT_BCS_BATCH(batch, 0);
157     OUT_BCS_BATCH(batch,
158                   ((obj_surface->orig_height - 1) << 18) |
159                   ((obj_surface->orig_width - 1) << 4));
160     OUT_BCS_BATCH(batch,
161                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
162                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
163                   (0 << 22) | /* surface object control state, ignored */
164                   ((obj_surface->width - 1) << 3) | /* pitch */
165                   (0 << 2)  | /* must be 0 */
166                   (1 << 1)  | /* must be tiled */
167                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
168     OUT_BCS_BATCH(batch,
169                   (0 << 16) | /* X offset for U(Cb), must be 0 */
170                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
171     OUT_BCS_BATCH(batch,
172                   (0 << 16) | /* X offset for V(Cr), must be 0 */
173                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codecs, non-zero for JPEG */
174     ADVANCE_BCS_BATCH(batch);
175 }
176
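/*
 * MFX_PIPE_BUF_ADDR_STATE (61 DWords on Gen8): pre/post-deblocking outputs,
 * the intra and deblocking-filter row-store scratch buffers, and one
 * address/offset pair per reference surface (DW 19..50).  Unused entries are
 * programmed as 0.
 */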
177 static void
178 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
179                              struct decode_state *decode_state,
180                              int standard_select,
181                              struct gen7_mfd_context *gen7_mfd_context)
182 {
183     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
184     int i;
185
186     BEGIN_BCS_BATCH(batch, 61);
187     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188     /* Pre-deblock 1-3 */
189     if (gen7_mfd_context->pre_deblocking_output.valid)
190         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
191                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
192                       0);
193     else
194         OUT_BCS_BATCH(batch, 0);
195
196     OUT_BCS_BATCH(batch, 0);
197     OUT_BCS_BATCH(batch, 0);
198     /* Post-deblocking 4-6 */
199     if (gen7_mfd_context->post_deblocking_output.valid)
200         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
201                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                       0);
203     else
204         OUT_BCS_BATCH(batch, 0);
205
206     OUT_BCS_BATCH(batch, 0);
207     OUT_BCS_BATCH(batch, 0);
208
209     /* uncompressed-video & stream out 7-12 */
210     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
212     OUT_BCS_BATCH(batch, 0);
213     OUT_BCS_BATCH(batch, 0);
214     OUT_BCS_BATCH(batch, 0);
215     OUT_BCS_BATCH(batch, 0);
216
217     /* intra row-store scratch 13-15 */
218     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
219         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
220                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
221                       0);
222     else
223         OUT_BCS_BATCH(batch, 0);
224
225     OUT_BCS_BATCH(batch, 0);
226     OUT_BCS_BATCH(batch, 0);
227     /* deblocking-filter-row-store 16-18 */
228     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
229         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
230                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
231                       0);
232     else
233         OUT_BCS_BATCH(batch, 0);
234     OUT_BCS_BATCH(batch, 0);
235     OUT_BCS_BATCH(batch, 0);
236
237     /* DW 19..50 */
238     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
239         struct object_surface *obj_surface;
240
241         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
242             gen7_mfd_context->reference_surface[i].obj_surface &&
243             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
244             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
245
246             OUT_BCS_RELOC(batch, obj_surface->bo,
247                           I915_GEM_DOMAIN_INSTRUCTION, 0,
248                           0);
249         } else {
250             OUT_BCS_BATCH(batch, 0);
251         }
252         
253         OUT_BCS_BATCH(batch, 0);
254     }
255     
256     /* reference property 51 */
257     OUT_BCS_BATCH(batch, 0);  
258         
259     /* Macroblock status & ILDB 52-57 */
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266
267     /* the second Macroblock status 58-60 */    
268     OUT_BCS_BATCH(batch, 0);
269     OUT_BCS_BATCH(batch, 0);
270     OUT_BCS_BATCH(batch, 0);
271
272     ADVANCE_BCS_BATCH(batch);
273 }
274
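/*
 * MFX_IND_OBJ_BASE_ADDR_STATE: only the indirect bitstream object is used for
 * decode; it points at the slice data BO with a 2 GB upper bound.  The MV,
 * IT-COFF, IT-DBLK and PAK-BSE sections are left at 0.
 */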
275 static void
276 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
277                                  dri_bo *slice_data_bo,
278                                  int standard_select,
279                                  struct gen7_mfd_context *gen7_mfd_context)
280 {
281     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
282
283     BEGIN_BCS_BATCH(batch, 26);
284     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
285         /* MFX In BS 1-5 */
286     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
287     OUT_BCS_BATCH(batch, 0);
288     OUT_BCS_BATCH(batch, 0);
289         /* Upper bound 4-5 */   
290     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
291     OUT_BCS_BATCH(batch, 0);
292
293         /* MFX indirect MV 6-10 */
294     OUT_BCS_BATCH(batch, 0);
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299         
300         /* MFX IT_COFF 11-15 */
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306
307         /* MFX IT_DBLK 16-20 */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313
314         /* MFX PAK_BSE object for encoder 21-25 */
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
319     OUT_BCS_BATCH(batch, 0);
320
321     ADVANCE_BCS_BATCH(batch);
322 }
323
324 static void
325 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
326                                  struct decode_state *decode_state,
327                                  int standard_select,
328                                  struct gen7_mfd_context *gen7_mfd_context)
329 {
330     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
331
332     BEGIN_BCS_BATCH(batch, 10);
333     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
334
335     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
336         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
337                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
338                       0);
339     else
340         OUT_BCS_BATCH(batch, 0);
341
342     OUT_BCS_BATCH(batch, 0);
343     OUT_BCS_BATCH(batch, 0);
344         /* MPR Row Store Scratch buffer 4-6 */
345     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
346         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
347                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
348                       0);
349     else
350         OUT_BCS_BATCH(batch, 0);
351
352     OUT_BCS_BATCH(batch, 0);
353     OUT_BCS_BATCH(batch, 0);
354
355         /* Bitplane 7-9 */ 
356     if (gen7_mfd_context->bitplane_read_buffer.valid)
357         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
358                       I915_GEM_DOMAIN_INSTRUCTION, 0,
359                       0);
360     else
361         OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0);
364     ADVANCE_BCS_BATCH(batch);
365 }
366
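/*
 * MFX_QM_STATE: the payload is always 16 DWords, so shorter matrices (e.g.
 * the 3 * 16 byte AVC 4x4 lists) are copied into a zero-filled scratch buffer
 * before being emitted.
 */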
367 static void
368 gen8_mfd_qm_state(VADriverContextP ctx,
369                   int qm_type,
370                   unsigned char *qm,
371                   int qm_length,
372                   struct gen7_mfd_context *gen7_mfd_context)
373 {
374     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
375     unsigned int qm_buffer[16] = { 0 };    /* zero-fill: qm_length may be shorter than the 16 DWords emitted below */
376
377     assert(qm_length <= 16 * 4);
378     memcpy(qm_buffer, qm, qm_length);
379
380     BEGIN_BCS_BATCH(batch, 18);
381     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
382     OUT_BCS_BATCH(batch, qm_type << 0);
383     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
384     ADVANCE_BCS_BATCH(batch);
385 }
386
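/*
 * MFX_AVC_IMG_STATE: frame geometry in macroblocks plus the per-picture flags
 * from the VA picture parameters.  img_struct follows the hardware encoding
 * (0 = frame, 1 = top field, 3 = bottom field).
 */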
387 static void
388 gen8_mfd_avc_img_state(VADriverContextP ctx,
389                        struct decode_state *decode_state,
390                        struct gen7_mfd_context *gen7_mfd_context)
391 {
392     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
393     int img_struct;
394     int mbaff_frame_flag;
395     unsigned int width_in_mbs, height_in_mbs;
396     VAPictureParameterBufferH264 *pic_param;
397
398     assert(decode_state->pic_param && decode_state->pic_param->buffer);
399     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
400     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
401
402     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
403         img_struct = 1;
404     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
405         img_struct = 3;
406     else
407         img_struct = 0;
408
409     if ((img_struct & 0x1) == 0x1) {
410         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
411     } else {
412         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
413     }
414
415     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
416         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
417         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
418     } else {
419         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
420     }
421
422     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
423                         !pic_param->pic_fields.bits.field_pic_flag);
424
425     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
426     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
427
428     /* The MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
429     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
430            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
431     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
432
433     BEGIN_BCS_BATCH(batch, 17);
434     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
435     OUT_BCS_BATCH(batch, 
436                   (width_in_mbs * height_in_mbs - 1));
437     OUT_BCS_BATCH(batch, 
438                   ((height_in_mbs - 1) << 16) | 
439                   ((width_in_mbs - 1) << 0));
440     OUT_BCS_BATCH(batch, 
441                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
442                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
443                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
444                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
445                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
446                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
447                   (img_struct << 8));
448     OUT_BCS_BATCH(batch,
449                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
450                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
451                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
452                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
453                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
454                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
455                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
456                   (mbaff_frame_flag << 1) |
457                   (pic_param->pic_fields.bits.field_pic_flag << 0));
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     OUT_BCS_BATCH(batch, 0);
466     OUT_BCS_BATCH(batch, 0);
467     OUT_BCS_BATCH(batch, 0);
468     OUT_BCS_BATCH(batch, 0);
469     OUT_BCS_BATCH(batch, 0);
470     ADVANCE_BCS_BATCH(batch);
471 }
472
473 static void
474 gen8_mfd_avc_qm_state(VADriverContextP ctx,
475                       struct decode_state *decode_state,
476                       struct gen7_mfd_context *gen7_mfd_context)
477 {
478     VAIQMatrixBufferH264 *iq_matrix;
479     VAPictureParameterBufferH264 *pic_param;
480
481     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
482         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
483     else
484         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
485
486     assert(decode_state->pic_param && decode_state->pic_param->buffer);
487     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
488
489     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
490     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
491
492     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
493         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
494         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
495     }
496 }
497
498 static void
499 gen8_mfd_avc_picid_state(VADriverContextP ctx,
500                       struct decode_state *decode_state,
501                       struct gen7_mfd_context *gen7_mfd_context)
502 {
503     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
504
505     BEGIN_BCS_BATCH(batch, 10);
506     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
507     OUT_BCS_BATCH(batch, 1); /* disable Picture ID Remapping */
508     OUT_BCS_BATCH(batch, 0);
509     OUT_BCS_BATCH(batch, 0);
510     OUT_BCS_BATCH(batch, 0);
511     OUT_BCS_BATCH(batch, 0);
512     OUT_BCS_BATCH(batch, 0);
513     OUT_BCS_BATCH(batch, 0);
514     OUT_BCS_BATCH(batch, 0);
515     OUT_BCS_BATCH(batch, 0);
516     ADVANCE_BCS_BATCH(batch);
517 }
518
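/*
 * MFX_AVC_DIRECTMODE_STATE: direct-MV buffer addresses for every reference
 * surface and for the current picture, followed by the top/bottom field order
 * counts (POC) for each reference and for the current picture.
 */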
519 static void
520 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
521                               struct decode_state *decode_state,
522                               VAPictureParameterBufferH264 *pic_param,
523                               VASliceParameterBufferH264 *slice_param,
524                               struct gen7_mfd_context *gen7_mfd_context)
525 {
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527     struct object_surface *obj_surface;
528     GenAvcSurface *gen7_avc_surface;
529     VAPictureH264 *va_pic;
530     int i, j;
531
532     BEGIN_BCS_BATCH(batch, 71);
533     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
534
535     /* reference surfaces 0..15 */
536     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538             gen7_mfd_context->reference_surface[i].obj_surface &&
539             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
540
541             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542             gen7_avc_surface = obj_surface->private_data;
543
544             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545                           I915_GEM_DOMAIN_INSTRUCTION, 0,
546                           0);
547             OUT_BCS_BATCH(batch, 0);
548         } else {
549             OUT_BCS_BATCH(batch, 0);
550             OUT_BCS_BATCH(batch, 0);
551         }
552     }
553     
554     OUT_BCS_BATCH(batch, 0);
555
556     /* the current decoding frame/field */
557     va_pic = &pic_param->CurrPic;
558     obj_surface = decode_state->render_object;
559     assert(obj_surface->bo && obj_surface->private_data);
560     gen7_avc_surface = obj_surface->private_data;
561
562     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0);
565
566     OUT_BCS_BATCH(batch, 0);
567     OUT_BCS_BATCH(batch, 0);
568
569     /* POC List */
570     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
571         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
572             int found = 0;
573
574             assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
575
576             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
577                 va_pic = &pic_param->ReferenceFrames[j];
578                 
579                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
580                     continue;
581
582                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
583                     found = 1;
584                     break;
585                 }
586             }
587
588             assert(found == 1);
589             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
590             
591             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
592             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
593         } else {
594             OUT_BCS_BATCH(batch, 0);
595             OUT_BCS_BATCH(batch, 0);
596         }
597     }
598
599     va_pic = &pic_param->CurrPic;
600     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
601     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
602
603     ADVANCE_BCS_BATCH(batch);
604 }
605
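/*
 * MFX_AVC_SLICE_STATE: slice type, active reference counts, QP and deblocking
 * parameters, and the macroblock coordinates of this slice and of the next
 * slice (or the end of the picture when this is the last slice).
 */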
606 static void
607 gen8_mfd_avc_slice_state(VADriverContextP ctx,
608                          VAPictureParameterBufferH264 *pic_param,
609                          VASliceParameterBufferH264 *slice_param,
610                          VASliceParameterBufferH264 *next_slice_param,
611                          struct gen7_mfd_context *gen7_mfd_context)
612 {
613     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
614     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
615     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
616     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
617     int num_ref_idx_l0, num_ref_idx_l1;
618     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
619                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
620     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
621     int slice_type;
622
623     if (slice_param->slice_type == SLICE_TYPE_I ||
624         slice_param->slice_type == SLICE_TYPE_SI) {
625         slice_type = SLICE_TYPE_I;
626     } else if (slice_param->slice_type == SLICE_TYPE_P ||
627                slice_param->slice_type == SLICE_TYPE_SP) {
628         slice_type = SLICE_TYPE_P;
629     } else { 
630         assert(slice_param->slice_type == SLICE_TYPE_B);
631         slice_type = SLICE_TYPE_B;
632     }
633
634     if (slice_type == SLICE_TYPE_I) {
635         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
636         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
637         num_ref_idx_l0 = 0;
638         num_ref_idx_l1 = 0;
639     } else if (slice_type == SLICE_TYPE_P) {
640         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
641         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
642         num_ref_idx_l1 = 0;
643     } else {
644         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
645         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
646     }
647
648     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
649     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
650     slice_ver_pos = first_mb_in_slice / width_in_mbs;
651
652     if (next_slice_param) {
653         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
654         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
655         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
656     } else {
657         next_slice_hor_pos = 0;
658         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
659     }
660
661     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663     OUT_BCS_BATCH(batch, slice_type);
664     OUT_BCS_BATCH(batch, 
665                   (num_ref_idx_l1 << 24) |
666                   (num_ref_idx_l0 << 16) |
667                   (slice_param->chroma_log2_weight_denom << 8) |
668                   (slice_param->luma_log2_weight_denom << 0));
669     OUT_BCS_BATCH(batch, 
670                   (slice_param->direct_spatial_mv_pred_flag << 29) |
671                   (slice_param->disable_deblocking_filter_idc << 27) |
672                   (slice_param->cabac_init_idc << 24) |
673                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
676     OUT_BCS_BATCH(batch, 
677                   (slice_ver_pos << 24) |
678                   (slice_hor_pos << 16) | 
679                   (first_mb_in_slice << 0));
680     OUT_BCS_BATCH(batch,
681                   (next_slice_ver_pos << 16) |
682                   (next_slice_hor_pos << 0));
683     OUT_BCS_BATCH(batch, 
684                   (next_slice_param == NULL) << 19); /* last slice flag */
685     OUT_BCS_BATCH(batch, 0);
686     OUT_BCS_BATCH(batch, 0);
687     OUT_BCS_BATCH(batch, 0);
688     OUT_BCS_BATCH(batch, 0);
689     ADVANCE_BCS_BATCH(batch);
690 }
691
692 static inline void
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694                            VAPictureParameterBufferH264 *pic_param,
695                            VASliceParameterBufferH264 *slice_param,
696                            struct gen7_mfd_context *gen7_mfd_context)
697 {
698     gen6_send_avc_ref_idx_state(
699         gen7_mfd_context->base.batch,
700         slice_param,
701         gen7_mfd_context->reference_surface
702     );
703 }
704
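/*
 * MFX_AVC_WEIGHTOFFSET_STATE: one table (L0) for explicitly weighted P/SP
 * slices, two tables (L0 and L1) for B slices with weighted_bipred_idc == 1.
 * Each table holds 32 entries of luma + Cb + Cr weight/offset pairs.
 */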
705 static void
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707                                 VAPictureParameterBufferH264 *pic_param,
708                                 VASliceParameterBufferH264 *slice_param,
709                                 struct gen7_mfd_context *gen7_mfd_context)
710 {
711     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712     int i, j, num_weight_offset_table = 0;
713     short weightoffsets[32 * 6];
714
715     if ((slice_param->slice_type == SLICE_TYPE_P ||
716          slice_param->slice_type == SLICE_TYPE_SP) &&
717         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718         num_weight_offset_table = 1;
719     }
720     
721     if ((slice_param->slice_type == SLICE_TYPE_B) &&
722         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723         num_weight_offset_table = 2;
724     }
725
726     for (i = 0; i < num_weight_offset_table; i++) {
727         BEGIN_BCS_BATCH(batch, 98);
728         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729         OUT_BCS_BATCH(batch, i);
730
731         if (i == 0) {
732             for (j = 0; j < 32; j++) {
733                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
739             }
740         } else {
741             for (j = 0; j < 32; j++) {
742                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
748             }
749         }
750
751         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752         ADVANCE_BCS_BATCH(batch);
753     }
754 }
755
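/*
 * MFD_AVC_BSD_OBJECT: hands one slice to the BSD unit, giving the slice data
 * size and offset within the bitstream BO, the bit offset of the first
 * macroblock, and the last-slice flag when no further slice follows.
 */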
756 static void
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758                         VAPictureParameterBufferH264 *pic_param,
759                         VASliceParameterBufferH264 *slice_param,
760                         dri_bo *slice_data_bo,
761                         VASliceParameterBufferH264 *next_slice_param,
762                         struct gen7_mfd_context *gen7_mfd_context)
763 {
764     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
766                                                             slice_param,
767                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
768
769     /* the input bitstream format on GEN7 differs from GEN6 */
770     BEGIN_BCS_BATCH(batch, 6);
771     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
772     OUT_BCS_BATCH(batch, 
773                   (slice_param->slice_data_size));
774     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
775     OUT_BCS_BATCH(batch,
776                   (0 << 31) |
777                   (0 << 14) |
778                   (0 << 12) |
779                   (0 << 10) |
780                   (0 << 8));
781     OUT_BCS_BATCH(batch,
782                   ((slice_data_bit_offset >> 3) << 16) |
783                   (1 << 7)  |
784                   (0 << 5)  |
785                   (0 << 4)  |
786                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787                   (slice_data_bit_offset & 0x7));
788     OUT_BCS_BATCH(batch, 0);
789     ADVANCE_BCS_BATCH(batch);
790 }
791
792 static inline void
793 gen8_mfd_avc_context_init(
794     VADriverContextP         ctx,
795     struct gen7_mfd_context *gen7_mfd_context
796 )
797 {
798     /* Initialize flat scaling lists */
799     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
800 }
801
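/*
 * Per-picture AVC setup: scan the slice headers to see whether in-loop
 * deblocking (ILDB) is needed, select the post- or pre-deblocking output
 * accordingly, (re)allocate the row-store scratch buffers sized from the
 * frame width in macroblocks, and prepare the direct-MV surface.
 */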
802 static void
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804                          struct decode_state *decode_state,
805                          struct gen7_mfd_context *gen7_mfd_context)
806 {
807     VAPictureParameterBufferH264 *pic_param;
808     VASliceParameterBufferH264 *slice_param;
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810     struct object_surface *obj_surface;
811     dri_bo *bo;
812     int i, j, enable_avc_ildb = 0;
813     unsigned int width_in_mbs, height_in_mbs;
814
815     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
818
819         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821             assert((slice_param->slice_type == SLICE_TYPE_I) ||
822                    (slice_param->slice_type == SLICE_TYPE_SI) ||
823                    (slice_param->slice_type == SLICE_TYPE_P) ||
824                    (slice_param->slice_type == SLICE_TYPE_SP) ||
825                    (slice_param->slice_type == SLICE_TYPE_B));
826
827             if (slice_param->disable_deblocking_filter_idc != 1) {
828                 enable_avc_ildb = 1;
829                 break;
830             }
831
832             slice_param++;
833         }
834     }
835
836     assert(decode_state->pic_param && decode_state->pic_param->buffer);
837     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
839     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
840     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
841     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
842     assert(height_in_mbs > 0 && height_in_mbs <= 256);
843
844     /* Current decoded picture */
845     obj_surface = decode_state->render_object;
846     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
847     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
848     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
849
850     /* initialize the UV component for the YUV400 (monochrome) case */
851     if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
852          unsigned int uv_offset = obj_surface->width * obj_surface->height; 
853          unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 
854
855          drm_intel_gem_bo_map_gtt(obj_surface->bo);
856          memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
857          drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
858     }
859
860     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
861
862     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
863     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
864     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
865     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
866
867     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
868     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
869     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
870     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
871
872     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
873     bo = dri_bo_alloc(i965->intel.bufmgr,
874                       "intra row store",
875                       width_in_mbs * 64,
876                       0x1000);
877     assert(bo);
878     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
879     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
880
881     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
882     bo = dri_bo_alloc(i965->intel.bufmgr,
883                       "deblocking filter row store",
884                       width_in_mbs * 64 * 4,
885                       0x1000);
886     assert(bo);
887     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
888     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
889
890     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
891     bo = dri_bo_alloc(i965->intel.bufmgr,
892                       "bsd mpc row store",
893                       width_in_mbs * 64 * 2,
894                       0x1000);
895     assert(bo);
896     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
897     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
898
899     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
900     bo = dri_bo_alloc(i965->intel.bufmgr,
901                       "mpr row store",
902                       width_in_mbs * 64 * 2,
903                       0x1000);
904     assert(bo);
905     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
906     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
907
908     gen7_mfd_context->bitplane_read_buffer.valid = 0;
909 }
910
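/*
 * Top-level AVC decode: emit the per-picture state once, then walk every
 * slice parameter buffer and emit direct-mode, ref-idx, weight/offset, slice
 * and BSD-object state for each slice.
 */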
911 static void
912 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
913                             struct decode_state *decode_state,
914                             struct gen7_mfd_context *gen7_mfd_context)
915 {
916     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
917     VAPictureParameterBufferH264 *pic_param;
918     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
919     dri_bo *slice_data_bo;
920     int i, j;
921
922     assert(decode_state->pic_param && decode_state->pic_param->buffer);
923     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
924     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
925
926     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
927     intel_batchbuffer_emit_mi_flush(batch);
928     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
929     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
930     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
931     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
932     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
933     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
934     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
935
936     for (j = 0; j < decode_state->num_slice_params; j++) {
937         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
938         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
939         slice_data_bo = decode_state->slice_datas[j]->bo;
940         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
941
942         if (j == decode_state->num_slice_params - 1)
943             next_slice_group_param = NULL;
944         else
945             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
946
947         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
948             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
949             assert((slice_param->slice_type == SLICE_TYPE_I) ||
950                    (slice_param->slice_type == SLICE_TYPE_SI) ||
951                    (slice_param->slice_type == SLICE_TYPE_P) ||
952                    (slice_param->slice_type == SLICE_TYPE_SP) ||
953                    (slice_param->slice_type == SLICE_TYPE_B));
954
955             if (i < decode_state->slice_params[j]->num_elements - 1)
956                 next_slice_param = slice_param + 1;
957             else
958                 next_slice_param = next_slice_group_param;
959
960             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
961             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
962             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
963             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
964             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
965             slice_param++;
966         }
967     }
968
969     intel_batchbuffer_end_atomic(batch);
970     intel_batchbuffer_flush(batch);
971 }
972
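/*
 * Per-picture MPEG-2 setup: bind the reference surfaces, use the
 * pre-deblocking output (MPEG-2 has no in-loop deblocking filter) and
 * allocate the BSD/MPC row-store scratch buffer sized from the frame width.
 */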
973 static void
974 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
975                            struct decode_state *decode_state,
976                            struct gen7_mfd_context *gen7_mfd_context)
977 {
978     VAPictureParameterBufferMPEG2 *pic_param;
979     struct i965_driver_data *i965 = i965_driver_data(ctx);
980     struct object_surface *obj_surface;
981     dri_bo *bo;
982     unsigned int width_in_mbs;
983
984     assert(decode_state->pic_param && decode_state->pic_param->buffer);
985     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
986     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
987
988     mpeg2_set_reference_surfaces(
989         ctx,
990         gen7_mfd_context->reference_surface,
991         decode_state,
992         pic_param
993     );
994
995     /* Current decoded picture */
996     obj_surface = decode_state->render_object;
997     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
998
999     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1000     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1001     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1002     gen7_mfd_context->pre_deblocking_output.valid = 1;
1003
1004     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1005     bo = dri_bo_alloc(i965->intel.bufmgr,
1006                       "bsd mpc row store",
1007                       width_in_mbs * 96,
1008                       0x1000);
1009     assert(bo);
1010     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1011     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1012
1013     gen7_mfd_context->post_deblocking_output.valid = 0;
1014     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1015     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1016     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1017     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1018 }
1019
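/*
 * MFX_MPEG2_PIC_STATE: f_code values, picture coding extension flags, the
 * picture coding type and the coded size in macroblocks, with slice
 * concealment disabled.
 */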
1020 static void
1021 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1022                          struct decode_state *decode_state,
1023                          struct gen7_mfd_context *gen7_mfd_context)
1024 {
1025     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1026     VAPictureParameterBufferMPEG2 *pic_param;
1027     unsigned int slice_concealment_disable_bit = 0;
1028
1029     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1030     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1031
1032     slice_concealment_disable_bit = 1;
1033
1034     BEGIN_BCS_BATCH(batch, 13);
1035     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1036     OUT_BCS_BATCH(batch,
1037                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1038                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1039                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1040                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1041                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1042                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1043                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1044                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1045                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1046                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1047                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1048                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1049     OUT_BCS_BATCH(batch,
1050                   pic_param->picture_coding_type << 9);
1051     OUT_BCS_BATCH(batch,
1052                   (slice_concealment_disable_bit << 31) |
1053                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1054                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1055     OUT_BCS_BATCH(batch, 0);
1056     OUT_BCS_BATCH(batch, 0);
1057     OUT_BCS_BATCH(batch, 0);
1058     OUT_BCS_BATCH(batch, 0);
1059     OUT_BCS_BATCH(batch, 0);
1060     OUT_BCS_BATCH(batch, 0);
1061     OUT_BCS_BATCH(batch, 0);
1062     OUT_BCS_BATCH(batch, 0);
1063     OUT_BCS_BATCH(batch, 0);
1064     ADVANCE_BCS_BATCH(batch);
1065 }
1066
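/*
 * MPEG-2 quantiser matrices: merge any application-supplied matrices into the
 * cached copy (de-zigzagging them on the way in), then emit whichever of the
 * intra / non-intra matrices are loaded.
 */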
1067 static void
1068 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1069                         struct decode_state *decode_state,
1070                         struct gen7_mfd_context *gen7_mfd_context)
1071 {
1072     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1073     int i, j;
1074
1075     /* Update internal QM state */
1076     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1077         VAIQMatrixBufferMPEG2 * const iq_matrix =
1078             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1079
1080         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1081             iq_matrix->load_intra_quantiser_matrix) {
1082             gen_iq_matrix->load_intra_quantiser_matrix =
1083                 iq_matrix->load_intra_quantiser_matrix;
1084             if (iq_matrix->load_intra_quantiser_matrix) {
1085                 for (j = 0; j < 64; j++)
1086                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1087                         iq_matrix->intra_quantiser_matrix[j];
1088             }
1089         }
1090
1091         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1092             iq_matrix->load_non_intra_quantiser_matrix) {
1093             gen_iq_matrix->load_non_intra_quantiser_matrix =
1094                 iq_matrix->load_non_intra_quantiser_matrix;
1095             if (iq_matrix->load_non_intra_quantiser_matrix) {
1096                 for (j = 0; j < 64; j++)
1097                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1098                         iq_matrix->non_intra_quantiser_matrix[j];
1099             }
1100         }
1101     }
1102
1103     /* Commit QM state to HW */
1104     for (i = 0; i < 2; i++) {
1105         unsigned char *qm = NULL;
1106         int qm_type;
1107
1108         if (i == 0) {
1109             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1110                 qm = gen_iq_matrix->intra_quantiser_matrix;
1111                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1112             }
1113         } else {
1114             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1115                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1116                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1117             }
1118         }
1119
1120         if (!qm)
1121             continue;
1122
1123         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1124     }
1125 }
1126
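/*
 * MFD_MPEG2_BSD_OBJECT: one slice worth of bitstream, described by its data
 * size/offset past the macroblock_offset byte, its starting macroblock
 * coordinates and the macroblock count up to the next slice (or the end of
 * the picture).  Slice vertical positions are halved for field pictures when
 * the driver workaround is active.
 */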
1127 static void
1128 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1129                           VAPictureParameterBufferMPEG2 *pic_param,
1130                           VASliceParameterBufferMPEG2 *slice_param,
1131                           VASliceParameterBufferMPEG2 *next_slice_param,
1132                           struct gen7_mfd_context *gen7_mfd_context)
1133 {
1134     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1135     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1136     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1137
1138     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1139         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1140         is_field_pic = 1;
1141     is_field_pic_wa = is_field_pic &&
1142         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1143
1144     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1145     hpos0 = slice_param->slice_horizontal_position;
1146
1147     if (next_slice_param == NULL) {
1148         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1149         hpos1 = 0;
1150     } else {
1151         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1152         hpos1 = next_slice_param->slice_horizontal_position;
1153     }
1154
1155     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1156
1157     BEGIN_BCS_BATCH(batch, 5);
1158     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1159     OUT_BCS_BATCH(batch, 
1160                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1161     OUT_BCS_BATCH(batch, 
1162                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1163     OUT_BCS_BATCH(batch,
1164                   hpos0 << 24 |
1165                   vpos0 << 16 |
1166                   mb_count << 8 |
1167                   (next_slice_param == NULL) << 5 |
1168                   (next_slice_param == NULL) << 3 |
1169                   (slice_param->macroblock_offset & 0x7));
1170     OUT_BCS_BATCH(batch,
1171                   (slice_param->quantiser_scale_code << 24) |
1172                   (vpos1 << 8 | hpos1));
1173     ADVANCE_BCS_BATCH(batch);
1174 }
1175
1176 static void
1177 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1178                               struct decode_state *decode_state,
1179                               struct gen7_mfd_context *gen7_mfd_context)
1180 {
1181     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1182     VAPictureParameterBufferMPEG2 *pic_param;
1183     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1184     dri_bo *slice_data_bo;
1185     int i, j;
1186
1187     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1188     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1189
1190     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1191     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1192     intel_batchbuffer_emit_mi_flush(batch);
1193     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1194     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1195     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1196     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1197     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1198     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1199
1200     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1201         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1202             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1203
1204     for (j = 0; j < decode_state->num_slice_params; j++) {
1205         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1206         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1207         slice_data_bo = decode_state->slice_datas[j]->bo;
1208         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1209
1210         if (j == decode_state->num_slice_params - 1)
1211             next_slice_group_param = NULL;
1212         else
1213             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1214
1215         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1216             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1217
1218             if (i < decode_state->slice_params[j]->num_elements - 1)
1219                 next_slice_param = slice_param + 1;
1220             else
1221                 next_slice_param = next_slice_group_param;
1222
1223             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1224             slice_param++;
1225         }
1226     }
1227
1228     intel_batchbuffer_end_atomic(batch);
1229     intel_batchbuffer_flush(batch);
1230 }
1231
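/*
 * Lookup tables mapping the VAAPI VC-1 parameter values (picture type, MV
 * mode, condover, profile) to the enums used by the MFX commands, plus the
 * B-picture scale factor table.
 */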
1232 static const int va_to_gen7_vc1_pic_type[5] = {
1233     GEN7_VC1_I_PICTURE,
1234     GEN7_VC1_P_PICTURE,
1235     GEN7_VC1_B_PICTURE,
1236     GEN7_VC1_BI_PICTURE,
1237     GEN7_VC1_P_PICTURE,
1238 };
1239
1240 static const int va_to_gen7_vc1_mv[4] = {
1241     1, /* 1-MV */
1242     2, /* 1-MV half-pel */
1243     3, /* 1-MV half-pel bilinear */
1244     0, /* Mixed MV */
1245 };
1246
1247 static const int b_picture_scale_factor[21] = {
1248     128, 85,  170, 64,  192,
1249     51,  102, 153, 204, 43,
1250     215, 37,  74,  111, 148,
1251     185, 222, 32,  96,  160, 
1252     224,
1253 };
1254
1255 static const int va_to_gen7_vc1_condover[3] = {
1256     0,
1257     2,
1258     3
1259 };
1260
1261 static const int va_to_gen7_vc1_profile[4] = {
1262     GEN7_VC1_SIMPLE_PROFILE,
1263     GEN7_VC1_MAIN_PROFILE,
1264     GEN7_VC1_RESERVED_PROFILE,
1265     GEN7_VC1_ADVANCED_PROFILE
1266 };
1267
1268 static void 
1269 gen8_mfd_free_vc1_surface(void **data)
1270 {
1271     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1272
1273     if (!gen7_vc1_surface)
1274         return;
1275
1276     dri_bo_unreference(gen7_vc1_surface->dmv);
1277     free(gen7_vc1_surface);
1278     *data = NULL;
1279 }
1280
1281 static void
1282 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1283                           VAPictureParameterBufferVC1 *pic_param,
1284                           struct object_surface *obj_surface)
1285 {
1286     struct i965_driver_data *i965 = i965_driver_data(ctx);
1287     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1288     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1289     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1290
1291     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1292
1293     if (!gen7_vc1_surface) {
1294         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1295         assert((obj_surface->size & 0x3f) == 0);
1296         obj_surface->private_data = gen7_vc1_surface;
1297     }
1298
1299     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1300
1301     if (gen7_vc1_surface->dmv == NULL) {
1302         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303                                              "direct mv w/r buffer",
1304                                              width_in_mbs * height_in_mbs * 64,
1305                                              0x1000);
1306     }
1307 }
1308
1309 static void
1310 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1311                          struct decode_state *decode_state,
1312                          struct gen7_mfd_context *gen7_mfd_context)
1313 {
1314     VAPictureParameterBufferVC1 *pic_param;
1315     struct i965_driver_data *i965 = i965_driver_data(ctx);
1316     struct object_surface *obj_surface;
1317     dri_bo *bo;
1318     int width_in_mbs;
1319     int picture_type;
1320
1321     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1322     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1323     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1324     picture_type = pic_param->picture_fields.bits.picture_type;
1325  
1326     intel_update_vc1_frame_store_index(ctx,
1327                                        decode_state,
1328                                        pic_param,
1329                                        gen7_mfd_context->reference_surface);
1330
1331     /* Current decoded picture */
1332     obj_surface = decode_state->render_object;
1333     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1334     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1335
1336     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1337     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1338     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1339     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1340
1341     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1342     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1343     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1344     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1345
1346     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1347     bo = dri_bo_alloc(i965->intel.bufmgr,
1348                       "intra row store",
1349                       width_in_mbs * 64,
1350                       0x1000);
1351     assert(bo);
1352     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1353     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1354
1355     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1356     bo = dri_bo_alloc(i965->intel.bufmgr,
1357                       "deblocking filter row store",
1358                       width_in_mbs * 7 * 64,
1359                       0x1000);
1360     assert(bo);
1361     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1362     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1363
1364     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1365     bo = dri_bo_alloc(i965->intel.bufmgr,
1366                       "bsd mpc row store",
1367                       width_in_mbs * 96,
1368                       0x1000);
1369     assert(bo);
1370     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1371     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1372
1373     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1374
1375     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1376     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1377     
1378     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1379         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1380         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1381         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1382         int src_w, src_h;
1383         uint8_t *src = NULL, *dst = NULL;
1384
1385         assert(decode_state->bit_plane->buffer);
1386         src = decode_state->bit_plane->buffer;
1387
1388         bo = dri_bo_alloc(i965->intel.bufmgr,
1389                           "VC-1 Bitplane",
1390                           bitplane_width * height_in_mbs,
1391                           0x1000);
1392         assert(bo);
1393         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1394
1395         dri_bo_map(bo, True);
1396         assert(bo->virtual);
1397         dst = bo->virtual;
1398
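        /*
         * Repack the VA-API bitplane buffer (two macroblocks per byte, the
         * first macroblock of each pair in the high nibble) into the
         * row-aligned layout expected by the hardware; for skipped pictures
         * the 0x2 bit is forced on in every macroblock nibble.
         */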
1399         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1400             for (src_w = 0; src_w < width_in_mbs; src_w++) {
1401                 int src_index, dst_index;
1402                 int src_shift;
1403                 uint8_t src_value;
1404
1405                 src_index = (src_h * width_in_mbs + src_w) / 2;
1406                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1407                 src_value = ((src[src_index] >> src_shift) & 0xf);
1408
1409                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1410                     src_value |= 0x2;
1411                 }
1412
1413                 dst_index = src_w / 2;
1414                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1415             }
1416
1417             if (src_w & 1)
1418                 dst[src_w / 2] >>= 4;
1419
1420             dst += bitplane_width;
1421         }
1422
1423         dri_bo_unmap(bo);
1424     } else
1425         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1426 }
1427
1428 static void
1429 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1430                        struct decode_state *decode_state,
1431                        struct gen7_mfd_context *gen7_mfd_context)
1432 {
1433     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1434     VAPictureParameterBufferVC1 *pic_param;
1435     struct object_surface *obj_surface;
1436     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1437     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1438     int unified_mv_mode;
1439     int ref_field_pic_polarity = 0;
1440     int scale_factor = 0;
1441     int trans_ac_y = 0;
1442     int dmv_surface_valid = 0;
1443     int brfd = 0;
1444     int fcm = 0;
1445     int picture_type;
1446     int profile;
1447     int overlap;
1448     int interpolation_mode = 0;
1449
1450     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1451     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1452
1453     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1454     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1455     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1456     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1457     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1458     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1459     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1460     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1461
1462     if (dquant == 0) {
1463         alt_pquant_config = 0;
1464         alt_pquant_edge_mask = 0;
1465     } else if (dquant == 2) {
1466         alt_pquant_config = 1;
1467         alt_pquant_edge_mask = 0xf;
1468     } else {
1469         assert(dquant == 1);
1470         if (dquantfrm == 0) {
1471             alt_pquant_config = 0;
1472             alt_pquant_edge_mask = 0;
1473             alt_pq = 0;
1474         } else {
1475             assert(dquantfrm == 1);
1476             alt_pquant_config = 1;
1477
1478             switch (dqprofile) {
1479             case 3:
1480                 if (dqbilevel == 0) {
1481                     alt_pquant_config = 2;
1482                     alt_pquant_edge_mask = 0;
1483                 } else {
1484                     assert(dqbilevel == 1);
1485                     alt_pquant_config = 3;
1486                     alt_pquant_edge_mask = 0;
1487                 }
1488                 break;
1489                 
1490             case 0:
1491                 alt_pquant_edge_mask = 0xf;
1492                 break;
1493
1494             case 1:
1495                 if (dqdbedge == 3)
1496                     alt_pquant_edge_mask = 0x9;
1497                 else
1498                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1499
1500                 break;
1501
1502             case 2:
1503                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1504                 break;
1505
1506             default:
1507                 assert(0);
1508             }
1509         }
1510     }
1511
1512     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1513         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1514         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1515     } else {
1516         assert(pic_param->mv_fields.bits.mv_mode < 4);
1517         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1518     }
1519
1520     if (pic_param->sequence_fields.bits.interlace == 1 &&
1521         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1522         /* FIXME: calculate reference field picture polarity */
1523         assert(0);
1524         ref_field_pic_polarity = 0;
1525     }
1526
1527     if (pic_param->b_picture_fraction < 21)
1528         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1529
1530     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1531     
1532     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1533         picture_type == GEN7_VC1_I_PICTURE)
1534         picture_type = GEN7_VC1_BI_PICTURE;
1535
1536     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1537         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1538     else {
1539         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1540
1541         /*
1542          * 8.3.6.2.1 Transform Type Selection
1543          * If variable-sized transform coding is not enabled,
1544          * then the 8x8 transform shall be used for all blocks.
1545          * This is also an MFX_VC1_PIC_STATE requirement.
1546          */
1547         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1548             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1549             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1550         }
1551     }
1552
1553     if (picture_type == GEN7_VC1_B_PICTURE) {
1554         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1555
1556         obj_surface = decode_state->reference_objects[1];
1557
1558         if (obj_surface)
1559             gen7_vc1_surface = obj_surface->private_data;
1560
1561         if (!gen7_vc1_surface || 
1562             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1563              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1564             dmv_surface_valid = 0;
1565         else
1566             dmv_surface_valid = 1;
1567     }
1568
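    /*
     * The hardware FCM field distinguishes top-field-first from
     * bottom-field-first for field-interlaced pictures, so frame_coding_mode
     * values >= 2 are expanded using top_field_first below.
     */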
1569     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1570
1571     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1572         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1573     else {
1574         if (pic_param->picture_fields.bits.top_field_first)
1575             fcm = 2;
1576         else
1577             fcm = 3;
1578     }
1579
1580     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1581         brfd = pic_param->reference_fields.bits.reference_distance;
1582         brfd = (scale_factor * brfd) >> 8;
1583         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1584
1585         if (brfd < 0)
1586             brfd = 0;
1587     }
1588
1589     overlap = 0;
1590     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1591         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1592             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1593             overlap = 1;
1594         }
1595     } else {
1596         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1597             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1598             overlap = 1;
1599         }
1600         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1601             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1602             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1603                 overlap = 1;
1604             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1605                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1606                 overlap = 1;
1607             }
1608         }
1609     }
1610
1611     assert(pic_param->conditional_overlap_flag < 3);
1612     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1613
1614     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1615         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1616          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1617         interpolation_mode = 9; /* Half-pel bilinear */
1618     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1619              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1620               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1621         interpolation_mode = 1; /* Half-pel bicubic */
1622     else
1623         interpolation_mode = 0; /* Quarter-pel bicubic */
1624
1625     BEGIN_BCS_BATCH(batch, 6);
1626     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1627     OUT_BCS_BATCH(batch,
1628                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1629                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1630     OUT_BCS_BATCH(batch,
1631                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1632                   dmv_surface_valid << 15 |
1633                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1634                   pic_param->rounding_control << 13 |
1635                   pic_param->sequence_fields.bits.syncmarker << 12 |
1636                   interpolation_mode << 8 |
1637                   0 << 7 | /* FIXME: scale up or down ??? */
1638                   pic_param->range_reduction_frame << 6 |
1639                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1640                   overlap << 4 |
1641                   !pic_param->picture_fields.bits.is_first_field << 3 |
1642                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1643     OUT_BCS_BATCH(batch,
1644                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1645                   picture_type << 26 |
1646                   fcm << 24 |
1647                   alt_pq << 16 |
1648                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1649                   scale_factor << 0);
1650     OUT_BCS_BATCH(batch,
1651                   unified_mv_mode << 28 |
1652                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1653                   pic_param->fast_uvmc_flag << 26 |
1654                   ref_field_pic_polarity << 25 |
1655                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1656                   pic_param->reference_fields.bits.reference_distance << 20 |
1657                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1658                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1659                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1660                   alt_pquant_edge_mask << 4 |
1661                   alt_pquant_config << 2 |
1662                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1663                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1664     OUT_BCS_BATCH(batch,
1665                   !!pic_param->bitplane_present.value << 31 |
1666                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1667                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1668                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1669                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1670                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1671                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1672                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1673                   pic_param->mv_fields.bits.mv_table << 20 |
1674                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1675                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1676                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1677                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1678                   pic_param->mb_mode_table << 8 |
1679                   trans_ac_y << 6 |
1680                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1681                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1682                   pic_param->cbp_table << 0);
1683     ADVANCE_BCS_BATCH(batch);
1684 }
1685
1686 static void
1687 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1688                              struct decode_state *decode_state,
1689                              struct gen7_mfd_context *gen7_mfd_context)
1690 {
1691     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1692     VAPictureParameterBufferVC1 *pic_param;
1693     int intensitycomp_single;
1694
1695     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1696     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1697
1700     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1701
1702     BEGIN_BCS_BATCH(batch, 6);
1703     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1704     OUT_BCS_BATCH(batch,
1705                   0 << 14 | /* FIXME: double ??? */
1706                   0 << 12 |
1707                   intensitycomp_single << 10 |
1708                   intensitycomp_single << 8 |
1709                   0 << 4 | /* FIXME: interlace mode */
1710                   0);
1711     OUT_BCS_BATCH(batch,
1712                   pic_param->luma_shift << 16 |
1713                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1714     OUT_BCS_BATCH(batch, 0);
1715     OUT_BCS_BATCH(batch, 0);
1716     OUT_BCS_BATCH(batch, 0);
1717     ADVANCE_BCS_BATCH(batch);
1718 }
1719
1720 static void
1721 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1722                               struct decode_state *decode_state,
1723                               struct gen7_mfd_context *gen7_mfd_context)
1724 {
1725     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1726     struct object_surface *obj_surface;
1727     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1728
1729     obj_surface = decode_state->render_object;
1730
1731     if (obj_surface && obj_surface->private_data) {
1732         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1733     }
1734
1735     obj_surface = decode_state->reference_objects[1];
1736
1737     if (obj_surface && obj_surface->private_data) {
1738         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1739     }
1740
1741     BEGIN_BCS_BATCH(batch, 7);
1742     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1743
1744     if (dmv_write_buffer)
1745         OUT_BCS_RELOC(batch, dmv_write_buffer,
1746                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1747                       0);
1748     else
1749         OUT_BCS_BATCH(batch, 0);
1750
1751     OUT_BCS_BATCH(batch, 0);
1752     OUT_BCS_BATCH(batch, 0);
1753
1754     if (dmv_read_buffer)
1755         OUT_BCS_RELOC(batch, dmv_read_buffer,
1756                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1757                       0);
1758     else
1759         OUT_BCS_BATCH(batch, 0);
1760     
1761     OUT_BCS_BATCH(batch, 0);
1762     OUT_BCS_BATCH(batch, 0);
1763                   
1764     ADVANCE_BCS_BATCH(batch);
1765 }
1766
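/*
 * For advanced-profile (profile == 3) bitstreams the slice header may contain
 * start-code emulation prevention bytes (00 00 03). Count them so that the
 * returned macroblock data bit offset refers to the raw buffer handed to the
 * hardware rather than to the de-emulated bitstream.
 */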
1767 static int
1768 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1769 {
1770     int out_slice_data_bit_offset;
1771     int slice_header_size = in_slice_data_bit_offset / 8;
1772     int i, j;
1773
1774     if (profile != 3)
1775         out_slice_data_bit_offset = in_slice_data_bit_offset;
1776     else {
1777         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1778             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1779                 i++, j += 2;
1780             }
1781         }
1782
1783         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1784     }
1785
1786     return out_slice_data_bit_offset;
1787 }
1788
1789 static void
1790 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1791                         VAPictureParameterBufferVC1 *pic_param,
1792                         VASliceParameterBufferVC1 *slice_param,
1793                         VASliceParameterBufferVC1 *next_slice_param,
1794                         dri_bo *slice_data_bo,
1795                         struct gen7_mfd_context *gen7_mfd_context)
1796 {
1797     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1798     int next_slice_start_vert_pos;
1799     int macroblock_offset;
1800     uint8_t *slice_data = NULL;
1801
1802     dri_bo_map(slice_data_bo, 0);
1803     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1804     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1805                                                                slice_param->macroblock_offset,
1806                                                                pic_param->sequence_fields.bits.profile);
1807     dri_bo_unmap(slice_data_bo);
1808
1809     if (next_slice_param)
1810         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1811     else
1812         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1813
1814     BEGIN_BCS_BATCH(batch, 5);
1815     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1816     OUT_BCS_BATCH(batch, 
1817                   slice_param->slice_data_size - (macroblock_offset >> 3));
1818     OUT_BCS_BATCH(batch, 
1819                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1820     OUT_BCS_BATCH(batch,
1821                   slice_param->slice_vertical_position << 16 |
1822                   next_slice_start_vert_pos << 0);
1823     OUT_BCS_BATCH(batch,
1824                   (macroblock_offset & 0x7));
1825     ADVANCE_BCS_BATCH(batch);
1826 }
1827
1828 static void
1829 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1830                             struct decode_state *decode_state,
1831                             struct gen7_mfd_context *gen7_mfd_context)
1832 {
1833     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1834     VAPictureParameterBufferVC1 *pic_param;
1835     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1836     dri_bo *slice_data_bo;
1837     int i, j;
1838
1839     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1840     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1841
1842     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1843     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1844     intel_batchbuffer_emit_mi_flush(batch);
1845     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1846     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1847     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1848     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1849     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1850     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1851     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1852
1853     for (j = 0; j < decode_state->num_slice_params; j++) {
1854         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1855         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1856         slice_data_bo = decode_state->slice_datas[j]->bo;
1857         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1858
1859         if (j == decode_state->num_slice_params - 1)
1860             next_slice_group_param = NULL;
1861         else
1862             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1863
1864         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1865             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1866
1867             if (i < decode_state->slice_params[j]->num_elements - 1)
1868                 next_slice_param = slice_param + 1;
1869             else
1870                 next_slice_param = next_slice_group_param;
1871
1872             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1873             slice_param++;
1874         }
1875     }
1876
1877     intel_batchbuffer_end_atomic(batch);
1878     intel_batchbuffer_flush(batch);
1879 }
1880
1881 static void
1882 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1883                           struct decode_state *decode_state,
1884                           struct gen7_mfd_context *gen7_mfd_context)
1885 {
1886     struct object_surface *obj_surface;
1887     VAPictureParameterBufferJPEGBaseline *pic_param;
1888     int subsampling = SUBSAMPLE_YUV420;
1889     int fourcc = VA_FOURCC('I', 'M', 'C', '3');
1890
1891     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1892
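    /*
     * Pick the chroma subsampling and render-target fourcc from the JPEG
     * per-component sampling factors (component 0 is luma, components 1 and 2
     * are chroma in a baseline 3-component picture).
     */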
1893     if (pic_param->num_components == 1)
1894         subsampling = SUBSAMPLE_YUV400;
1895     else if (pic_param->num_components == 3) {
1896         int h1 = pic_param->components[0].h_sampling_factor;
1897         int h2 = pic_param->components[1].h_sampling_factor;
1898         int h3 = pic_param->components[2].h_sampling_factor;
1899         int v1 = pic_param->components[0].v_sampling_factor;
1900         int v2 = pic_param->components[1].v_sampling_factor;
1901         int v3 = pic_param->components[2].v_sampling_factor;
1902
1903         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1904             v1 == 2 && v2 == 1 && v3 == 1) {
1905             subsampling = SUBSAMPLE_YUV420;
1906             fourcc = VA_FOURCC('I', 'M', 'C', '3');
1907         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1908                    v1 == 1 && v2 == 1 && v3 == 1) {
1909             subsampling = SUBSAMPLE_YUV422H;
1910             fourcc = VA_FOURCC('4', '2', '2', 'H');
1911         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1912                    v1 == 1 && v2 == 1 && v3 == 1) {
1913             subsampling = SUBSAMPLE_YUV444;
1914             fourcc = VA_FOURCC('4', '4', '4', 'P');
1915         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1916                    v1 == 1 && v2 == 1 && v3 == 1) {
1917             subsampling = SUBSAMPLE_YUV411;
1918             fourcc = VA_FOURCC('4', '1', '1', 'P');
1919         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1920                    v1 == 2 && v2 == 1 && v3 == 1) {
1921             subsampling = SUBSAMPLE_YUV422V;
1922             fourcc = VA_FOURCC('4', '2', '2', 'V');
1923         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1924                    v1 == 2 && v2 == 2 && v3 == 2) {
1925             subsampling = SUBSAMPLE_YUV422H;
1926             fourcc = VA_FOURCC('4', '2', '2', 'H');
1927         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1928                    v1 == 2 && v2 == 1 && v3 == 1) {
1929             subsampling = SUBSAMPLE_YUV422V;
1930             fourcc = VA_FOURCC('4', '2', '2', 'V');
1931         } else
1932             assert(0);
1933     }
1934     else {
1935         assert(0);
1936     }
1937
1938     /* Current decoded picture */
1939     obj_surface = decode_state->render_object;
1940     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1941
1942     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1943     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1944     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1945     gen7_mfd_context->pre_deblocking_output.valid = 1;
1946
1947     gen7_mfd_context->post_deblocking_output.bo = NULL;
1948     gen7_mfd_context->post_deblocking_output.valid = 0;
1949
1950     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1951     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1952
1953     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1954     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1955
1956     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1957     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1958
1959     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1960     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1961
1962     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1963     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1964 }
1965
1966 static const int va_to_gen7_jpeg_rotation[4] = {
1967     GEN7_JPEG_ROTATION_0,
1968     GEN7_JPEG_ROTATION_90,
1969     GEN7_JPEG_ROTATION_180,
1970     GEN7_JPEG_ROTATION_270
1971 };
1972
1973 static void
1974 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1975                         struct decode_state *decode_state,
1976                         struct gen7_mfd_context *gen7_mfd_context)
1977 {
1978     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1979     VAPictureParameterBufferJPEGBaseline *pic_param;
1980     int chroma_type = GEN7_YUV420;
1981     int frame_width_in_blks;
1982     int frame_height_in_blks;
1983
1984     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1985     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1986
1987     if (pic_param->num_components == 1)
1988         chroma_type = GEN7_YUV400;
1989     else if (pic_param->num_components == 3) {
1990         int h1 = pic_param->components[0].h_sampling_factor;
1991         int h2 = pic_param->components[1].h_sampling_factor;
1992         int h3 = pic_param->components[2].h_sampling_factor;
1993         int v1 = pic_param->components[0].v_sampling_factor;
1994         int v2 = pic_param->components[1].v_sampling_factor;
1995         int v3 = pic_param->components[2].v_sampling_factor;
1996
1997         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1998             v1 == 2 && v2 == 1 && v3 == 1)
1999             chroma_type = GEN7_YUV420;
2000         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2001                  v1 == 1 && v2 == 1 && v3 == 1)
2002             chroma_type = GEN7_YUV422H_2Y;
2003         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2004                  v1 == 1 && v2 == 1 && v3 == 1)
2005             chroma_type = GEN7_YUV444;
2006         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2007                  v1 == 1 && v2 == 1 && v3 == 1)
2008             chroma_type = GEN7_YUV411;
2009         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2010                  v1 == 2 && v2 == 1 && v3 == 1)
2011             chroma_type = GEN7_YUV422V_2Y;
2012         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2013                  v1 == 2 && v2 == 2 && v3 == 2)
2014             chroma_type = GEN7_YUV422H_4Y;
2015         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2016                  v1 == 2 && v2 == 1 && v3 == 1)
2017             chroma_type = GEN7_YUV422V_4Y;
2018         else
2019             assert(0);
2020     }
2021
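    /*
     * FrameWidthInBlks/FrameHeightInBlks are expressed in 8x8 blocks,
     * rounded up so that each dimension covers a whole number of MCUs for
     * the chroma type selected above.
     */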
2022     if (chroma_type == GEN7_YUV400 ||
2023         chroma_type == GEN7_YUV444 ||
2024         chroma_type == GEN7_YUV422V_2Y) {
2025         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2026         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2027     } else if (chroma_type == GEN7_YUV411) {
2028         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2029         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2030     } else {
2031         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2032         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2033     }
2034
2035     BEGIN_BCS_BATCH(batch, 3);
2036     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2037     OUT_BCS_BATCH(batch,
2038                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2039                   (chroma_type << 0));
2040     OUT_BCS_BATCH(batch,
2041                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2042                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2043     ADVANCE_BCS_BATCH(batch);
2044 }
2045
2046 static const int va_to_gen7_jpeg_hufftable[2] = {
2047     MFX_HUFFTABLE_ID_Y,
2048     MFX_HUFFTABLE_ID_UV
2049 };
2050
2051 static void
2052 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2053                                struct decode_state *decode_state,
2054                                struct gen7_mfd_context *gen7_mfd_context,
2055                                int num_tables)
2056 {
2057     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2058     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2059     int index;
2060
2061     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2062         return;
2063
2064     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2065
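    /*
     * Each MFX_JPEG_HUFF_TABLE_STATE command carries one complete table pair
     * selected by the Y/UV hufftable id: 12 bytes of DC code-length counts,
     * 12 DC values, 16 bytes of AC code-length counts and a 164-byte AC
     * value array.
     */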
2066     for (index = 0; index < num_tables; index++) {
2067         int id = va_to_gen7_jpeg_hufftable[index];
2068         if (!huffman_table->load_huffman_table[index])
2069             continue;
2070         BEGIN_BCS_BATCH(batch, 53);
2071         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2072         OUT_BCS_BATCH(batch, id);
2073         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2074         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2075         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2076         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2077         ADVANCE_BCS_BATCH(batch);
2078     }
2079 }
2080
2081 static const int va_to_gen7_jpeg_qm[5] = {
2082     -1,
2083     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2084     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2085     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2086     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2087 };
2088
2089 static void
2090 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2091                        struct decode_state *decode_state,
2092                        struct gen7_mfd_context *gen7_mfd_context)
2093 {
2094     VAPictureParameterBufferJPEGBaseline *pic_param;
2095     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2096     int index;
2097
2098     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2099         return;
2100
2101     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2102     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2103
2104     assert(pic_param->num_components <= 3);
2105
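    /*
     * VA-API hands the JPEG quantization tables in zig-zag scan order;
     * convert each one to raster order before loading it into the QM state.
     */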
2106     for (index = 0; index < pic_param->num_components; index++) {
2107         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2108         int qm_type;
2109         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2110         unsigned char raster_qm[64];
2111         int j;
2112
2113         if (id > 4 || id < 1)
2114             continue;
2115
2116         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2117             continue;
2118
2119         qm_type = va_to_gen7_jpeg_qm[id];
2120
2121         for (j = 0; j < 64; j++)
2122             raster_qm[zigzag_direct[j]] = qm[j];
2123
2124         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2125     }
2126 }
2127
2128 static void
2129 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2130                          VAPictureParameterBufferJPEGBaseline *pic_param,
2131                          VASliceParameterBufferJPEGBaseline *slice_param,
2132                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2133                          dri_bo *slice_data_bo,
2134                          struct gen7_mfd_context *gen7_mfd_context)
2135 {
2136     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2137     int scan_component_mask = 0;
2138     int i;
2139
2140     assert(slice_param->num_components > 0);
2141     assert(slice_param->num_components < 4);
2142     assert(slice_param->num_components <= pic_param->num_components);
2143
2144     for (i = 0; i < slice_param->num_components; i++) {
2145         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2146         case 1:
2147             scan_component_mask |= (1 << 0);
2148             break;
2149         case 2:
2150             scan_component_mask |= (1 << 1);
2151             break;
2152         case 3:
2153             scan_component_mask |= (1 << 2);
2154             break;
2155         default:
2156             assert(0);
2157             break;
2158         }
2159     }
2160
2161     BEGIN_BCS_BATCH(batch, 6);
2162     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2163     OUT_BCS_BATCH(batch, 
2164                   slice_param->slice_data_size);
2165     OUT_BCS_BATCH(batch, 
2166                   slice_param->slice_data_offset);
2167     OUT_BCS_BATCH(batch,
2168                   slice_param->slice_horizontal_position << 16 |
2169                   slice_param->slice_vertical_position << 0);
2170     OUT_BCS_BATCH(batch,
2171                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2172                   (scan_component_mask << 27) |                 /* scan components */
2173                   (0 << 26) |   /* disable interrupt allowed */
2174                   (slice_param->num_mcus << 0));                /* MCU count */
2175     OUT_BCS_BATCH(batch,
2176                   (slice_param->restart_interval << 0));    /* RestartInterval */
2177     ADVANCE_BCS_BATCH(batch);
2178 }
2179
2180 /* Workaround for JPEG decoding on Ivybridge */
2181 #ifdef JPEG_WA
2182
2183 VAStatus 
2184 i965_DestroySurfaces(VADriverContextP ctx,
2185                      VASurfaceID *surface_list,
2186                      int num_surfaces);
2187 VAStatus 
2188 i965_CreateSurfaces(VADriverContextP ctx,
2189                     int width,
2190                     int height,
2191                     int format,
2192                     int num_surfaces,
2193                     VASurfaceID *surfaces);
2194
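/*
 * Tiny pre-encoded 16x16 AVC I slice used by the JPEG workaround below; it is
 * decoded through the AVC path first, apparently to leave the MFX pipeline in
 * a known state before switching it to JPEG mode.
 */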
2195 static struct {
2196     int width;
2197     int height;
2198     unsigned char data[32];
2199     int data_size;
2200     int data_bit_offset;
2201     int qp;
2202 } gen7_jpeg_wa_clip = {
2203     16,
2204     16,
2205     {
2206         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2207         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2208     },
2209     14,
2210     40,
2211     28,
2212 };
2213
2214 static void
2215 gen8_jpeg_wa_init(VADriverContextP ctx,
2216                   struct gen7_mfd_context *gen7_mfd_context)
2217 {
2218     struct i965_driver_data *i965 = i965_driver_data(ctx);
2219     VAStatus status;
2220     struct object_surface *obj_surface;
2221
2222     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2223         i965_DestroySurfaces(ctx,
2224                              &gen7_mfd_context->jpeg_wa_surface_id,
2225                              1);
2226
2227     status = i965_CreateSurfaces(ctx,
2228                                  gen7_jpeg_wa_clip.width,
2229                                  gen7_jpeg_wa_clip.height,
2230                                  VA_RT_FORMAT_YUV420,
2231                                  1,
2232                                  &gen7_mfd_context->jpeg_wa_surface_id);
2233     assert(status == VA_STATUS_SUCCESS);
2234
2235     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2236     assert(obj_surface);
2237     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2238     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2239
2240     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2241         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2242                                                                "JPEG WA data",
2243                                                                0x1000,
2244                                                                0x1000);
2245         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2246                        0,
2247                        gen7_jpeg_wa_clip.data_size,
2248                        gen7_jpeg_wa_clip.data);
2249     }
2250 }
2251
2252 static void
2253 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2254                               struct gen7_mfd_context *gen7_mfd_context)
2255 {
2256     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2257
2258     BEGIN_BCS_BATCH(batch, 5);
2259     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2260     OUT_BCS_BATCH(batch,
2261                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2262                   (MFD_MODE_VLD << 15) | /* VLD mode */
2263                   (0 << 10) | /* disable Stream-Out */
2264                   (0 << 9)  | /* Post Deblocking Output */
2265                   (1 << 8)  | /* Pre Deblocking Output */
2266                   (0 << 5)  | /* not in stitch mode */
2267                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2268                   (MFX_FORMAT_AVC << 0));
2269     OUT_BCS_BATCH(batch,
2270                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2271                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2272                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2273                   (0 << 1)  |
2274                   (0 << 0));
2275     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2276     OUT_BCS_BATCH(batch, 0); /* reserved */
2277     ADVANCE_BCS_BATCH(batch);
2278 }
2279
2280 static void
2281 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2282                            struct gen7_mfd_context *gen7_mfd_context)
2283 {
2284     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2285     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2286
2287     BEGIN_BCS_BATCH(batch, 6);
2288     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2289     OUT_BCS_BATCH(batch, 0);
2290     OUT_BCS_BATCH(batch,
2291                   ((obj_surface->orig_width - 1) << 18) |
2292                   ((obj_surface->orig_height - 1) << 4));
2293     OUT_BCS_BATCH(batch,
2294                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2295                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2296                   (0 << 22) | /* surface object control state, ignored */
2297                   ((obj_surface->width - 1) << 3) | /* pitch */
2298                   (0 << 2)  | /* must be 0 */
2299                   (1 << 1)  | /* must be tiled */
2300                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2301     OUT_BCS_BATCH(batch,
2302                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2303                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2304     OUT_BCS_BATCH(batch,
2305                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2306                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2307     ADVANCE_BCS_BATCH(batch);
2308 }
2309
2310 static void
2311 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2312                                  struct gen7_mfd_context *gen7_mfd_context)
2313 {
2314     struct i965_driver_data *i965 = i965_driver_data(ctx);
2315     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2316     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2317     dri_bo *intra_bo;
2318     int i;
2319
2320     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2321                             "intra row store",
2322                             128 * 64,
2323                             0x1000);
2324
2325     BEGIN_BCS_BATCH(batch, 61);
2326     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2327     OUT_BCS_RELOC(batch,
2328                   obj_surface->bo,
2329                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2330                   0);
2331     OUT_BCS_BATCH(batch, 0);
2332     OUT_BCS_BATCH(batch, 0);
2333
2334
2335     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2336     OUT_BCS_BATCH(batch, 0);
2337     OUT_BCS_BATCH(batch, 0);
2338
2339     /* uncompressed-video & stream out 7-12 */
2340     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2341     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2342     OUT_BCS_BATCH(batch, 0);
2343     OUT_BCS_BATCH(batch, 0);
2344     OUT_BCS_BATCH(batch, 0);
2345     OUT_BCS_BATCH(batch, 0);
2346
2347     /* the DW 13-15 is for intra row store scratch */
2348     OUT_BCS_RELOC(batch,
2349                   intra_bo,
2350                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2351                   0);
2352     OUT_BCS_BATCH(batch, 0);
2353     OUT_BCS_BATCH(batch, 0);
2354
2355     /* the DW 16-18 is for deblocking filter */
2356     OUT_BCS_BATCH(batch, 0);
2357     OUT_BCS_BATCH(batch, 0);
2358     OUT_BCS_BATCH(batch, 0);
2359
2360     /* DW 19..50 */
2361     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2362         OUT_BCS_BATCH(batch, 0);
2363         OUT_BCS_BATCH(batch, 0);
2364     }
2365     OUT_BCS_BATCH(batch, 0);
2366
2367     /* the DW52-54 is for mb status address */
2368     OUT_BCS_BATCH(batch, 0);
2369     OUT_BCS_BATCH(batch, 0);
2370     OUT_BCS_BATCH(batch, 0);
2371     /* the DW56-60 is for ILDB & second ILDB address */
2372     OUT_BCS_BATCH(batch, 0);
2373     OUT_BCS_BATCH(batch, 0);
2374     OUT_BCS_BATCH(batch, 0);
2375     OUT_BCS_BATCH(batch, 0);
2376     OUT_BCS_BATCH(batch, 0);
2377     OUT_BCS_BATCH(batch, 0);
2378
2379     ADVANCE_BCS_BATCH(batch);
2380
2381     dri_bo_unreference(intra_bo);
2382 }
2383
2384 static void
2385 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2386                                      struct gen7_mfd_context *gen7_mfd_context)
2387 {
2388     struct i965_driver_data *i965 = i965_driver_data(ctx);
2389     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2390     dri_bo *bsd_mpc_bo, *mpr_bo;
2391
2392     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2393                               "bsd mpc row store",
2394                               11520, /* 1.5 * 120 * 64 */
2395                               0x1000);
2396
2397     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2398                           "mpr row store",
2399                           7680, /* 1.0 * 120 * 64 */
2400                           0x1000);
2401
2402     BEGIN_BCS_BATCH(batch, 10);
2403     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2404
2405     OUT_BCS_RELOC(batch,
2406                   bsd_mpc_bo,
2407                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2408                   0);
2409
2410     OUT_BCS_BATCH(batch, 0);
2411     OUT_BCS_BATCH(batch, 0);
2412
2413     OUT_BCS_RELOC(batch,
2414                   mpr_bo,
2415                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2416                   0);
2417     OUT_BCS_BATCH(batch, 0);
2418     OUT_BCS_BATCH(batch, 0);
2419
2420     OUT_BCS_BATCH(batch, 0);
2421     OUT_BCS_BATCH(batch, 0);
2422     OUT_BCS_BATCH(batch, 0);
2423
2424     ADVANCE_BCS_BATCH(batch);
2425
2426     dri_bo_unreference(bsd_mpc_bo);
2427     dri_bo_unreference(mpr_bo);
2428 }
2429
2430 static void
2431 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2432                           struct gen7_mfd_context *gen7_mfd_context)
2433 {
2434
2435 }
2436
2437 static void
2438 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2439                            struct gen7_mfd_context *gen7_mfd_context)
2440 {
2441     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2442     int img_struct = 0;
2443     int mbaff_frame_flag = 0;
2444     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2445
2446     BEGIN_BCS_BATCH(batch, 16);
2447     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2448     OUT_BCS_BATCH(batch, 
2449                   width_in_mbs * height_in_mbs);
2450     OUT_BCS_BATCH(batch, 
2451                   ((height_in_mbs - 1) << 16) | 
2452                   ((width_in_mbs - 1) << 0));
2453     OUT_BCS_BATCH(batch, 
2454                   (0 << 24) |
2455                   (0 << 16) |
2456                   (0 << 14) |
2457                   (0 << 13) |
2458                   (0 << 12) | /* differ from GEN6 */
2459                   (0 << 10) |
2460                   (img_struct << 8));
2461     OUT_BCS_BATCH(batch,
2462                   (1 << 10) | /* 4:2:0 */
2463                   (1 << 7) |  /* CABAC */
2464                   (0 << 6) |
2465                   (0 << 5) |
2466                   (0 << 4) |
2467                   (0 << 3) |
2468                   (1 << 2) |
2469                   (mbaff_frame_flag << 1) |
2470                   (0 << 0));
2471     OUT_BCS_BATCH(batch, 0);
2472     OUT_BCS_BATCH(batch, 0);
2473     OUT_BCS_BATCH(batch, 0);
2474     OUT_BCS_BATCH(batch, 0);
2475     OUT_BCS_BATCH(batch, 0);
2476     OUT_BCS_BATCH(batch, 0);
2477     OUT_BCS_BATCH(batch, 0);
2478     OUT_BCS_BATCH(batch, 0);
2479     OUT_BCS_BATCH(batch, 0);
2480     OUT_BCS_BATCH(batch, 0);
2481     OUT_BCS_BATCH(batch, 0);
2482     ADVANCE_BCS_BATCH(batch);
2483 }
2484
2485 static void
2486 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2487                                   struct gen7_mfd_context *gen7_mfd_context)
2488 {
2489     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2490     int i;
2491
2492     BEGIN_BCS_BATCH(batch, 71);
2493     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2494
2495     /* reference surfaces 0..15 */
2496     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2497         OUT_BCS_BATCH(batch, 0); /* top */
2498         OUT_BCS_BATCH(batch, 0); /* bottom */
2499     }
2500
2501     OUT_BCS_BATCH(batch, 0);
2502
2503     /* the current decoding frame/field */
2504     OUT_BCS_BATCH(batch, 0); /* top */
2505     OUT_BCS_BATCH(batch, 0);
2506     OUT_BCS_BATCH(batch, 0);
2507
2508     /* POC List */
2509     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2510         OUT_BCS_BATCH(batch, 0);
2511         OUT_BCS_BATCH(batch, 0);
2512     }
2513
2514     OUT_BCS_BATCH(batch, 0);
2515     OUT_BCS_BATCH(batch, 0);
2516
2517     ADVANCE_BCS_BATCH(batch);
2518 }
2519
2520 static void
2521 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2522                                      struct gen7_mfd_context *gen7_mfd_context)
2523 {
2524     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2525
2526     BEGIN_BCS_BATCH(batch, 11);
2527     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2528     OUT_BCS_RELOC(batch,
2529                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2530                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2531                   0);
2532     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2533     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2534     OUT_BCS_BATCH(batch, 0);
2535     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2536     OUT_BCS_BATCH(batch, 0);
2537     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2538     OUT_BCS_BATCH(batch, 0);
2539     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2540     OUT_BCS_BATCH(batch, 0);
2541     ADVANCE_BCS_BATCH(batch);
2542 }
2543
2544 static void
2545 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2546                             struct gen7_mfd_context *gen7_mfd_context)
2547 {
2548     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2549
2550     /* the input bitstream format on GEN7 differs from GEN6 */
2551     BEGIN_BCS_BATCH(batch, 6);
2552     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2553     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2554     OUT_BCS_BATCH(batch, 0);
2555     OUT_BCS_BATCH(batch,
2556                   (0 << 31) |
2557                   (0 << 14) |
2558                   (0 << 12) |
2559                   (0 << 10) |
2560                   (0 << 8));
2561     OUT_BCS_BATCH(batch,
2562                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2563                   (0 << 5)  |
2564                   (0 << 4)  |
2565                   (1 << 3) | /* LastSlice Flag */
2566                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2567     OUT_BCS_BATCH(batch, 0);
2568     ADVANCE_BCS_BATCH(batch);
2569 }
2570
2571 static void
2572 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2573                              struct gen7_mfd_context *gen7_mfd_context)
2574 {
2575     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2576     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2577     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2578     int first_mb_in_slice = 0;
2579     int slice_type = SLICE_TYPE_I;
2580
2581     BEGIN_BCS_BATCH(batch, 11);
2582     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2583     OUT_BCS_BATCH(batch, slice_type);
2584     OUT_BCS_BATCH(batch, 
2585                   (num_ref_idx_l1 << 24) |
2586                   (num_ref_idx_l0 << 16) |
2587                   (0 << 8) |
2588                   (0 << 0));
2589     OUT_BCS_BATCH(batch, 
2590                   (0 << 29) |
2591                   (1 << 27) |   /* disable Deblocking */
2592                   (0 << 24) |
2593                   (gen7_jpeg_wa_clip.qp << 16) |
2594                   (0 << 8) |
2595                   (0 << 0));
2596     OUT_BCS_BATCH(batch, 
2597                   (slice_ver_pos << 24) |
2598                   (slice_hor_pos << 16) | 
2599                   (first_mb_in_slice << 0));
2600     OUT_BCS_BATCH(batch,
2601                   (next_slice_ver_pos << 16) |
2602                   (next_slice_hor_pos << 0));
2603     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2604     OUT_BCS_BATCH(batch, 0);
2605     OUT_BCS_BATCH(batch, 0);
2606     OUT_BCS_BATCH(batch, 0);
2607     OUT_BCS_BATCH(batch, 0);
2608     ADVANCE_BCS_BATCH(batch);
2609 }
2610
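/*
 * JPEG workaround: run a tiny AVC VLD decode of the built-in workaround clip
 * (the gen8_jpeg_wa_* states above) before the real JPEG decode is
 * programmed.
 */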
2611 static void
2612 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2613                  struct gen7_mfd_context *gen7_mfd_context)
2614 {
2615     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2616     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2617     intel_batchbuffer_emit_mi_flush(batch);
2618     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2619     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2620     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2621     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2622     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2623     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2624     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2625
2626     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2627     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2628     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2629 }
2630
2631 #endif
2632
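/*
 * JPEG baseline decode: the slice parameters are walked twice -- a first pass
 * to find the largest Huffman table selector actually used (so the right
 * number of tables is loaded), and a second pass to emit one BSD object per
 * scan.
 */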
2633 void
2634 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2635                              struct decode_state *decode_state,
2636                              struct gen7_mfd_context *gen7_mfd_context)
2637 {
2638     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2639     VAPictureParameterBufferJPEGBaseline *pic_param;
2640     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2641     dri_bo *slice_data_bo;
2642     int i, j, max_selector = 0;
2643
2644     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2645     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2646
2647     /* Currently only Baseline DCT is supported */
2648     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2649     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2650 #ifdef JPEG_WA
2651     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2652 #endif
2653     intel_batchbuffer_emit_mi_flush(batch);
2654     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2655     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2656     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2657     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2658     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2659
2660     for (j = 0; j < decode_state->num_slice_params; j++) {
2661         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2662         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2663         slice_data_bo = decode_state->slice_datas[j]->bo;
2664         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2665
2666         if (j == decode_state->num_slice_params - 1)
2667             next_slice_group_param = NULL;
2668         else
2669             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2670
2671         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2672             int component;
2673
2674             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2675
2676             if (i < decode_state->slice_params[j]->num_elements - 1)
2677                 next_slice_param = slice_param + 1;
2678             else
2679                 next_slice_param = next_slice_group_param;
2680
2681             for (component = 0; component < slice_param->num_components; component++) {
2682                 if (max_selector < slice_param->components[component].dc_table_selector)
2683                     max_selector = slice_param->components[component].dc_table_selector;
2684
2685                 if (max_selector < slice_param->components[component].ac_table_selector)
2686                     max_selector = slice_param->components[component].ac_table_selector;
2687             }
2688
2689             slice_param++;
2690         }
2691     }
2692
2693     assert(max_selector < 2);
2694     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2695
2696     for (j = 0; j < decode_state->num_slice_params; j++) {
2697         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2698         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2699         slice_data_bo = decode_state->slice_datas[j]->bo;
2700         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2701
2702         if (j == decode_state->num_slice_params - 1)
2703             next_slice_group_param = NULL;
2704         else
2705             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2706
2707         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2708             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2709
2710             if (i < decode_state->slice_params[j]->num_elements - 1)
2711                 next_slice_param = slice_param + 1;
2712             else
2713                 next_slice_param = next_slice_group_param;
2714
2715             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2716             slice_param++;
2717         }
2718     }
2719
2720     intel_batchbuffer_end_atomic(batch);
2721     intel_batchbuffer_flush(batch);
2722 }
2723
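/*
 * VP8 quantizer lookup tables: map a (clipped) quantization index in the
 * range [0, 127] to the DC/AC dequantization factors defined by the VP8
 * specification.
 */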
2724 static const int vp8_dc_qlookup[128] =
2725 {
2726       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2727      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2728      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2729      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2730      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2731      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2732      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2733     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2734 };
2735
2736 static const int vp8_ac_qlookup[128] =
2737 {
2738       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2739      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2740      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2741      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2742      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2743     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2744     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2745     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2746 };
2747
2748 static inline unsigned int vp8_clip_quantization_index(int index)
2749 {
2750     if (index > 127)
2751         return 127;
2752     else if (index < 0)
2753         return 0;
2754
2755     return index;
2756 }
2757
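/*
 * Per-frame VP8 decode setup: update the frame store for the reference
 * frames, select pre- vs. post-deblocking output based on
 * loop_filter_disable, and (re)allocate the row-store scratch buffers.
 */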
2758 static void
2759 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2760                           struct decode_state *decode_state,
2761                           struct gen7_mfd_context *gen7_mfd_context)
2762 {
2763     struct object_surface *obj_surface;
2764     struct i965_driver_data *i965 = i965_driver_data(ctx);
2765     dri_bo *bo;
2766     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2767     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2768     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2769
2770     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2771     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2772
2773     intel_update_vp8_frame_store_index(ctx,
2774                                        decode_state,
2775                                        pic_param,
2776                                        gen7_mfd_context->reference_surface);
2777
2778     /* Current decoded picture */
2779     obj_surface = decode_state->render_object;
2780     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2781
2782     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2783     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2784     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2785     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2786
2787     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2788     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2789     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2790     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2791
2792     /* The same as AVC */
2793     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2794     bo = dri_bo_alloc(i965->intel.bufmgr,
2795                       "intra row store",
2796                       width_in_mbs * 64,
2797                       0x1000);
2798     assert(bo);
2799     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2800     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2801
2802     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2803     bo = dri_bo_alloc(i965->intel.bufmgr,
2804                       "deblocking filter row store",
2805                       width_in_mbs * 64 * 4,
2806                       0x1000);
2807     assert(bo);
2808     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2809     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2810
2811     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2812     bo = dri_bo_alloc(i965->intel.bufmgr,
2813                       "bsd mpc row store",
2814                       width_in_mbs * 64 * 2,
2815                       0x1000);
2816     assert(bo);
2817     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2818     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2819
2820     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2821     bo = dri_bo_alloc(i965->intel.bufmgr,
2822                       "mpr row store",
2823                       width_in_mbs * 64 * 2,
2824                       0x1000);
2825     assert(bo);
2826     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2827     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2828
2829     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2830 }
2831
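/*
 * Program MFX_VP8_PIC_STATE: frame size in macroblocks, frame header flags,
 * per-segment loop filter levels and quantizer values, the coefficient
 * probability buffer, and the mode/MV probability tables.
 */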
2832 static void
2833 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2834                        struct decode_state *decode_state,
2835                        struct gen7_mfd_context *gen7_mfd_context)
2836 {
2837     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2838     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2839     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2840     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2841     dri_bo *probs_bo = decode_state->probability_data->bo;
2842     int i, j, log2num;
2843     unsigned int quantization_value[4][6];
2844
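    /*
     * num_of_partitions includes the first (control) partition, so the number
     * of token partitions is num_of_partitions - 1 (1, 2, 4 or 8), giving a
     * log2num of 0..3.
     */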
2845     log2num = (int)log2(slice_param->num_of_partitions - 1);
2846
2847     BEGIN_BCS_BATCH(batch, 38);
2848     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2849     OUT_BCS_BATCH(batch,
2850                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2851                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2852     OUT_BCS_BATCH(batch,
2853                   log2num << 24 |
2854                   pic_param->pic_fields.bits.sharpness_level << 16 |
2855                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2856                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2857                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2858                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2859                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2860                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2861                   0 << 7 | /* segmentation id streamin disabled */
2862                   0 << 6 | /* segmentation id streamout disabled */
2863                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicates an intra frame in the VP8 stream/spec (section 9.1) */
2864                   pic_param->pic_fields.bits.filter_type << 4 |
2865                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2866                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2867
2868     OUT_BCS_BATCH(batch,
2869                   pic_param->loop_filter_level[3] << 24 |
2870                   pic_param->loop_filter_level[2] << 16 |
2871                   pic_param->loop_filter_level[1] <<  8 |
2872                   pic_param->loop_filter_level[0] <<  0);
2873
2874     /* Quantizer values for the 4 segments, DW4-DW15 */
2875     for (i = 0; i < 4; i++) {
2876         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])]; /* yac */
2877         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])]; /* ydc */
2878         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2879         /* (x * 101581) >> 16 is approximately x * 155/100 */
2880         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2881         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])]; /* uvdc */
2882         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])]; /* uvac */
2883 
2884         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2885         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2886 
2887         OUT_BCS_BATCH(batch,
2888                       quantization_value[i][0] << 16 | /* Y1AC */
2889                       quantization_value[i][1] <<  0); /* Y1DC */
2890         OUT_BCS_BATCH(batch,
2891                       quantization_value[i][5] << 16 | /* UVAC */
2892                       quantization_value[i][4] <<  0); /* UVDC */
2893         OUT_BCS_BATCH(batch,
2894                       quantization_value[i][3] << 16 | /* Y2AC */
2895                       quantization_value[i][2] <<  0); /* Y2DC */
2896     }
2897
2898     /* CoeffProbability table for non-key frame, DW16-DW18 */
2899     if (probs_bo) {
2900         OUT_BCS_RELOC(batch, probs_bo,
2901                       0, I915_GEM_DOMAIN_INSTRUCTION,
2902                       0);
2903         OUT_BCS_BATCH(batch, 0);
2904         OUT_BCS_BATCH(batch, 0);
2905     } else {
2906         OUT_BCS_BATCH(batch, 0);
2907         OUT_BCS_BATCH(batch, 0);
2908         OUT_BCS_BATCH(batch, 0);
2909     }
2910
2911     OUT_BCS_BATCH(batch,
2912                   pic_param->mb_segment_tree_probs[2] << 16 |
2913                   pic_param->mb_segment_tree_probs[1] <<  8 |
2914                   pic_param->mb_segment_tree_probs[0] <<  0);
2915
2916     OUT_BCS_BATCH(batch,
2917                   pic_param->prob_skip_false << 24 |
2918                   pic_param->prob_intra      << 16 |
2919                   pic_param->prob_last       <<  8 |
2920                   pic_param->prob_gf         <<  0);
2921
2922     OUT_BCS_BATCH(batch,
2923                   pic_param->y_mode_probs[3] << 24 |
2924                   pic_param->y_mode_probs[2] << 16 |
2925                   pic_param->y_mode_probs[1] <<  8 |
2926                   pic_param->y_mode_probs[0] <<  0);
2927
2928     OUT_BCS_BATCH(batch,
2929                   pic_param->uv_mode_probs[2] << 16 |
2930                   pic_param->uv_mode_probs[1] <<  8 |
2931                   pic_param->uv_mode_probs[0] <<  0);
2932     
2933     /* MV update value, DW23-DW32 */
2934     for (i = 0; i < 2; i++) {
2935         for (j = 0; j < 20; j += 4) {
2936             OUT_BCS_BATCH(batch,
2937                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2938                           pic_param->mv_probs[i][j + 2] << 16 |
2939                           pic_param->mv_probs[i][j + 1] <<  8 |
2940                           pic_param->mv_probs[i][j + 0] <<  0);
2941         }
2942     }
2943
2944     OUT_BCS_BATCH(batch,
2945                   pic_param->loop_filter_deltas_ref_frame[3] << 24 |
2946                   pic_param->loop_filter_deltas_ref_frame[2] << 16 |
2947                   pic_param->loop_filter_deltas_ref_frame[1] <<  8 |
2948                   pic_param->loop_filter_deltas_ref_frame[0] <<  0);
2949
2950     OUT_BCS_BATCH(batch,
2951                   pic_param->loop_filter_deltas_mode[3] << 24 |
2952                   pic_param->loop_filter_deltas_mode[2] << 16 |
2953                   pic_param->loop_filter_deltas_mode[1] <<  8 |
2954                   pic_param->loop_filter_deltas_mode[0] <<  0);
2955
2956     /* segmentation id stream base address, DW35-DW37 */
2957     OUT_BCS_BATCH(batch, 0);
2958     OUT_BCS_BATCH(batch, 0);
2959     OUT_BCS_BATCH(batch, 0);
2960     ADVANCE_BCS_BATCH(batch);
2961 }
2962
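/*
 * Program MFD_VP8_BSD_OBJECT: hand the partition 0 boolean-decoder context
 * (range, value, bit count) to the hardware and provide the byte offset and
 * size of each of the up to 8 token partitions.
 */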
2963 static void
2964 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2965                         VAPictureParameterBufferVP8 *pic_param,
2966                         VASliceParameterBufferVP8 *slice_param,
2967                         dri_bo *slice_data_bo,
2968                         struct gen7_mfd_context *gen7_mfd_context)
2969 {
2970     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2971     int i, log2num;
2972     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
2973     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
2974     unsigned int partition_size_0 = slice_param->partition_size[0];
2975
2976     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
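    /*
     * If the boolean decoder context shows the whole current byte has already
     * been consumed (count == 0, i.e. used_bits == 8), start the hardware at
     * the next byte with no bits pre-consumed.
     */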
2977     if (used_bits == 8) {
2978         used_bits = 0;
2979         offset += 1;
2980         partition_size_0 -= 1;
2981     }
2982
2983     assert(slice_param->num_of_partitions >= 2);
2984     assert(slice_param->num_of_partitions <= 9);
2985
2986     log2num = (int)log2(slice_param->num_of_partitions - 1);
2987
2988     BEGIN_BCS_BATCH(batch, 22);
2989     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2990     OUT_BCS_BATCH(batch,
2991                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2992                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2993                   log2num << 4 |
2994                   (slice_param->macroblock_offset & 0x7));
2995     OUT_BCS_BATCH(batch,
2996                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2997                   0);
2998
2999     OUT_BCS_BATCH(batch, partition_size_0);
3000     OUT_BCS_BATCH(batch, offset);
3001     /* partition sizes in bytes follow the first partition when there is more than one token partition */
3002     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
3003     for (i = 1; i < 9; i++) {
3004         if (i < slice_param->num_of_partitions) {
3005             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
3006             OUT_BCS_BATCH(batch, offset);
3007         } else {
3008             OUT_BCS_BATCH(batch, 0);
3009             OUT_BCS_BATCH(batch, 0);
3010         }
3011
3012         offset += slice_param->partition_size[i];
3013     }
3014
3015     OUT_BCS_BATCH(batch,
3016                   1 << 31 | /* concealment method */
3017                   0);
3018
3019     ADVANCE_BCS_BATCH(batch);
3020 }
3021
3022 void
3023 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3024                             struct decode_state *decode_state,
3025                             struct gen7_mfd_context *gen7_mfd_context)
3026 {
3027     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3028     VAPictureParameterBufferVP8 *pic_param;
3029     VASliceParameterBufferVP8 *slice_param;
3030     dri_bo *slice_data_bo;
3031
3032     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3033     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3034
3035     /* one slice per frame */
3036     assert(decode_state->num_slice_params == 1);
3037     assert(decode_state->slice_params[0]->num_elements == 1);
3038     assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
3039     assert(decode_state->slice_datas[0]->bo);
3040
3041     assert(decode_state->probability_data);
3042
3043     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3044     slice_data_bo = decode_state->slice_datas[0]->bo;
3045
3046     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3047     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3048     intel_batchbuffer_emit_mi_flush(batch);
3049     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3050     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3051     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3052     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3053     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3054     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3055     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3056     intel_batchbuffer_end_atomic(batch);
3057     intel_batchbuffer_flush(batch);
3058 }
3059
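/*
 * Common decode entry point (hw_context->run): sanity-check the input buffers
 * for the given profile, then dispatch to the per-codec decode function.
 */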
3060 static VAStatus
3061 gen8_mfd_decode_picture(VADriverContextP ctx, 
3062                         VAProfile profile, 
3063                         union codec_state *codec_state,
3064                         struct hw_context *hw_context)
3066 {
3067     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3068     struct decode_state *decode_state = &codec_state->decode;
3069     VAStatus vaStatus;
3070
3071     assert(gen7_mfd_context);
3072
3073     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3074
3075     if (vaStatus != VA_STATUS_SUCCESS)
3076         goto out;
3077
3078     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3079
3080     switch (profile) {
3081     case VAProfileMPEG2Simple:
3082     case VAProfileMPEG2Main:
3083         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3084         break;
3085         
3086     case VAProfileH264ConstrainedBaseline:
3087     case VAProfileH264Main:
3088     case VAProfileH264High:
3089         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3090         break;
3091
3092     case VAProfileVC1Simple:
3093     case VAProfileVC1Main:
3094     case VAProfileVC1Advanced:
3095         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3096         break;
3097
3098     case VAProfileJPEGBaseline:
3099         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3100         break;
3101
3102     case VAProfileVP8Version0_3:
3103         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3104         break;
3105
3106     default:
3107         assert(0);
3108         break;
3109     }
3110
3111     vaStatus = VA_STATUS_SUCCESS;
3112
3113 out:
3114     return vaStatus;
3115 }
3116
3117 static void
3118 gen8_mfd_context_destroy(void *hw_context)
3119 {
3120     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3121
3122     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3123     gen7_mfd_context->post_deblocking_output.bo = NULL;
3124
3125     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3126     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3127
3128     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3129     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3130
3131     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3132     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3133
3134     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3135     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3136
3137     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3138     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3139
3140     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3141     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3142
3143     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3144
3145     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3146     free(gen7_mfd_context);
3147 }
3148
3149 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3150                                     struct gen7_mfd_context *gen7_mfd_context)
3151 {
3152     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3153     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3154     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3155     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3156 }
3157
3158 struct hw_context *
3159 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3160 {
3161     struct intel_driver_data *intel = intel_driver_data(ctx);
3162     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3163     int i;
3164
3165     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3166     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3167     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3168
3169     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3170         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3171         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3172     }
3173
3174     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3175
3176     switch (obj_config->profile) {
3177     case VAProfileMPEG2Simple:
3178     case VAProfileMPEG2Main:
3179         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3180         break;
3181
3182     case VAProfileH264ConstrainedBaseline:
3183     case VAProfileH264Main:
3184     case VAProfileH264High:
3185         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3186         break;
3187     default:
3188         break;
3189     }
3190     return (struct hw_context *)gen7_mfd_context;
3191 }