[platform/upstream/libva-intel-driver.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
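/*
 * Lazily attach the per-surface private data used by AVC decoding.  The
 * direct-MV scratch buffer (dmv_top) is sized for the whole frame,
 * 128 bytes per macroblock, so frame and field pictures can share it.
 */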
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(1, sizeof(GenAvcSurface));
77         gen7_avc_surface->frame_store_id = -1;
78         assert((obj_surface->size & 0x3f) == 0);
79         obj_surface->private_data = gen7_avc_surface;
80     }
81
82     /* DMV buffers now relate to the whole frame, irrespective of
83        field coding modes */
84     if (gen7_avc_surface->dmv_top == NULL) {
85         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86                                                  "direct mv w/r buffer",
87                                                  width_in_mbs * height_in_mbs * 128,
88                                                  0x1000);
89         assert(gen7_avc_surface->dmv_top);
90     }
91 }
92
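/*
 * MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode for the
 * selected codec and enable either the pre- or the post-deblocking
 * output, whichever the caller has marked as valid.
 */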
93 static void
94 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
95                           struct decode_state *decode_state,
96                           int standard_select,
97                           struct gen7_mfd_context *gen7_mfd_context)
98 {
99     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
100
101     assert(standard_select == MFX_FORMAT_MPEG2 ||
102            standard_select == MFX_FORMAT_AVC ||
103            standard_select == MFX_FORMAT_VC1 ||
104            standard_select == MFX_FORMAT_JPEG ||
105            standard_select == MFX_FORMAT_VP8);
106
107     BEGIN_BCS_BATCH(batch, 5);
108     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
109     OUT_BCS_BATCH(batch,
110                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
111                   (MFD_MODE_VLD << 15) | /* VLD mode */
112                   (0 << 10) | /* disable Stream-Out */
113                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
114                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
115                   (0 << 5)  | /* not in stitch mode */
116                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
117                   (standard_select << 0));
118     OUT_BCS_BATCH(batch,
119                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
120                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
121                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
122                   (0 << 1)  |
123                   (0 << 0));
124     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
125     OUT_BCS_BATCH(batch, 0); /* reserved */
126     ADVANCE_BCS_BATCH(batch);
127 }
128
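/*
 * MFX_SURFACE_STATE: describe the destination surface (dimensions, pitch,
 * Y-major tiling and the Cb/Cr plane offsets).  Monochrome is selected
 * for Y800 surfaces, planar 4:2:0 otherwise.
 */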
129 static void
130 gen8_mfd_surface_state(VADriverContextP ctx,
131                        struct decode_state *decode_state,
132                        int standard_select,
133                        struct gen7_mfd_context *gen7_mfd_context)
134 {
135     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136     struct object_surface *obj_surface = decode_state->render_object;
137     unsigned int y_cb_offset;
138     unsigned int y_cr_offset;
139     unsigned int surface_format;
140
141     assert(obj_surface);
142
143     y_cb_offset = obj_surface->y_cb_offset;
144     y_cr_offset = obj_surface->y_cr_offset;
145
146     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
148
149     BEGIN_BCS_BATCH(batch, 6);
150     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151     OUT_BCS_BATCH(batch, 0);
152     OUT_BCS_BATCH(batch,
153                   ((obj_surface->orig_height - 1) << 18) |
154                   ((obj_surface->orig_width - 1) << 4));
155     OUT_BCS_BATCH(batch,
156                   (surface_format << 28) | /* 420 planar YUV surface */
157                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158                   (0 << 22) | /* surface object control state, ignored */
159                   ((obj_surface->width - 1) << 3) | /* pitch */
160                   (0 << 2)  | /* must be 0 */
161                   (1 << 1)  | /* must be tiled */
162                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
163     OUT_BCS_BATCH(batch,
164                   (0 << 16) | /* X offset for U(Cb), must be 0 */
165                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
166     OUT_BCS_BATCH(batch,
167                   (0 << 16) | /* X offset for V(Cr), must be 0 */
168                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
169     ADVANCE_BCS_BATCH(batch);
170 }
171
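/*
 * MFX_PIPE_BUF_ADDR_STATE (61 dwords on Gen8): program the pre/post
 * deblocking outputs, the row-store scratch buffers and the 16 reference
 * picture addresses.  Invalid or unused entries are written as 0.
 */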
172 static void
173 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174                              struct decode_state *decode_state,
175                              int standard_select,
176                              struct gen7_mfd_context *gen7_mfd_context)
177 {
178     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
179     int i;
180
181     BEGIN_BCS_BATCH(batch, 61);
182     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
183     /* Pre-deblock 1-3 */
184     if (gen7_mfd_context->pre_deblocking_output.valid)
185         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
186                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
187                       0);
188     else
189         OUT_BCS_BATCH(batch, 0);
190
191     OUT_BCS_BATCH(batch, 0);
192     OUT_BCS_BATCH(batch, 0);
193     /* Post-deblocking 4-6 */
194     if (gen7_mfd_context->post_deblocking_output.valid)
195         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
196                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
197                       0);
198     else
199         OUT_BCS_BATCH(batch, 0);
200
201     OUT_BCS_BATCH(batch, 0);
202     OUT_BCS_BATCH(batch, 0);
203
204     /* uncompressed-video & stream out 7-12 */
205     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
207     OUT_BCS_BATCH(batch, 0);
208     OUT_BCS_BATCH(batch, 0);
209     OUT_BCS_BATCH(batch, 0);
210     OUT_BCS_BATCH(batch, 0);
211
212     /* intra row-store scratch 13-15 */
213     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
214         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
215                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
216                       0);
217     else
218         OUT_BCS_BATCH(batch, 0);
219
220     OUT_BCS_BATCH(batch, 0);
221     OUT_BCS_BATCH(batch, 0);
222     /* deblocking-filter-row-store 16-18 */
223     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
224         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
225                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
226                       0);
227     else
228         OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230     OUT_BCS_BATCH(batch, 0);
231
232     /* DW 19..50 */
233     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
234         struct object_surface *obj_surface;
235
236         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
237             gen7_mfd_context->reference_surface[i].obj_surface &&
238             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
239             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
240
241             OUT_BCS_RELOC(batch, obj_surface->bo,
242                           I915_GEM_DOMAIN_INSTRUCTION, 0,
243                           0);
244         } else {
245             OUT_BCS_BATCH(batch, 0);
246         }
247         
248         OUT_BCS_BATCH(batch, 0);
249     }
250     
251     /* reference property 51 */
252     OUT_BCS_BATCH(batch, 0);  
253         
254     /* Macroblock status & ILDB 52-57 */
255     OUT_BCS_BATCH(batch, 0);
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260     OUT_BCS_BATCH(batch, 0);
261
262     /* the second Macroblock status 58-60 */    
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266
267     ADVANCE_BCS_BATCH(batch);
268 }
269
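/*
 * MFX_IND_OBJ_BASE_ADDR_STATE: point the bitstream fetch at the slice
 * data BO.  Only the indirect bitstream object is programmed for
 * decoding; the MV, IT-COFF, IT-DBLK and PAK-BSE sections stay 0.
 */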
270 static void
271 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
272                                  dri_bo *slice_data_bo,
273                                  int standard_select,
274                                  struct gen7_mfd_context *gen7_mfd_context)
275 {
276     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
277
278     BEGIN_BCS_BATCH(batch, 26);
279     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
280         /* MFX In BS 1-5 */
281     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
282     OUT_BCS_BATCH(batch, 0);
283     OUT_BCS_BATCH(batch, 0);
284         /* Upper bound 4-5 */   
285     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
286     OUT_BCS_BATCH(batch, 0);
287
288         /* MFX indirect MV 6-10 */
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0);
291     OUT_BCS_BATCH(batch, 0);
292     OUT_BCS_BATCH(batch, 0);
293     OUT_BCS_BATCH(batch, 0);
294         
295         /* MFX IT_COFF 11-15 */
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299     OUT_BCS_BATCH(batch, 0);
300     OUT_BCS_BATCH(batch, 0);
301
302         /* MFX IT_DBLK 16-20 */
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0);
307     OUT_BCS_BATCH(batch, 0);
308
309         /* MFX PAK_BSE object for encoder 21-25 */
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313     OUT_BCS_BATCH(batch, 0);
314     OUT_BCS_BATCH(batch, 0);
315
316     ADVANCE_BCS_BATCH(batch);
317 }
318
319 static void
320 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
321                                  struct decode_state *decode_state,
322                                  int standard_select,
323                                  struct gen7_mfd_context *gen7_mfd_context)
324 {
325     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
326
327     BEGIN_BCS_BATCH(batch, 10);
328     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
329
330     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
331         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
332                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
333                       0);
334     else
335         OUT_BCS_BATCH(batch, 0);
336
337     OUT_BCS_BATCH(batch, 0);
338     OUT_BCS_BATCH(batch, 0);
339         /* MPR Row Store Scratch buffer 4-6 */
340     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
341         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
342                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
343                       0);
344     else
345         OUT_BCS_BATCH(batch, 0);
346
347     OUT_BCS_BATCH(batch, 0);
348     OUT_BCS_BATCH(batch, 0);
349
350         /* Bitplane 7-9 */ 
351     if (gen7_mfd_context->bitplane_read_buffer.valid)
352         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
353                       I915_GEM_DOMAIN_INSTRUCTION, 0,
354                       0);
355     else
356         OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0);
358     OUT_BCS_BATCH(batch, 0);
359     ADVANCE_BCS_BATCH(batch);
360 }
361
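/*
 * MFX_QM_STATE: upload one quantizer matrix set.  The payload is always
 * 16 dwords; callers pass at most 64 bytes of matrix data, which is
 * copied into a local buffer before being emitted.
 */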
362 static void
363 gen8_mfd_qm_state(VADriverContextP ctx,
364                   int qm_type,
365                   unsigned char *qm,
366                   int qm_length,
367                   struct gen7_mfd_context *gen7_mfd_context)
368 {
369     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
370     unsigned int qm_buffer[16];
371
372     assert(qm_length <= 16 * 4);
373     memcpy(qm_buffer, qm, qm_length);
374
375     BEGIN_BCS_BATCH(batch, 18);
376     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
377     OUT_BCS_BATCH(batch, qm_type << 0);
378     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
379     ADVANCE_BCS_BATCH(batch);
380 }
381
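/*
 * MFX_AVC_IMG_STATE: per-picture AVC parameters (frame size in MBs,
 * chroma QP offsets, picture structure, entropy coding and MBAFF flags)
 * derived from the VA picture parameter buffer.
 */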
382 static void
383 gen8_mfd_avc_img_state(VADriverContextP ctx,
384                        struct decode_state *decode_state,
385                        struct gen7_mfd_context *gen7_mfd_context)
386 {
387     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
388     int img_struct;
389     int mbaff_frame_flag;
390     unsigned int width_in_mbs, height_in_mbs;
391     VAPictureParameterBufferH264 *pic_param;
392
393     assert(decode_state->pic_param && decode_state->pic_param->buffer);
394     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
395     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
396
397     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
398         img_struct = 1;
399     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
400         img_struct = 3;
401     else
402         img_struct = 0;
403
404     if ((img_struct & 0x1) == 0x1) {
405         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
406     } else {
407         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
408     }
409
410     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
411         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
412         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
413     } else {
414         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
415     }
416
417     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
418                         !pic_param->pic_fields.bits.field_pic_flag);
419
420     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
421     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
422
423     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
424     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
425            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
426     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
427
428     BEGIN_BCS_BATCH(batch, 17);
429     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
430     OUT_BCS_BATCH(batch, 
431                   (width_in_mbs * height_in_mbs - 1));
432     OUT_BCS_BATCH(batch, 
433                   ((height_in_mbs - 1) << 16) | 
434                   ((width_in_mbs - 1) << 0));
435     OUT_BCS_BATCH(batch, 
436                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
437                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
438                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
439                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
440                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
441                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
442                   (img_struct << 8));
443     OUT_BCS_BATCH(batch,
444                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
445                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
446                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
447                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
448                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
449                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
450                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
451                   (mbaff_frame_flag << 1) |
452                   (pic_param->pic_fields.bits.field_pic_flag << 0));
453     OUT_BCS_BATCH(batch, 0);
454     OUT_BCS_BATCH(batch, 0);
455     OUT_BCS_BATCH(batch, 0);
456     OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     ADVANCE_BCS_BATCH(batch);
466 }
467
468 static void
469 gen8_mfd_avc_qm_state(VADriverContextP ctx,
470                       struct decode_state *decode_state,
471                       struct gen7_mfd_context *gen7_mfd_context)
472 {
473     VAIQMatrixBufferH264 *iq_matrix;
474     VAPictureParameterBufferH264 *pic_param;
475
476     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
477         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
478     else
479         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
480
481     assert(decode_state->pic_param && decode_state->pic_param->buffer);
482     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
483
484     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
485     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
486
487     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
488         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
489         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
490     }
491 }
492
493 static inline void
494 gen8_mfd_avc_picid_state(VADriverContextP ctx,
495     struct decode_state *decode_state,
496     struct gen7_mfd_context *gen7_mfd_context)
497 {
498     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
499         gen7_mfd_context->reference_surface);
500 }
501
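/*
 * MFX_AVC_DIRECTMODE_STATE: pass the direct-MV buffers of all reference
 * surfaces and of the current picture, followed by the top/bottom field
 * order counts (POC list) for each reference and for the current picture.
 */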
502 static void
503 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
504                               struct decode_state *decode_state,
505                               VAPictureParameterBufferH264 *pic_param,
506                               VASliceParameterBufferH264 *slice_param,
507                               struct gen7_mfd_context *gen7_mfd_context)
508 {
509     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
510     struct object_surface *obj_surface;
511     GenAvcSurface *gen7_avc_surface;
512     VAPictureH264 *va_pic;
513     int i;
514
515     BEGIN_BCS_BATCH(batch, 71);
516     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
517
518     /* reference surfaces 0..15 */
519     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
520         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
521             gen7_mfd_context->reference_surface[i].obj_surface &&
522             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
523
524             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
525             gen7_avc_surface = obj_surface->private_data;
526
527             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
528                           I915_GEM_DOMAIN_INSTRUCTION, 0,
529                           0);
530             OUT_BCS_BATCH(batch, 0);
531         } else {
532             OUT_BCS_BATCH(batch, 0);
533             OUT_BCS_BATCH(batch, 0);
534         }
535     }
536     
537     OUT_BCS_BATCH(batch, 0);
538
539     /* the current decoding frame/field */
540     va_pic = &pic_param->CurrPic;
541     obj_surface = decode_state->render_object;
542     assert(obj_surface->bo && obj_surface->private_data);
543     gen7_avc_surface = obj_surface->private_data;
544
545     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
546                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
547                   0);
548
549     OUT_BCS_BATCH(batch, 0);
550     OUT_BCS_BATCH(batch, 0);
551
552     /* POC List */
553     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
554         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
555
556         if (obj_surface) {
557             const VAPictureH264 * const va_pic = avc_find_picture(
558                 obj_surface->base.id, pic_param->ReferenceFrames,
559                 ARRAY_ELEMS(pic_param->ReferenceFrames));
560
561             assert(va_pic != NULL);
562             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
563             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
564         } else {
565             OUT_BCS_BATCH(batch, 0);
566             OUT_BCS_BATCH(batch, 0);
567         }
568     }
569
570     va_pic = &pic_param->CurrPic;
571     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
572     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
573
574     ADVANCE_BCS_BATCH(batch);
575 }
576
577 static void
578 gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
579                                  VAPictureParameterBufferH264 *pic_param,
580                                  VASliceParameterBufferH264 *next_slice_param,
581                                  struct gen7_mfd_context *gen7_mfd_context)
582 {
583     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
584 }
585
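/*
 * MFX_AVC_SLICE_STATE: per-slice parameters such as slice type, active
 * reference counts, QP and deblocking controls, plus the macroblock
 * positions of this slice and of the next one (or the picture end for
 * the last slice).  first_mb_in_slice is doubled for MBAFF pictures
 * before the positions are computed.
 */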
586 static void
587 gen8_mfd_avc_slice_state(VADriverContextP ctx,
588                          VAPictureParameterBufferH264 *pic_param,
589                          VASliceParameterBufferH264 *slice_param,
590                          VASliceParameterBufferH264 *next_slice_param,
591                          struct gen7_mfd_context *gen7_mfd_context)
592 {
593     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
594     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
595     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
596     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
597     int num_ref_idx_l0, num_ref_idx_l1;
598     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
599                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
600     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
601     int slice_type;
602
603     if (slice_param->slice_type == SLICE_TYPE_I ||
604         slice_param->slice_type == SLICE_TYPE_SI) {
605         slice_type = SLICE_TYPE_I;
606     } else if (slice_param->slice_type == SLICE_TYPE_P ||
607                slice_param->slice_type == SLICE_TYPE_SP) {
608         slice_type = SLICE_TYPE_P;
609     } else { 
610         assert(slice_param->slice_type == SLICE_TYPE_B);
611         slice_type = SLICE_TYPE_B;
612     }
613
614     if (slice_type == SLICE_TYPE_I) {
615         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
616         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
617         num_ref_idx_l0 = 0;
618         num_ref_idx_l1 = 0;
619     } else if (slice_type == SLICE_TYPE_P) {
620         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
621         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
622         num_ref_idx_l1 = 0;
623     } else {
624         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
625         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
626     }
627
628     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
629     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
630     slice_ver_pos = first_mb_in_slice / width_in_mbs;
631
632     if (next_slice_param) {
633         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
634         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
635         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
636     } else {
637         next_slice_hor_pos = 0;
638         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
639     }
640
641     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
642     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
643     OUT_BCS_BATCH(batch, slice_type);
644     OUT_BCS_BATCH(batch, 
645                   (num_ref_idx_l1 << 24) |
646                   (num_ref_idx_l0 << 16) |
647                   (slice_param->chroma_log2_weight_denom << 8) |
648                   (slice_param->luma_log2_weight_denom << 0));
649     OUT_BCS_BATCH(batch, 
650                   (slice_param->direct_spatial_mv_pred_flag << 29) |
651                   (slice_param->disable_deblocking_filter_idc << 27) |
652                   (slice_param->cabac_init_idc << 24) |
653                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
654                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
655                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
656     OUT_BCS_BATCH(batch, 
657                   (slice_ver_pos << 24) |
658                   (slice_hor_pos << 16) | 
659                   (first_mb_in_slice << 0));
660     OUT_BCS_BATCH(batch,
661                   (next_slice_ver_pos << 16) |
662                   (next_slice_hor_pos << 0));
663     OUT_BCS_BATCH(batch, 
664                   (next_slice_param == NULL) << 19); /* last slice flag */
665     OUT_BCS_BATCH(batch, 0);
666     OUT_BCS_BATCH(batch, 0);
667     OUT_BCS_BATCH(batch, 0);
668     OUT_BCS_BATCH(batch, 0);
669     ADVANCE_BCS_BATCH(batch);
670 }
671
672 static inline void
673 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
674                            VAPictureParameterBufferH264 *pic_param,
675                            VASliceParameterBufferH264 *slice_param,
676                            struct gen7_mfd_context *gen7_mfd_context)
677 {
678     gen6_send_avc_ref_idx_state(
679         gen7_mfd_context->base.batch,
680         slice_param,
681         gen7_mfd_context->reference_surface
682     );
683 }
684
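/*
 * MFX_AVC_WEIGHTOFFSET_STATE: emit explicit weighted-prediction tables.
 * One table (L0) is sent for weighted P/SP slices, two (L0 and L1) for
 * B slices with weighted_bipred_idc == 1; each table packs luma and
 * chroma weight/offset pairs for 32 reference entries.
 */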
685 static void
686 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
687                                 VAPictureParameterBufferH264 *pic_param,
688                                 VASliceParameterBufferH264 *slice_param,
689                                 struct gen7_mfd_context *gen7_mfd_context)
690 {
691     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
692     int i, j, num_weight_offset_table = 0;
693     short weightoffsets[32 * 6];
694
695     if ((slice_param->slice_type == SLICE_TYPE_P ||
696          slice_param->slice_type == SLICE_TYPE_SP) &&
697         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
698         num_weight_offset_table = 1;
699     }
700     
701     if ((slice_param->slice_type == SLICE_TYPE_B) &&
702         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
703         num_weight_offset_table = 2;
704     }
705
706     for (i = 0; i < num_weight_offset_table; i++) {
707         BEGIN_BCS_BATCH(batch, 98);
708         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
709         OUT_BCS_BATCH(batch, i);
710
711         if (i == 0) {
712             for (j = 0; j < 32; j++) {
713                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
714                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
715                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
716                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
717                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
718                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
719             }
720         } else {
721             for (j = 0; j < 32; j++) {
722                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
723                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
724                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
725                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
726                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
727                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
728             }
729         }
730
731         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
732         ADVANCE_BCS_BATCH(batch);
733     }
734 }
735
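/*
 * MFD_AVC_BSD_OBJECT: start decoding of one slice.  Slice data size and
 * offset come from the VA slice parameter buffer, and the bit offset of
 * the first macroblock is recomputed from the bitstream by
 * avc_get_first_mb_bit_offset().
 */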
736 static void
737 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
738                         VAPictureParameterBufferH264 *pic_param,
739                         VASliceParameterBufferH264 *slice_param,
740                         dri_bo *slice_data_bo,
741                         VASliceParameterBufferH264 *next_slice_param,
742                         struct gen7_mfd_context *gen7_mfd_context)
743 {
744     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
745     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
746                                                             slice_param,
747                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
748
749     /* the input bitstream format on GEN7 differs from GEN6 */
750     BEGIN_BCS_BATCH(batch, 6);
751     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
752     OUT_BCS_BATCH(batch, 
753                   (slice_param->slice_data_size));
754     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
755     OUT_BCS_BATCH(batch,
756                   (0 << 31) |
757                   (0 << 14) |
758                   (0 << 12) |
759                   (0 << 10) |
760                   (0 << 8));
761     OUT_BCS_BATCH(batch,
762                   ((slice_data_bit_offset >> 3) << 16) |
763                   (1 << 7)  |
764                   (0 << 5)  |
765                   (0 << 4)  |
766                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
767                   (slice_data_bit_offset & 0x7));
768     OUT_BCS_BATCH(batch, 0);
769     ADVANCE_BCS_BATCH(batch);
770 }
771
772 static inline void
773 gen8_mfd_avc_context_init(
774     VADriverContextP         ctx,
775     struct gen7_mfd_context *gen7_mfd_context
776 )
777 {
778     /* Initialize flat scaling lists */
779     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
780 }
781
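/*
 * Per-picture AVC setup: scan the slice headers to decide whether the
 * in-loop deblocking filter is needed, refresh the frame-store index,
 * (re)allocate the row-store scratch buffers sized from the frame width
 * in MBs, and select the pre- or post-deblocking surface as the write
 * target accordingly.
 */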
782 static void
783 gen8_mfd_avc_decode_init(VADriverContextP ctx,
784                          struct decode_state *decode_state,
785                          struct gen7_mfd_context *gen7_mfd_context)
786 {
787     VAPictureParameterBufferH264 *pic_param;
788     VASliceParameterBufferH264 *slice_param;
789     struct i965_driver_data *i965 = i965_driver_data(ctx);
790     struct object_surface *obj_surface;
791     dri_bo *bo;
792     int i, j, enable_avc_ildb = 0;
793     unsigned int width_in_mbs, height_in_mbs;
794
795     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
796         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
797         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
798
799         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
800             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
801             assert((slice_param->slice_type == SLICE_TYPE_I) ||
802                    (slice_param->slice_type == SLICE_TYPE_SI) ||
803                    (slice_param->slice_type == SLICE_TYPE_P) ||
804                    (slice_param->slice_type == SLICE_TYPE_SP) ||
805                    (slice_param->slice_type == SLICE_TYPE_B));
806
807             if (slice_param->disable_deblocking_filter_idc != 1) {
808                 enable_avc_ildb = 1;
809                 break;
810             }
811
812             slice_param++;
813         }
814     }
815
816     assert(decode_state->pic_param && decode_state->pic_param->buffer);
817     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
818     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
819         gen7_mfd_context->reference_surface);
820     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
821     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
822     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
823     assert(height_in_mbs > 0 && height_in_mbs <= 256);
824
825     /* Current decoded picture */
826     obj_surface = decode_state->render_object;
827     if (pic_param->pic_fields.bits.reference_pic_flag)
828         obj_surface->flags |= SURFACE_REFERENCED;
829     else
830         obj_surface->flags &= ~SURFACE_REFERENCED;
831
832     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
833     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
834
835     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
836     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
837     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
838     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
839
840     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
841     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
842     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
843     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
844
845     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
846     bo = dri_bo_alloc(i965->intel.bufmgr,
847                       "intra row store",
848                       width_in_mbs * 64,
849                       0x1000);
850     assert(bo);
851     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
852     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
853
854     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
855     bo = dri_bo_alloc(i965->intel.bufmgr,
856                       "deblocking filter row store",
857                       width_in_mbs * 64 * 4,
858                       0x1000);
859     assert(bo);
860     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
861     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
862
863     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
864     bo = dri_bo_alloc(i965->intel.bufmgr,
865                       "bsd mpc row store",
866                       width_in_mbs * 64 * 2,
867                       0x1000);
868     assert(bo);
869     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
870     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
871
872     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
873     bo = dri_bo_alloc(i965->intel.bufmgr,
874                       "mpr row store",
875                       width_in_mbs * 64 * 2,
876                       0x1000);
877     assert(bo);
878     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
879     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
880
881     gen7_mfd_context->bitplane_read_buffer.valid = 0;
882 }
883
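/*
 * Top-level AVC decode: emit the per-picture state once, then walk every
 * slice parameter buffer and emit direct-mode, ref-idx, weight/offset,
 * slice-state and BSD-object commands for each slice within one atomic
 * BCS batch.
 */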
884 static void
885 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
886                             struct decode_state *decode_state,
887                             struct gen7_mfd_context *gen7_mfd_context)
888 {
889     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
890     VAPictureParameterBufferH264 *pic_param;
891     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
892     dri_bo *slice_data_bo;
893     int i, j;
894
895     assert(decode_state->pic_param && decode_state->pic_param->buffer);
896     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
897     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
898
899     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
900     intel_batchbuffer_emit_mi_flush(batch);
901     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
902     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
903     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
904     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
905     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
906     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
907     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
908
909     for (j = 0; j < decode_state->num_slice_params; j++) {
910         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
911         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
912         slice_data_bo = decode_state->slice_datas[j]->bo;
913         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
914
915         if (j == decode_state->num_slice_params - 1)
916             next_slice_group_param = NULL;
917         else
918             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
919
920         if (j == 0 && slice_param->first_mb_in_slice)
921             gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); 
922
923         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
924             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
925             assert((slice_param->slice_type == SLICE_TYPE_I) ||
926                    (slice_param->slice_type == SLICE_TYPE_SI) ||
927                    (slice_param->slice_type == SLICE_TYPE_P) ||
928                    (slice_param->slice_type == SLICE_TYPE_SP) ||
929                    (slice_param->slice_type == SLICE_TYPE_B));
930
931             if (i < decode_state->slice_params[j]->num_elements - 1)
932                 next_slice_param = slice_param + 1;
933             else
934                 next_slice_param = next_slice_group_param;
935
936             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
937             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
938             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
939             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
940             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
941             slice_param++;
942         }
943     }
944
945     intel_batchbuffer_end_atomic(batch);
946     intel_batchbuffer_flush(batch);
947 }
948
949 static void
950 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
951                            struct decode_state *decode_state,
952                            struct gen7_mfd_context *gen7_mfd_context)
953 {
954     VAPictureParameterBufferMPEG2 *pic_param;
955     struct i965_driver_data *i965 = i965_driver_data(ctx);
956     struct object_surface *obj_surface;
957     dri_bo *bo;
958     unsigned int width_in_mbs;
959
960     assert(decode_state->pic_param && decode_state->pic_param->buffer);
961     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
962     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
963
964     mpeg2_set_reference_surfaces(
965         ctx,
966         gen7_mfd_context->reference_surface,
967         decode_state,
968         pic_param
969     );
970
971     /* Current decoded picture */
972     obj_surface = decode_state->render_object;
973     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
974
975     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
976     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
977     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
978     gen7_mfd_context->pre_deblocking_output.valid = 1;
979
980     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
981     bo = dri_bo_alloc(i965->intel.bufmgr,
982                       "bsd mpc row store",
983                       width_in_mbs * 96,
984                       0x1000);
985     assert(bo);
986     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
987     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
988
989     gen7_mfd_context->post_deblocking_output.valid = 0;
990     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
991     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
992     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
993     gen7_mfd_context->bitplane_read_buffer.valid = 0;
994 }
995
996 static void
997 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
998                          struct decode_state *decode_state,
999                          struct gen7_mfd_context *gen7_mfd_context)
1000 {
1001     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1002     VAPictureParameterBufferMPEG2 *pic_param;
1003     unsigned int slice_concealment_disable_bit = 0;
1004
1005     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1006     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1007
1008     slice_concealment_disable_bit = 1;
1009
1010     BEGIN_BCS_BATCH(batch, 13);
1011     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1012     OUT_BCS_BATCH(batch,
1013                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1014                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1015                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1016                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1017                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1018                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1019                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1020                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1021                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1022                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1023                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1024                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1025     OUT_BCS_BATCH(batch,
1026                   pic_param->picture_coding_type << 9);
1027     OUT_BCS_BATCH(batch,
1028                   (slice_concealment_disable_bit << 31) |
1029                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1030                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1031     OUT_BCS_BATCH(batch, 0);
1032     OUT_BCS_BATCH(batch, 0);
1033     OUT_BCS_BATCH(batch, 0);
1034     OUT_BCS_BATCH(batch, 0);
1035     OUT_BCS_BATCH(batch, 0);
1036     OUT_BCS_BATCH(batch, 0);
1037     OUT_BCS_BATCH(batch, 0);
1038     OUT_BCS_BATCH(batch, 0);
1039     OUT_BCS_BATCH(batch, 0);
1040     ADVANCE_BCS_BATCH(batch);
1041 }
1042
1043 static void
1044 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1045                         struct decode_state *decode_state,
1046                         struct gen7_mfd_context *gen7_mfd_context)
1047 {
1048     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1049     int i, j;
1050
1051     /* Update internal QM state */
1052     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1053         VAIQMatrixBufferMPEG2 * const iq_matrix =
1054             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1055
1056         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1057             iq_matrix->load_intra_quantiser_matrix) {
1058             gen_iq_matrix->load_intra_quantiser_matrix =
1059                 iq_matrix->load_intra_quantiser_matrix;
1060             if (iq_matrix->load_intra_quantiser_matrix) {
1061                 for (j = 0; j < 64; j++)
1062                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1063                         iq_matrix->intra_quantiser_matrix[j];
1064             }
1065         }
1066
1067         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1068             iq_matrix->load_non_intra_quantiser_matrix) {
1069             gen_iq_matrix->load_non_intra_quantiser_matrix =
1070                 iq_matrix->load_non_intra_quantiser_matrix;
1071             if (iq_matrix->load_non_intra_quantiser_matrix) {
1072                 for (j = 0; j < 64; j++)
1073                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1074                         iq_matrix->non_intra_quantiser_matrix[j];
1075             }
1076         }
1077     }
1078
1079     /* Commit QM state to HW */
1080     for (i = 0; i < 2; i++) {
1081         unsigned char *qm = NULL;
1082         int qm_type;
1083
1084         if (i == 0) {
1085             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1086                 qm = gen_iq_matrix->intra_quantiser_matrix;
1087                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1088             }
1089         } else {
1090             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1091                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1092                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1093             }
1094         }
1095
1096         if (!qm)
1097             continue;
1098
1099         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1100     }
1101 }
1102
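/*
 * MFD_MPEG2_BSD_OBJECT: decode one MPEG-2 slice.  The macroblock count
 * is the distance between this slice's start position and the next
 * slice's (or the bottom of the picture for the last slice); the
 * vertical position is halved for field pictures when the workaround
 * flag is set.
 */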
1103 static void
1104 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1105                           VAPictureParameterBufferMPEG2 *pic_param,
1106                           VASliceParameterBufferMPEG2 *slice_param,
1107                           VASliceParameterBufferMPEG2 *next_slice_param,
1108                           struct gen7_mfd_context *gen7_mfd_context)
1109 {
1110     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1111     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1112     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1113
1114     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1115         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1116         is_field_pic = 1;
1117     is_field_pic_wa = is_field_pic &&
1118         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1119
1120     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1121     hpos0 = slice_param->slice_horizontal_position;
1122
1123     if (next_slice_param == NULL) {
1124         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1125         hpos1 = 0;
1126     } else {
1127         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1128         hpos1 = next_slice_param->slice_horizontal_position;
1129     }
1130
1131     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1132
1133     BEGIN_BCS_BATCH(batch, 5);
1134     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1135     OUT_BCS_BATCH(batch, 
1136                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1137     OUT_BCS_BATCH(batch, 
1138                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1139     OUT_BCS_BATCH(batch,
1140                   hpos0 << 24 |
1141                   vpos0 << 16 |
1142                   mb_count << 8 |
1143                   (next_slice_param == NULL) << 5 |
1144                   (next_slice_param == NULL) << 3 |
1145                   (slice_param->macroblock_offset & 0x7));
1146     OUT_BCS_BATCH(batch,
1147                   (slice_param->quantiser_scale_code << 24) |
1148                   (vpos1 << 8 | hpos1));
1149     ADVANCE_BCS_BATCH(batch);
1150 }
1151
1152 static void
1153 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1154                               struct decode_state *decode_state,
1155                               struct gen7_mfd_context *gen7_mfd_context)
1156 {
1157     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1158     VAPictureParameterBufferMPEG2 *pic_param;
1159     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1160     dri_bo *slice_data_bo;
1161     int i, j;
1162
1163     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1164     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1165
1166     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1167     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1168     intel_batchbuffer_emit_mi_flush(batch);
1169     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1170     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1171     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1172     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1173     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1174     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1175
1176     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1177         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1178             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1179
1180     for (j = 0; j < decode_state->num_slice_params; j++) {
1181         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1182         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1183         slice_data_bo = decode_state->slice_datas[j]->bo;
1184         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1185
1186         if (j == decode_state->num_slice_params - 1)
1187             next_slice_group_param = NULL;
1188         else
1189             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1190
1191         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1192             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1193
1194             if (i < decode_state->slice_params[j]->num_elements - 1)
1195                 next_slice_param = slice_param + 1;
1196             else
1197                 next_slice_param = next_slice_group_param;
1198
1199             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1200             slice_param++;
1201         }
1202     }
1203
1204     intel_batchbuffer_end_atomic(batch);
1205     intel_batchbuffer_flush(batch);
1206 }
1207
1208 static const int va_to_gen7_vc1_pic_type[5] = {
1209     GEN7_VC1_I_PICTURE,
1210     GEN7_VC1_P_PICTURE,
1211     GEN7_VC1_B_PICTURE,
1212     GEN7_VC1_BI_PICTURE,
1213     GEN7_VC1_P_PICTURE,
1214 };
1215
1216 static const int va_to_gen7_vc1_mv[4] = {
1217     1, /* 1-MV */
1218     2, /* 1-MV half-pel */
1219     3, /* 1-MV half-pel bilinear */
1220     0, /* Mixed MV */
1221 };
1222
1223 static const int b_picture_scale_factor[21] = {
1224     128, 85,  170, 64,  192,
1225     51,  102, 153, 204, 43,
1226     215, 37,  74,  111, 148,
1227     185, 222, 32,  96,  160, 
1228     224,
1229 };
1230
1231 static const int va_to_gen7_vc1_condover[3] = {
1232     0,
1233     2,
1234     3
1235 };
1236
1237 static const int va_to_gen7_vc1_profile[4] = {
1238     GEN7_VC1_SIMPLE_PROFILE,
1239     GEN7_VC1_MAIN_PROFILE,
1240     GEN7_VC1_RESERVED_PROFILE,
1241     GEN7_VC1_ADVANCED_PROFILE
1242 };
1243
1244 static void 
1245 gen8_mfd_free_vc1_surface(void **data)
1246 {
1247     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1248
1249     if (!gen7_vc1_surface)
1250         return;
1251
1252     dri_bo_unreference(gen7_vc1_surface->dmv);
1253     free(gen7_vc1_surface);
1254     *data = NULL;
1255 }
1256
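/*
 * Lazily attach the VC-1 per-surface private data: allocate the
 * direct-MV scratch buffer (64 bytes per macroblock) and cache the
 * picture type for later reference handling.
 */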
1257 static void
1258 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1259                           VAPictureParameterBufferVC1 *pic_param,
1260                           struct object_surface *obj_surface)
1261 {
1262     struct i965_driver_data *i965 = i965_driver_data(ctx);
1263     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1264     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1265     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1266
1267     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1268
1269     if (!gen7_vc1_surface) {
1270         gen7_vc1_surface = calloc(1, sizeof(struct gen7_vc1_surface));
1271         assert((obj_surface->size & 0x3f) == 0);
1272         obj_surface->private_data = gen7_vc1_surface;
1273     }
1274
1275     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1276
1277     if (gen7_vc1_surface->dmv == NULL) {
1278         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1279                                              "direct mv w/r buffer",
1280                                              width_in_mbs * height_in_mbs * 64,
1281                                              0x1000);
1282     }
1283 }
1284
1285 static void
1286 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1287                          struct decode_state *decode_state,
1288                          struct gen7_mfd_context *gen7_mfd_context)
1289 {
1290     VAPictureParameterBufferVC1 *pic_param;
1291     struct i965_driver_data *i965 = i965_driver_data(ctx);
1292     struct object_surface *obj_surface;
1293     dri_bo *bo;
1294     int width_in_mbs;
1295     int picture_type;
1296
1297     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1298     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1299     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1300     picture_type = pic_param->picture_fields.bits.picture_type;
1301  
1302     intel_update_vc1_frame_store_index(ctx,
1303                                        decode_state,
1304                                        pic_param,
1305                                        gen7_mfd_context->reference_surface);
1306
1307     /* Current decoded picture */
1308     obj_surface = decode_state->render_object;
1309     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1310     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1311
1312     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1313     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1314     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1315     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1316
1317     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1318     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1319     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1320     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1321
1322     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1323     bo = dri_bo_alloc(i965->intel.bufmgr,
1324                       "intra row store",
1325                       width_in_mbs * 64,
1326                       0x1000);
1327     assert(bo);
1328     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1329     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1330
1331     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1332     bo = dri_bo_alloc(i965->intel.bufmgr,
1333                       "deblocking filter row store",
1334                       width_in_mbs * 7 * 64,
1335                       0x1000);
1336     assert(bo);
1337     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1338     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1339
1340     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1341     bo = dri_bo_alloc(i965->intel.bufmgr,
1342                       "bsd mpc row store",
1343                       width_in_mbs * 96,
1344                       0x1000);
1345     assert(bo);
1346     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1347     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1348
1349     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1350
1351     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1352     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1353     
1354     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1355         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1356         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1357         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1358         int src_w, src_h;
1359         uint8_t *src = NULL, *dst = NULL;
1360
1361         assert(decode_state->bit_plane->buffer);
1362         src = decode_state->bit_plane->buffer;
1363
1364         bo = dri_bo_alloc(i965->intel.bufmgr,
1365                           "VC-1 Bitplane",
1366                           bitplane_width * height_in_mbs,
1367                           0x1000);
1368         assert(bo);
1369         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1370
1371         dri_bo_map(bo, True);
1372         assert(bo->virtual);
1373         dst = bo->virtual;
1374
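             /* The VA bitplane buffer stores one 4-bit value per macroblock,
              * two macroblocks per byte in raster order.  Repack it row by row
              * into the read buffer with a pitch of bitplane_width bytes; for
              * skipped pictures bit 1 of every macroblock value is forced on.
              * If a row holds an odd number of macroblocks, the trailing
              * nibble is shifted into the low half of the last byte. */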
1375         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1376             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1377                 int src_index, dst_index;
1378                 int src_shift;
1379                 uint8_t src_value;
1380
1381                 src_index = (src_h * width_in_mbs + src_w) / 2;
1382                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1383                 src_value = ((src[src_index] >> src_shift) & 0xf);
1384
1385                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1386                     src_value |= 0x2;
1387                 }
1388
1389                 dst_index = src_w / 2;
1390                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1391             }
1392
1393             if (src_w & 1)
1394                 dst[src_w / 2] >>= 4;
1395
1396             dst += bitplane_width;
1397         }
1398
1399         dri_bo_unmap(bo);
1400     } else
1401         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1402 }
1403
1404 static void
1405 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1406                        struct decode_state *decode_state,
1407                        struct gen7_mfd_context *gen7_mfd_context)
1408 {
1409     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1410     VAPictureParameterBufferVC1 *pic_param;
1411     struct object_surface *obj_surface;
1412     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1413     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1414     int unified_mv_mode;
1415     int ref_field_pic_polarity = 0;
1416     int scale_factor = 0;
1417     int trans_ac_y = 0;
1418     int dmv_surface_valid = 0;
1419     int brfd = 0;
1420     int fcm = 0;
1421     int picture_type;
1422     int profile;
1423     int overlap;
1424     int interpolation_mode = 0;
1425
1426     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1427     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1428
1429     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1430     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1431     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1432     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1433     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1434     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1435     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1436     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1437
1438     if (dquant == 0) {
1439         alt_pquant_config = 0;
1440         alt_pquant_edge_mask = 0;
1441     } else if (dquant == 2) {
1442         alt_pquant_config = 1;
1443         alt_pquant_edge_mask = 0xf;
1444     } else {
1445         assert(dquant == 1);
1446         if (dquantfrm == 0) {
1447             alt_pquant_config = 0;
1448             alt_pquant_edge_mask = 0;
1449             alt_pq = 0;
1450         } else {
1451             assert(dquantfrm == 1);
1452             alt_pquant_config = 1;
1453
1454             switch (dqprofile) {
1455             case 3:
1456                 if (dqbilevel == 0) {
1457                     alt_pquant_config = 2;
1458                     alt_pquant_edge_mask = 0;
1459                 } else {
1460                     assert(dqbilevel == 1);
1461                     alt_pquant_config = 3;
1462                     alt_pquant_edge_mask = 0;
1463                 }
1464                 break;
1465                 
1466             case 0:
1467                 alt_pquant_edge_mask = 0xf;
1468                 break;
1469
1470             case 1:
1471                 if (dqdbedge == 3)
1472                     alt_pquant_edge_mask = 0x9;
1473                 else
1474                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1475
1476                 break;
1477
1478             case 2:
1479                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1480                 break;
1481
1482             default:
1483                 assert(0);
1484             }
1485         }
1486     }
1487
1488     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1489         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1490         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1491     } else {
1492         assert(pic_param->mv_fields.bits.mv_mode < 4);
1493         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1494     }
1495
1496     if (pic_param->sequence_fields.bits.interlace == 1 &&
1497         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1498         /* FIXME: calculate reference field picture polarity */
1499         assert(0);
1500         ref_field_pic_polarity = 0;
1501     }
1502
1503     if (pic_param->b_picture_fraction < 21)
1504         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1505
1506     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1507     
1508     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1509         picture_type == GEN7_VC1_I_PICTURE)
1510         picture_type = GEN7_VC1_BI_PICTURE;
1511
1512     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1513         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1514     else {
1515         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1516
1517         /*
1518          * 8.3.6.2.1 Transform Type Selection
1519          * If variable-sized transform coding is not enabled,
1520          * then the 8x8 transform shall be used for all blocks.
1521          * This is also a requirement of MFX_VC1_PIC_STATE.
1522          */
1523         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1524             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1525             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1526         }
1527     }
1528
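         /* Direct-mode MVs for a B picture are read from the backward (future)
          * reference; the DMV read buffer is only usable when that reference
          * exists and is itself neither an I nor a BI picture. */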
1529     if (picture_type == GEN7_VC1_B_PICTURE) {
1530         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1531
1532         obj_surface = decode_state->reference_objects[1];
1533
1534         if (obj_surface)
1535             gen7_vc1_surface = obj_surface->private_data;
1536
1537         if (!gen7_vc1_surface || 
1538             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1539              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1540             dmv_surface_valid = 0;
1541         else
1542             dmv_surface_valid = 1;
1543     }
1544
1545     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1546
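         /* Frame coding mode as programmed into MFD_VC1_LONG_PIC_STATE:
          * values 0/1 follow frame_coding_mode directly (progressive /
          * interlaced frame); for field-interlaced pictures 2 and 3 appear to
          * select top-field-first and bottom-field-first respectively. */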
1547     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1548         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1549     else {
1550         if (pic_param->picture_fields.bits.top_field_first)
1551             fcm = 2;
1552         else
1553             fcm = 3;
1554     }
1555
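         /* Backward reference frame distance (BRFD) for B pictures:
          * reference_distance - ((scale_factor * reference_distance) >> 8) - 1,
          * clamped at zero. */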
1556     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1557         brfd = pic_param->reference_fields.bits.reference_distance;
1558         brfd = (scale_factor * brfd) >> 8;
1559         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1560
1561         if (brfd < 0)
1562             brfd = 0;
1563     }
1564
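         /* Overlap smoothing: for simple/main profile it is implied by
          * PQUANT >= 9 on non-B pictures; for advanced profile it additionally
          * depends on CONDOVER for I and BI pictures. */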
1565     overlap = 0;
1566     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1567         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1568             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1569             overlap = 1;
1570         }
1571     } else {
1572         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1573             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1574             overlap = 1;
1575         }
1576         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1577             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1578             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1579                 overlap = 1;
1580             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1581                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1582                 overlap = 1;
1583             }
1584         }
1585     }
1586
1587     assert(pic_param->conditional_overlap_flag < 3);
1588     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1589
1590     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1591         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1592          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1593         interpolation_mode = 9; /* Half-pel bilinear */
1594     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1595              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1596               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1597         interpolation_mode = 1; /* Half-pel bicubic */
1598     else
1599         interpolation_mode = 0; /* Quarter-pel bicubic */
1600
1601     BEGIN_BCS_BATCH(batch, 6);
1602     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1603     OUT_BCS_BATCH(batch,
1604                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1605                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1606     OUT_BCS_BATCH(batch,
1607                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1608                   dmv_surface_valid << 15 |
1609                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1610                   pic_param->rounding_control << 13 |
1611                   pic_param->sequence_fields.bits.syncmarker << 12 |
1612                   interpolation_mode << 8 |
1613                   0 << 7 | /* FIXME: scale up or down ??? */
1614                   pic_param->range_reduction_frame << 6 |
1615                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1616                   overlap << 4 |
1617                   !pic_param->picture_fields.bits.is_first_field << 3 |
1618                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1619     OUT_BCS_BATCH(batch,
1620                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1621                   picture_type << 26 |
1622                   fcm << 24 |
1623                   alt_pq << 16 |
1624                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1625                   scale_factor << 0);
1626     OUT_BCS_BATCH(batch,
1627                   unified_mv_mode << 28 |
1628                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1629                   pic_param->fast_uvmc_flag << 26 |
1630                   ref_field_pic_polarity << 25 |
1631                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1632                   pic_param->reference_fields.bits.reference_distance << 20 |
1633                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1634                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1635                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1636                   alt_pquant_edge_mask << 4 |
1637                   alt_pquant_config << 2 |
1638                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1639                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1640     OUT_BCS_BATCH(batch,
1641                   !!pic_param->bitplane_present.value << 31 |
1642                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1643                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1644                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1645                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1646                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1647                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1648                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1649                   pic_param->mv_fields.bits.mv_table << 20 |
1650                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1651                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1652                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1653                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1654                   pic_param->mb_mode_table << 8 |
1655                   trans_ac_y << 6 |
1656                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1657                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1658                   pic_param->cbp_table << 0);
1659     ADVANCE_BCS_BATCH(batch);
1660 }
1661
1662 static void
1663 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1664                              struct decode_state *decode_state,
1665                              struct gen7_mfd_context *gen7_mfd_context)
1666 {
1667     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1668     VAPictureParameterBufferVC1 *pic_param;
1669     int intensitycomp_single;
1670
1671     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1672     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1673
1676     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1677
1678     BEGIN_BCS_BATCH(batch, 6);
1679     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1680     OUT_BCS_BATCH(batch,
1681                   0 << 14 | /* FIXME: double ??? */
1682                   0 << 12 |
1683                   intensitycomp_single << 10 |
1684                   intensitycomp_single << 8 |
1685                   0 << 4 | /* FIXME: interlace mode */
1686                   0);
1687     OUT_BCS_BATCH(batch,
1688                   pic_param->luma_shift << 16 |
1689                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1690     OUT_BCS_BATCH(batch, 0);
1691     OUT_BCS_BATCH(batch, 0);
1692     OUT_BCS_BATCH(batch, 0);
1693     ADVANCE_BCS_BATCH(batch);
1694 }
1695
1696 static void
1697 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1698                               struct decode_state *decode_state,
1699                               struct gen7_mfd_context *gen7_mfd_context)
1700 {
1701     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1702     struct object_surface *obj_surface;
1703     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1704
1705     obj_surface = decode_state->render_object;
1706
1707     if (obj_surface && obj_surface->private_data) {
1708         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1709     }
1710
1711     obj_surface = decode_state->reference_objects[1];
1712
1713     if (obj_surface && obj_surface->private_data) {
1714         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1715     }
1716
1717     BEGIN_BCS_BATCH(batch, 7);
1718     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1719
1720     if (dmv_write_buffer)
1721         OUT_BCS_RELOC(batch, dmv_write_buffer,
1722                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1723                       0);
1724     else
1725         OUT_BCS_BATCH(batch, 0);
1726
1727     OUT_BCS_BATCH(batch, 0);
1728     OUT_BCS_BATCH(batch, 0);
1729
1730     if (dmv_read_buffer)
1731         OUT_BCS_RELOC(batch, dmv_read_buffer,
1732                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1733                       0);
1734     else
1735         OUT_BCS_BATCH(batch, 0);
1736     
1737     OUT_BCS_BATCH(batch, 0);
1738     OUT_BCS_BATCH(batch, 0);
1739                   
1740     ADVANCE_BCS_BATCH(batch);
1741 }
1742
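     /* For advanced profile (profile == 3) the incoming macroblock bit offset
      * does not account for the 0x00 0x00 0x03 emulation-prevention bytes
      * still present in the slice data, so one byte is added back for every
      * such sequence found in the slice header. */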
1743 static int
1744 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1745 {
1746     int out_slice_data_bit_offset;
1747     int slice_header_size = in_slice_data_bit_offset / 8;
1748     int i, j;
1749
1750     if (profile != 3)
1751         out_slice_data_bit_offset = in_slice_data_bit_offset;
1752     else {
1753         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1754             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1755                 i++, j += 2;
1756             }
1757         }
1758
1759         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1760     }
1761
1762     return out_slice_data_bit_offset;
1763 }
1764
1765 static void
1766 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1767                         VAPictureParameterBufferVC1 *pic_param,
1768                         VASliceParameterBufferVC1 *slice_param,
1769                         VASliceParameterBufferVC1 *next_slice_param,
1770                         dri_bo *slice_data_bo,
1771                         struct gen7_mfd_context *gen7_mfd_context)
1772 {
1773     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1774     int next_slice_start_vert_pos;
1775     int macroblock_offset;
1776     uint8_t *slice_data = NULL;
1777
1778     dri_bo_map(slice_data_bo, 0);
1779     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1780     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1781                                                                slice_param->macroblock_offset,
1782                                                                pic_param->sequence_fields.bits.profile);
1783     dri_bo_unmap(slice_data_bo);
1784
1785     if (next_slice_param)
1786         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1787     else
1788         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1789
1790     BEGIN_BCS_BATCH(batch, 5);
1791     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1792     OUT_BCS_BATCH(batch, 
1793                   slice_param->slice_data_size - (macroblock_offset >> 3));
1794     OUT_BCS_BATCH(batch, 
1795                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1796     OUT_BCS_BATCH(batch,
1797                   slice_param->slice_vertical_position << 16 |
1798                   next_slice_start_vert_pos << 0);
1799     OUT_BCS_BATCH(batch,
1800                   (macroblock_offset & 0x7));
1801     ADVANCE_BCS_BATCH(batch);
1802 }
1803
1804 static void
1805 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1806                             struct decode_state *decode_state,
1807                             struct gen7_mfd_context *gen7_mfd_context)
1808 {
1809     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1810     VAPictureParameterBufferVC1 *pic_param;
1811     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1812     dri_bo *slice_data_bo;
1813     int i, j;
1814
1815     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1816     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1817
1818     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1819     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1820     intel_batchbuffer_emit_mi_flush(batch);
1821     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1822     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1823     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1824     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1825     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1826     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1827     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1828
1829     for (j = 0; j < decode_state->num_slice_params; j++) {
1830         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1831         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1832         slice_data_bo = decode_state->slice_datas[j]->bo;
1833         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1834
1835         if (j == decode_state->num_slice_params - 1)
1836             next_slice_group_param = NULL;
1837         else
1838             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1839
1840         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1841             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1842
1843             if (i < decode_state->slice_params[j]->num_elements - 1)
1844                 next_slice_param = slice_param + 1;
1845             else
1846                 next_slice_param = next_slice_group_param;
1847
1848             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1849             slice_param++;
1850         }
1851     }
1852
1853     intel_batchbuffer_end_atomic(batch);
1854     intel_batchbuffer_flush(batch);
1855 }
1856
1857 static void
1858 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1859                           struct decode_state *decode_state,
1860                           struct gen7_mfd_context *gen7_mfd_context)
1861 {
1862     struct object_surface *obj_surface;
1863     VAPictureParameterBufferJPEGBaseline *pic_param;
1864     int subsampling = SUBSAMPLE_YUV420;
1865     int fourcc = VA_FOURCC_IMC3;
1866
1867     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1868
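     /* Derive the render target fourcc and subsampling from the per-component
      * sampling factors in the JPEG frame header. */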
1869     if (pic_param->num_components == 1)
1870         subsampling = SUBSAMPLE_YUV400;
1871     else if (pic_param->num_components == 3) {
1872         int h1 = pic_param->components[0].h_sampling_factor;
1873         int h2 = pic_param->components[1].h_sampling_factor;
1874         int h3 = pic_param->components[2].h_sampling_factor;
1875         int v1 = pic_param->components[0].v_sampling_factor;
1876         int v2 = pic_param->components[1].v_sampling_factor;
1877         int v3 = pic_param->components[2].v_sampling_factor;
1878
1879         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1880             v1 == 2 && v2 == 1 && v3 == 1) {
1881             subsampling = SUBSAMPLE_YUV420;
1882             fourcc = VA_FOURCC_IMC3;
1883         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1884                    v1 == 1 && v2 == 1 && v3 == 1) {
1885             subsampling = SUBSAMPLE_YUV422H;
1886             fourcc = VA_FOURCC_422H;
1887         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1888                    v1 == 1 && v2 == 1 && v3 == 1) {
1889             subsampling = SUBSAMPLE_YUV444;
1890             fourcc = VA_FOURCC_444P;
1891         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1892                    v1 == 1 && v2 == 1 && v3 == 1) {
1893             subsampling = SUBSAMPLE_YUV411;
1894             fourcc = VA_FOURCC_411P;
1895         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1896                    v1 == 2 && v2 == 1 && v3 == 1) {
1897             subsampling = SUBSAMPLE_YUV422V;
1898             fourcc = VA_FOURCC_422V;
1899         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1900                    v1 == 2 && v2 == 2 && v3 == 2) {
1901             subsampling = SUBSAMPLE_YUV422H;
1902             fourcc = VA_FOURCC_422H;
1903         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1904                    v1 == 2 && v2 == 1 && v3 == 1) {
1905             subsampling = SUBSAMPLE_YUV422V;
1906             fourcc = VA_FOURCC_422V;
1907         } else
1908             assert(0);
1909     }
1910     else {
1911         assert(0);
1912     }
1913
1914     /* Current decoded picture */
1915     obj_surface = decode_state->render_object;
1916     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1917
1918     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1919     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1920     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1921     gen7_mfd_context->pre_deblocking_output.valid = 1;
1922
1923     gen7_mfd_context->post_deblocking_output.bo = NULL;
1924     gen7_mfd_context->post_deblocking_output.valid = 0;
1925
1926     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1927     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1928
1929     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1930     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1931
1932     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1933     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1934
1935     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1936     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1937
1938     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1939     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1940 }
1941
1942 static const int va_to_gen7_jpeg_rotation[4] = {
1943     GEN7_JPEG_ROTATION_0,
1944     GEN7_JPEG_ROTATION_90,
1945     GEN7_JPEG_ROTATION_180,
1946     GEN7_JPEG_ROTATION_270
1947 };
1948
1949 static void
1950 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1951                         struct decode_state *decode_state,
1952                         struct gen7_mfd_context *gen7_mfd_context)
1953 {
1954     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1955     VAPictureParameterBufferJPEGBaseline *pic_param;
1956     int chroma_type = GEN7_YUV420;
1957     int frame_width_in_blks;
1958     int frame_height_in_blks;
1959
1960     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1961     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1962
1963     if (pic_param->num_components == 1)
1964         chroma_type = GEN7_YUV400;
1965     else if (pic_param->num_components == 3) {
1966         int h1 = pic_param->components[0].h_sampling_factor;
1967         int h2 = pic_param->components[1].h_sampling_factor;
1968         int h3 = pic_param->components[2].h_sampling_factor;
1969         int v1 = pic_param->components[0].v_sampling_factor;
1970         int v2 = pic_param->components[1].v_sampling_factor;
1971         int v3 = pic_param->components[2].v_sampling_factor;
1972
1973         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1974             v1 == 2 && v2 == 1 && v3 == 1)
1975             chroma_type = GEN7_YUV420;
1976         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1977                  v1 == 1 && v2 == 1 && v3 == 1)
1978             chroma_type = GEN7_YUV422H_2Y;
1979         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1980                  v1 == 1 && v2 == 1 && v3 == 1)
1981             chroma_type = GEN7_YUV444;
1982         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1983                  v1 == 1 && v2 == 1 && v3 == 1)
1984             chroma_type = GEN7_YUV411;
1985         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1986                  v1 == 2 && v2 == 1 && v3 == 1)
1987             chroma_type = GEN7_YUV422V_2Y;
1988         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1989                  v1 == 2 && v2 == 2 && v3 == 2)
1990             chroma_type = GEN7_YUV422H_4Y;
1991         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1992                  v1 == 2 && v2 == 1 && v3 == 1)
1993             chroma_type = GEN7_YUV422V_4Y;
1994         else
1995             assert(0);
1996     }
1997
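     /* Picture dimensions in 8x8 block units, rounded up to a whole number of
      * MCUs for the given chroma type. */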
1998     if (chroma_type == GEN7_YUV400 ||
1999         chroma_type == GEN7_YUV444 ||
2000         chroma_type == GEN7_YUV422V_2Y) {
2001         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2002         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2003     } else if (chroma_type == GEN7_YUV411) {
2004         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2005         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2006     } else {
2007         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2008         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2009     }
2010
2011     BEGIN_BCS_BATCH(batch, 3);
2012     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2013     OUT_BCS_BATCH(batch,
2014                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2015                   (chroma_type << 0));
2016     OUT_BCS_BATCH(batch,
2017                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2018                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2019     ADVANCE_BCS_BATCH(batch);
2020 }
2021
2022 static const int va_to_gen7_jpeg_hufftable[2] = {
2023     MFX_HUFFTABLE_ID_Y,
2024     MFX_HUFFTABLE_ID_UV
2025 };
2026
2027 static void
2028 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2029                                struct decode_state *decode_state,
2030                                struct gen7_mfd_context *gen7_mfd_context,
2031                                int num_tables)
2032 {
2033     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2034     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2035     int index;
2036
2037     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2038         return;
2039
2040     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2041
2042     for (index = 0; index < num_tables; index++) {
2043         int id = va_to_gen7_jpeg_hufftable[index];
2044         if (!huffman_table->load_huffman_table[index])
2045             continue;
2046         BEGIN_BCS_BATCH(batch, 53);
2047         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2048         OUT_BCS_BATCH(batch, id);
2049         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2050         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2051         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2052         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2053         ADVANCE_BCS_BATCH(batch);
2054     }
2055 }
2056
2057 static const int va_to_gen7_jpeg_qm[5] = {
2058     -1,
2059     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2060     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2061     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2062     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2063 };
2064
2065 static void
2066 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2067                        struct decode_state *decode_state,
2068                        struct gen7_mfd_context *gen7_mfd_context)
2069 {
2070     VAPictureParameterBufferJPEGBaseline *pic_param;
2071     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2072     int index;
2073
2074     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2075         return;
2076
2077     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2078     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2079
2080     assert(pic_param->num_components <= 3);
2081
2082     for (index = 0; index < pic_param->num_components; index++) {
2083         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2084         int qm_type;
2085         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2086         unsigned char raster_qm[64];
2087         int j;
2088
2089         if (id > 4 || id < 1)
2090             continue;
2091
2092         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2093             continue;
2094
2095         qm_type = va_to_gen7_jpeg_qm[id];
2096
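             /* The quantiser tables are passed in zig-zag scan order (as
              * stored in the JPEG DQT segment); the QM state expects raster
              * order. */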
2097         for (j = 0; j < 64; j++)
2098             raster_qm[zigzag_direct[j]] = qm[j];
2099
2100         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2101     }
2102 }
2103
2104 static void
2105 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2106                          VAPictureParameterBufferJPEGBaseline *pic_param,
2107                          VASliceParameterBufferJPEGBaseline *slice_param,
2108                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2109                          dri_bo *slice_data_bo,
2110                          struct gen7_mfd_context *gen7_mfd_context)
2111 {
2112     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2113     int scan_component_mask = 0;
2114     int i;
2115
2116     assert(slice_param->num_components > 0);
2117     assert(slice_param->num_components < 4);
2118     assert(slice_param->num_components <= pic_param->num_components);
2119
2120     for (i = 0; i < slice_param->num_components; i++) {
2121         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2122         case 1:
2123             scan_component_mask |= (1 << 0);
2124             break;
2125         case 2:
2126             scan_component_mask |= (1 << 1);
2127             break;
2128         case 3:
2129             scan_component_mask |= (1 << 2);
2130             break;
2131         default:
2132             assert(0);
2133             break;
2134         }
2135     }
2136
2137     BEGIN_BCS_BATCH(batch, 6);
2138     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2139     OUT_BCS_BATCH(batch, 
2140                   slice_param->slice_data_size);
2141     OUT_BCS_BATCH(batch, 
2142                   slice_param->slice_data_offset);
2143     OUT_BCS_BATCH(batch,
2144                   slice_param->slice_horizontal_position << 16 |
2145                   slice_param->slice_vertical_position << 0);
2146     OUT_BCS_BATCH(batch,
2147                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2148                   (scan_component_mask << 27) |                 /* scan components */
2149                   (0 << 26) |   /* disable interrupt allowed */
2150                   (slice_param->num_mcus << 0));                /* MCU count */
2151     OUT_BCS_BATCH(batch,
2152                   (slice_param->restart_interval << 0));    /* RestartInterval */
2153     ADVANCE_BCS_BATCH(batch);
2154 }
2155
2156 /* Workaround for JPEG decoding on Ivybridge */
2157 #ifdef JPEG_WA
2158
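     /* Hard-coded 16x16 AVC intra clip used by the workaround below,
      * presumably decoded first to bring the MFX engine into a known state
      * before the real JPEG frame.  The fields are width, height, slice data,
      * data size in bytes, bit offset of the first macroblock and slice QP. */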
2159 static struct {
2160     int width;
2161     int height;
2162     unsigned char data[32];
2163     int data_size;
2164     int data_bit_offset;
2165     int qp;
2166 } gen7_jpeg_wa_clip = {
2167     16,
2168     16,
2169     {
2170         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2171         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2172     },
2173     14,
2174     40,
2175     28,
2176 };
2177
2178 static void
2179 gen8_jpeg_wa_init(VADriverContextP ctx,
2180                   struct gen7_mfd_context *gen7_mfd_context)
2181 {
2182     struct i965_driver_data *i965 = i965_driver_data(ctx);
2183     VAStatus status;
2184     struct object_surface *obj_surface;
2185
2186     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2187         i965_DestroySurfaces(ctx,
2188                              &gen7_mfd_context->jpeg_wa_surface_id,
2189                              1);
2190
2191     status = i965_CreateSurfaces(ctx,
2192                                  gen7_jpeg_wa_clip.width,
2193                                  gen7_jpeg_wa_clip.height,
2194                                  VA_RT_FORMAT_YUV420,
2195                                  1,
2196                                  &gen7_mfd_context->jpeg_wa_surface_id);
2197     assert(status == VA_STATUS_SUCCESS);
2198
2199     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2200     assert(obj_surface);
2201     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2202     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2203
2204     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2205         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2206                                                                "JPEG WA data",
2207                                                                0x1000,
2208                                                                0x1000);
2209         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2210                        0,
2211                        gen7_jpeg_wa_clip.data_size,
2212                        gen7_jpeg_wa_clip.data);
2213     }
2214 }
2215
2216 static void
2217 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2218                               struct gen7_mfd_context *gen7_mfd_context)
2219 {
2220     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2221
2222     BEGIN_BCS_BATCH(batch, 5);
2223     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2224     OUT_BCS_BATCH(batch,
2225                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2226                   (MFD_MODE_VLD << 15) | /* VLD mode */
2227                   (0 << 10) | /* disable Stream-Out */
2228                   (0 << 9)  | /* Post Deblocking Output */
2229                   (1 << 8)  | /* Pre Deblocking Output */
2230                   (0 << 5)  | /* not in stitch mode */
2231                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2232                   (MFX_FORMAT_AVC << 0));
2233     OUT_BCS_BATCH(batch,
2234                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2235                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2236                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2237                   (0 << 1)  |
2238                   (0 << 0));
2239     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2240     OUT_BCS_BATCH(batch, 0); /* reserved */
2241     ADVANCE_BCS_BATCH(batch);
2242 }
2243
2244 static void
2245 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2246                            struct gen7_mfd_context *gen7_mfd_context)
2247 {
2248     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2249     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2250
2251     BEGIN_BCS_BATCH(batch, 6);
2252     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2253     OUT_BCS_BATCH(batch, 0);
2254     OUT_BCS_BATCH(batch,
2255                   ((obj_surface->orig_width - 1) << 18) |
2256                   ((obj_surface->orig_height - 1) << 4));
2257     OUT_BCS_BATCH(batch,
2258                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2259                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2260                   (0 << 22) | /* surface object control state, ignored */
2261                   ((obj_surface->width - 1) << 3) | /* pitch */
2262                   (0 << 2)  | /* must be 0 */
2263                   (1 << 1)  | /* must be tiled */
2264                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2265     OUT_BCS_BATCH(batch,
2266                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2267                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2268     OUT_BCS_BATCH(batch,
2269                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2270                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2271     ADVANCE_BCS_BATCH(batch);
2272 }
2273
2274 static void
2275 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2276                                  struct gen7_mfd_context *gen7_mfd_context)
2277 {
2278     struct i965_driver_data *i965 = i965_driver_data(ctx);
2279     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2280     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2281     dri_bo *intra_bo;
2282     int i;
2283
2284     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2285                             "intra row store",
2286                             128 * 64,
2287                             0x1000);
2288
2289     BEGIN_BCS_BATCH(batch, 61);
2290     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2291     OUT_BCS_RELOC(batch,
2292                   obj_surface->bo,
2293                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2294                   0);
2295     OUT_BCS_BATCH(batch, 0);
2296     OUT_BCS_BATCH(batch, 0);
2297
2298
2299     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2300     OUT_BCS_BATCH(batch, 0);
2301     OUT_BCS_BATCH(batch, 0);
2302
2303     /* uncompressed-video & stream out, DW 7-12 */
2304     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2305     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2306     OUT_BCS_BATCH(batch, 0);
2307     OUT_BCS_BATCH(batch, 0);
2308     OUT_BCS_BATCH(batch, 0);
2309     OUT_BCS_BATCH(batch, 0);
2310
2311     /* DW 13-15 are for the intra row store scratch buffer */
2312     OUT_BCS_RELOC(batch,
2313                   intra_bo,
2314                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2315                   0);
2316     OUT_BCS_BATCH(batch, 0);
2317     OUT_BCS_BATCH(batch, 0);
2318
2319     /* DW 16-18 are for the deblocking filter */
2320     OUT_BCS_BATCH(batch, 0);
2321     OUT_BCS_BATCH(batch, 0);
2322     OUT_BCS_BATCH(batch, 0);
2323
2324     /* DW 19..50 */
2325     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2326         OUT_BCS_BATCH(batch, 0);
2327         OUT_BCS_BATCH(batch, 0);
2328     }
2329     OUT_BCS_BATCH(batch, 0);
2330
2331     /* DW 52-54 are for the MB status address */
2332     OUT_BCS_BATCH(batch, 0);
2333     OUT_BCS_BATCH(batch, 0);
2334     OUT_BCS_BATCH(batch, 0);
2335     /* DW 56-60 are for the ILDB & second ILDB addresses */
2336     OUT_BCS_BATCH(batch, 0);
2337     OUT_BCS_BATCH(batch, 0);
2338     OUT_BCS_BATCH(batch, 0);
2339     OUT_BCS_BATCH(batch, 0);
2340     OUT_BCS_BATCH(batch, 0);
2341     OUT_BCS_BATCH(batch, 0);
2342
2343     ADVANCE_BCS_BATCH(batch);
2344
2345     dri_bo_unreference(intra_bo);
2346 }
2347
2348 static void
2349 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2350                                      struct gen7_mfd_context *gen7_mfd_context)
2351 {
2352     struct i965_driver_data *i965 = i965_driver_data(ctx);
2353     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2354     dri_bo *bsd_mpc_bo, *mpr_bo;
2355
2356     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2357                               "bsd mpc row store",
2358                               11520, /* 1.5 * 120 * 64 */
2359                               0x1000);
2360
2361     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2362                           "mpr row store",
2363                           7680, /* 1.0 * 120 * 64 */
2364                           0x1000);
2365
2366     BEGIN_BCS_BATCH(batch, 10);
2367     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2368
2369     OUT_BCS_RELOC(batch,
2370                   bsd_mpc_bo,
2371                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2372                   0);
2373
2374     OUT_BCS_BATCH(batch, 0);
2375     OUT_BCS_BATCH(batch, 0);
2376
2377     OUT_BCS_RELOC(batch,
2378                   mpr_bo,
2379                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2380                   0);
2381     OUT_BCS_BATCH(batch, 0);
2382     OUT_BCS_BATCH(batch, 0);
2383
2384     OUT_BCS_BATCH(batch, 0);
2385     OUT_BCS_BATCH(batch, 0);
2386     OUT_BCS_BATCH(batch, 0);
2387
2388     ADVANCE_BCS_BATCH(batch);
2389
2390     dri_bo_unreference(bsd_mpc_bo);
2391     dri_bo_unreference(mpr_bo);
2392 }
2393
2394 static void
2395 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2396                           struct gen7_mfd_context *gen7_mfd_context)
2397 {
2398
2399 }
2400
2401 static void
2402 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2403                            struct gen7_mfd_context *gen7_mfd_context)
2404 {
2405     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2406     int img_struct = 0;
2407     int mbaff_frame_flag = 0;
2408     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2409
2410     BEGIN_BCS_BATCH(batch, 16);
2411     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2412     OUT_BCS_BATCH(batch, 
2413                   width_in_mbs * height_in_mbs);
2414     OUT_BCS_BATCH(batch, 
2415                   ((height_in_mbs - 1) << 16) | 
2416                   ((width_in_mbs - 1) << 0));
2417     OUT_BCS_BATCH(batch, 
2418                   (0 << 24) |
2419                   (0 << 16) |
2420                   (0 << 14) |
2421                   (0 << 13) |
2422                   (0 << 12) | /* differ from GEN6 */
2423                   (0 << 10) |
2424                   (img_struct << 8));
2425     OUT_BCS_BATCH(batch,
2426                   (1 << 10) | /* 4:2:0 */
2427                   (1 << 7) |  /* CABAC */
2428                   (0 << 6) |
2429                   (0 << 5) |
2430                   (0 << 4) |
2431                   (0 << 3) |
2432                   (1 << 2) |
2433                   (mbaff_frame_flag << 1) |
2434                   (0 << 0));
2435     OUT_BCS_BATCH(batch, 0);
2436     OUT_BCS_BATCH(batch, 0);
2437     OUT_BCS_BATCH(batch, 0);
2438     OUT_BCS_BATCH(batch, 0);
2439     OUT_BCS_BATCH(batch, 0);
2440     OUT_BCS_BATCH(batch, 0);
2441     OUT_BCS_BATCH(batch, 0);
2442     OUT_BCS_BATCH(batch, 0);
2443     OUT_BCS_BATCH(batch, 0);
2444     OUT_BCS_BATCH(batch, 0);
2445     OUT_BCS_BATCH(batch, 0);
2446     ADVANCE_BCS_BATCH(batch);
2447 }
2448
2449 static void
2450 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2451                                   struct gen7_mfd_context *gen7_mfd_context)
2452 {
2453     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2454     int i;
2455
2456     BEGIN_BCS_BATCH(batch, 71);
2457     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2458
2459     /* reference surfaces 0..15 */
2460     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2461         OUT_BCS_BATCH(batch, 0); /* top */
2462         OUT_BCS_BATCH(batch, 0); /* bottom */
2463     }
2464
2465     OUT_BCS_BATCH(batch, 0);
2466
2467     /* the current decoding frame/field */
2468     OUT_BCS_BATCH(batch, 0); /* top */
2469     OUT_BCS_BATCH(batch, 0);
2470     OUT_BCS_BATCH(batch, 0);
2471
2472     /* POC List */
2473     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2474         OUT_BCS_BATCH(batch, 0);
2475         OUT_BCS_BATCH(batch, 0);
2476     }
2477
2478     OUT_BCS_BATCH(batch, 0);
2479     OUT_BCS_BATCH(batch, 0);
2480
2481     ADVANCE_BCS_BATCH(batch);
2482 }
2483
2484 static void
2485 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2486                                      struct gen7_mfd_context *gen7_mfd_context)
2487 {
2488     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2489
2490     BEGIN_BCS_BATCH(batch, 11);
2491     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2492     OUT_BCS_RELOC(batch,
2493                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2494                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2495                   0);
2496     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2497     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2498     OUT_BCS_BATCH(batch, 0);
2499     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2500     OUT_BCS_BATCH(batch, 0);
2501     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2502     OUT_BCS_BATCH(batch, 0);
2503     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2504     OUT_BCS_BATCH(batch, 0);
2505     ADVANCE_BCS_BATCH(batch);
2506 }
2507
2508 static void
2509 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2510                             struct gen7_mfd_context *gen7_mfd_context)
2511 {
2512     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2513
2514     /* the input bitstream format on GEN7 differs from GEN6 */
2515     BEGIN_BCS_BATCH(batch, 6);
2516     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2517     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2518     OUT_BCS_BATCH(batch, 0);
2519     OUT_BCS_BATCH(batch,
2520                   (0 << 31) |
2521                   (0 << 14) |
2522                   (0 << 12) |
2523                   (0 << 10) |
2524                   (0 << 8));
2525     OUT_BCS_BATCH(batch,
2526                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2527                   (0 << 5)  |
2528                   (0 << 4)  |
2529                   (1 << 3) | /* LastSlice Flag */
2530                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2531     OUT_BCS_BATCH(batch, 0);
2532     ADVANCE_BCS_BATCH(batch);
2533 }
2534
2535 static void
2536 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2537                              struct gen7_mfd_context *gen7_mfd_context)
2538 {
2539     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2540     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2541     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2542     int first_mb_in_slice = 0;
2543     int slice_type = SLICE_TYPE_I;
2544
2545     BEGIN_BCS_BATCH(batch, 11);
2546     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2547     OUT_BCS_BATCH(batch, slice_type);
2548     OUT_BCS_BATCH(batch, 
2549                   (num_ref_idx_l1 << 24) |
2550                   (num_ref_idx_l0 << 16) |
2551                   (0 << 8) |
2552                   (0 << 0));
2553     OUT_BCS_BATCH(batch, 
2554                   (0 << 29) |
2555                   (1 << 27) |   /* disable Deblocking */
2556                   (0 << 24) |
2557                   (gen7_jpeg_wa_clip.qp << 16) |
2558                   (0 << 8) |
2559                   (0 << 0));
2560     OUT_BCS_BATCH(batch, 
2561                   (slice_ver_pos << 24) |
2562                   (slice_hor_pos << 16) | 
2563                   (first_mb_in_slice << 0));
2564     OUT_BCS_BATCH(batch,
2565                   (next_slice_ver_pos << 16) |
2566                   (next_slice_hor_pos << 0));
2567     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2568     OUT_BCS_BATCH(batch, 0);
2569     OUT_BCS_BATCH(batch, 0);
2570     OUT_BCS_BATCH(batch, 0);
2571     OUT_BCS_BATCH(batch, 0);
2572     ADVANCE_BCS_BATCH(batch);
2573 }
2574
2575 static void
2576 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2577                  struct gen7_mfd_context *gen7_mfd_context)
2578 {
2579     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2580     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2581     intel_batchbuffer_emit_mi_flush(batch);
2582     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2583     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2584     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2585     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2586     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2587     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2588     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2589
2590     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2591     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2592     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2593 }
2594
2595 #endif
2596
2597 void
2598 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2599                              struct decode_state *decode_state,
2600                              struct gen7_mfd_context *gen7_mfd_context)
2601 {
2602     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2603     VAPictureParameterBufferJPEGBaseline *pic_param;
2604     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2605     dri_bo *slice_data_bo;
2606     int i, j, max_selector = 0;
2607
2608     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2609     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2610
2611     /* Currently only Baseline DCT is supported */
2612     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2613     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2614 #ifdef JPEG_WA
2615     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2616 #endif
2617     intel_batchbuffer_emit_mi_flush(batch);
2618     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2619     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2620     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2621     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2622     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2623
2624     for (j = 0; j < decode_state->num_slice_params; j++) {
2625         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2626         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2627         slice_data_bo = decode_state->slice_datas[j]->bo;
2628         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2629
2630         if (j == decode_state->num_slice_params - 1)
2631             next_slice_group_param = NULL;
2632         else
2633             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2634
2635         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2636             int component;
2637
2638             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2639
2640             if (i < decode_state->slice_params[j]->num_elements - 1)
2641                 next_slice_param = slice_param + 1;
2642             else
2643                 next_slice_param = next_slice_group_param;
2644
2645             for (component = 0; component < slice_param->num_components; component++) {
2646                 if (max_selector < slice_param->components[component].dc_table_selector)
2647                     max_selector = slice_param->components[component].dc_table_selector;
2648
2649                 if (max_selector < slice_param->components[component].ac_table_selector)
2650                     max_selector = slice_param->components[component].ac_table_selector;
2651             }
2652
2653             slice_param++;
2654         }
2655     }
2656
2657     assert(max_selector < 2);
2658     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2659
2660     for (j = 0; j < decode_state->num_slice_params; j++) {
2661         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2662         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2663         slice_data_bo = decode_state->slice_datas[j]->bo;
2664         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2665
2666         if (j == decode_state->num_slice_params - 1)
2667             next_slice_group_param = NULL;
2668         else
2669             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2670
2671         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2672             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2673
2674             if (i < decode_state->slice_params[j]->num_elements - 1)
2675                 next_slice_param = slice_param + 1;
2676             else
2677                 next_slice_param = next_slice_group_param;
2678
2679             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2680             slice_param++;
2681         }
2682     }
2683
2684     intel_batchbuffer_end_atomic(batch);
2685     intel_batchbuffer_flush(batch);
2686 }
2687
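     /* VP8 dequantization lookup tables: they map the 7-bit quantization
      * index (0..127) carried in the bitstream to the actual DC/AC quantizer
      * step sizes (cf. the dequantization tables in the VP8 spec, RFC 6386). */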
2688 static const int vp8_dc_qlookup[128] =
2689 {
2690       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2691      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2692      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2693      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2694      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2695      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2696      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2697     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2698 };
2699
2700 static const int vp8_ac_qlookup[128] =
2701 {
2702       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2703      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2704      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2705      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2706      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2707     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2708     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2709     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2710 };
2711
2712 static inline unsigned int vp8_clip_quantization_index(int index)
2713 {
2714     if (index > 127)
2715         return 127;
2716     else if (index < 0)
2717         return 0;
2718
2719     return index;
2720 }
2721
2722 static void
2723 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2724                           struct decode_state *decode_state,
2725                           struct gen7_mfd_context *gen7_mfd_context)
2726 {
2727     struct object_surface *obj_surface;
2728     struct i965_driver_data *i965 = i965_driver_data(ctx);
2729     dri_bo *bo;
2730     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2731     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2732     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2733
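     /* 256 macroblocks of 16 pixels each == 4096 pixels, i.e. frames up to
      * 4K in either dimension. */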
2734     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2735     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2736
2737     intel_update_vp8_frame_store_index(ctx,
2738                                        decode_state,
2739                                        pic_param,
2740                                        gen7_mfd_context->reference_surface);
2741
2742     /* Current decoded picture */
2743     obj_surface = decode_state->render_object;
2744     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2745
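     /* The decoded picture is attached to exactly one of the two output
      * ports: the post-deblocking output when the in-loop filter is enabled
      * for this frame, otherwise the pre-deblocking output. */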
2746     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2747     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2748     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2749     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2750
2751     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2752     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2753     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2754     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2755
2756     intel_ensure_vp8_segmentation_buffer(ctx,
2757         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2758
2759     /* The same as AVC */
2760     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2761     bo = dri_bo_alloc(i965->intel.bufmgr,
2762                       "intra row store",
2763                       width_in_mbs * 64,
2764                       0x1000);
2765     assert(bo);
2766     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2767     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2768
2769     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2770     bo = dri_bo_alloc(i965->intel.bufmgr,
2771                       "deblocking filter row store",
2772                       width_in_mbs * 64 * 4,
2773                       0x1000);
2774     assert(bo);
2775     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2776     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2777
2778     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2779     bo = dri_bo_alloc(i965->intel.bufmgr,
2780                       "bsd mpc row store",
2781                       width_in_mbs * 64 * 2,
2782                       0x1000);
2783     assert(bo);
2784     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2785     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2786
2787     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2788     bo = dri_bo_alloc(i965->intel.bufmgr,
2789                       "mpr row store",
2790                       width_in_mbs * 64 * 2,
2791                       0x1000);
2792     assert(bo);
2793     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2794     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2795
2796     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2797 }
2798
2799 static void
2800 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2801                        struct decode_state *decode_state,
2802                        struct gen7_mfd_context *gen7_mfd_context)
2803 {
2804     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2805     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2806     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2807     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2808     dri_bo *probs_bo = decode_state->probability_data->bo;
2809     int i, j, log2num;
2810     unsigned int quantization_value[4][6];
2811
2812     /* There is no safe way to error out if the segmentation buffer
2813        could not be allocated. So, instead of aborting, simply decode
2814        anyway, even if the result may look completely inaccurate. */
2815     const unsigned int enable_segmentation =
2816         pic_param->pic_fields.bits.segmentation_enabled &&
2817         gen7_mfd_context->segmentation_buffer.valid;
2818         
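     /* num_of_partitions counts the first (mode/mv) partition plus the DCT
      * token partitions; the latter is a power of two (1, 2, 4 or 8), so
      * log2num ends up in the range 0..3. */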
2819     log2num = (int)log2(slice_param->num_of_partitions - 1);
2820
2821     BEGIN_BCS_BATCH(batch, 38);
2822     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2823     OUT_BCS_BATCH(batch,
2824                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2825                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2826     OUT_BCS_BATCH(batch,
2827                   log2num << 24 |
2828                   pic_param->pic_fields.bits.sharpness_level << 16 |
2829                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2830                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2831                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2832                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2833                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2834                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2835                   (enable_segmentation &&
2836                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2837                   (enable_segmentation &&
2838                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2839                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicates an intra frame in the VP8 stream/spec (§9.1) */
2840                   pic_param->pic_fields.bits.filter_type << 4 |
2841                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2842                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2843
2844     OUT_BCS_BATCH(batch,
2845                   pic_param->loop_filter_level[3] << 24 |
2846                   pic_param->loop_filter_level[2] << 16 |
2847                   pic_param->loop_filter_level[1] <<  8 |
2848                   pic_param->loop_filter_level[0] <<  0);
2849
2850     /* Quantizer values for the 4 segments, DW4-DW15 */
2851     for (i = 0; i < 4; i++) {
2852         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])]; /* yac */
2853         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])]; /* ydc */
2854         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2855         /* y2ac is scaled by 155/100: 101581 / 65536 is ~1.55, e.g. an AC step of 100 becomes (101581 * 100) >> 16 = 155 */
2856         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2857         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])]; /* uvdc */
2858         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])]; /* uvac */
2859
             /* clamp like the reference VP8 dequantizer: y2ac has a floor of 8, uvdc a cap of 132 */
2860         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2861         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2862
2863         OUT_BCS_BATCH(batch,
2864                       quantization_value[i][0] << 16 | /* Y1AC */
2865                       quantization_value[i][1] <<  0); /* Y1DC */
2866         OUT_BCS_BATCH(batch,
2867                       quantization_value[i][5] << 16 | /* UVAC */
2868                       quantization_value[i][4] <<  0); /* UVDC */
2869         OUT_BCS_BATCH(batch,
2870                       quantization_value[i][3] << 16 | /* Y2AC */
2871                       quantization_value[i][2] <<  0); /* Y2DC */
2872     }
2873
2874     /* CoeffProbability table for non-key frame, DW16-DW18 */
2875     if (probs_bo) {
2876         OUT_BCS_RELOC(batch, probs_bo,
2877                       0, I915_GEM_DOMAIN_INSTRUCTION,
2878                       0);
2879         OUT_BCS_BATCH(batch, 0);
2880         OUT_BCS_BATCH(batch, 0);
2881     } else {
2882         OUT_BCS_BATCH(batch, 0);
2883         OUT_BCS_BATCH(batch, 0);
2884         OUT_BCS_BATCH(batch, 0);
2885     }
2886
2887     OUT_BCS_BATCH(batch,
2888                   pic_param->mb_segment_tree_probs[2] << 16 |
2889                   pic_param->mb_segment_tree_probs[1] <<  8 |
2890                   pic_param->mb_segment_tree_probs[0] <<  0);
2891
2892     OUT_BCS_BATCH(batch,
2893                   pic_param->prob_skip_false << 24 |
2894                   pic_param->prob_intra      << 16 |
2895                   pic_param->prob_last       <<  8 |
2896                   pic_param->prob_gf         <<  0);
2897
2898     OUT_BCS_BATCH(batch,
2899                   pic_param->y_mode_probs[3] << 24 |
2900                   pic_param->y_mode_probs[2] << 16 |
2901                   pic_param->y_mode_probs[1] <<  8 |
2902                   pic_param->y_mode_probs[0] <<  0);
2903
2904     OUT_BCS_BATCH(batch,
2905                   pic_param->uv_mode_probs[2] << 16 |
2906                   pic_param->uv_mode_probs[1] <<  8 |
2907                   pic_param->uv_mode_probs[0] <<  0);
2908     
2909     /* MV update value, DW23-DW32 */
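     /* mv_probs[] holds 19 probabilities per component; the unused top byte
      * of the last DWORD is written as zero (hence the j + 3 == 19 special
      * case). */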
2910     for (i = 0; i < 2; i++) {
2911         for (j = 0; j < 20; j += 4) {
2912             OUT_BCS_BATCH(batch,
2913                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2914                           pic_param->mv_probs[i][j + 2] << 16 |
2915                           pic_param->mv_probs[i][j + 1] <<  8 |
2916                           pic_param->mv_probs[i][j + 0] <<  0);
2917         }
2918     }
2919
2920     OUT_BCS_BATCH(batch,
2921                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2922                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2923                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2924                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2925
2926     OUT_BCS_BATCH(batch,
2927                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2928                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2929                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2930                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2931
2932     /* segmentation id stream base address, DW35-DW37 */
2933     if (enable_segmentation) {
2934         OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2935                       0, I915_GEM_DOMAIN_INSTRUCTION,
2936                       0);
2937         OUT_BCS_BATCH(batch, 0);
2938         OUT_BCS_BATCH(batch, 0);
2939     }
2940     else {
2941         OUT_BCS_BATCH(batch, 0);
2942         OUT_BCS_BATCH(batch, 0);
2943         OUT_BCS_BATCH(batch, 0);
2944     }
2945     ADVANCE_BCS_BATCH(batch);
2946 }
2947
2948 static void
2949 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2950                         VAPictureParameterBufferVP8 *pic_param,
2951                         VASliceParameterBufferVP8 *slice_param,
2952                         dri_bo *slice_data_bo,
2953                         struct gen7_mfd_context *gen7_mfd_context)
2954 {
2955     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2956     int i, log2num;
2957     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
2958     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
2959     unsigned int partition_size_0 = slice_param->partition_size[0];
2960
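     /* The "Partition 0 CPBAC Entropy Count" programmed below appears to be
      * the number of bits of the current byte already consumed by the
      * boolean decoder, hence used_bits = 8 - bool_coder_ctx.count.  When
      * the byte has been fully consumed, start at the next byte and report
      * 0 used bits instead of 8. */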
2961     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
2962     if (used_bits == 8) {
2963         used_bits = 0;
2964         offset += 1;
2965         partition_size_0 -= 1;
2966     }
2967
2968     assert(slice_param->num_of_partitions >= 2);
2969     assert(slice_param->num_of_partitions <= 9);
2970
2971     log2num = (int)log2(slice_param->num_of_partitions - 1);
2972
2973     BEGIN_BCS_BATCH(batch, 22);
2974     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2975     OUT_BCS_BATCH(batch,
2976                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2977                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2978                   log2num << 4 |
2979                   (slice_param->macroblock_offset & 0x7));
2980     OUT_BCS_BATCH(batch,
2981                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2982                   0);
2983
2984     OUT_BCS_BATCH(batch, partition_size_0);
2985     OUT_BCS_BATCH(batch, offset);
2986     // partition sizes in bytes are stored right after the first partition when there is more than one token partition
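     // e.g. with 4 token partitions (num_of_partitions == 5) there are three
     // 3-byte size fields after partition 0, so the first token partition's
     // data starts partition_size_0 + 9 bytes past the offset computed above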
2987     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
2988     for (i = 1; i < 9; i++) {
2989         if (i < slice_param->num_of_partitions) {
2990             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2991             OUT_BCS_BATCH(batch, offset);
2992         } else {
2993             OUT_BCS_BATCH(batch, 0);
2994             OUT_BCS_BATCH(batch, 0);
2995         }
2996
2997         offset += slice_param->partition_size[i];
2998     }
2999
3000     OUT_BCS_BATCH(batch,
3001                   1 << 31 | /* concealment method */
3002                   0);
3003
3004     ADVANCE_BCS_BATCH(batch);
3005 }
3006
3007 void
3008 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3009                             struct decode_state *decode_state,
3010                             struct gen7_mfd_context *gen7_mfd_context)
3011 {
3012     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3013     VAPictureParameterBufferVP8 *pic_param;
3014     VASliceParameterBufferVP8 *slice_param;
3015     dri_bo *slice_data_bo;
3016
3017     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3018     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3019
3020     /* one slice per frame */
3021     if (decode_state->num_slice_params != 1 ||
3022         (!decode_state->slice_params ||
3023          !decode_state->slice_params[0] ||
3024          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3025         (!decode_state->slice_datas ||
3026          !decode_state->slice_datas[0] ||
3027          !decode_state->slice_datas[0]->bo) ||
3028         !decode_state->probability_data) {
3029         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3030
3031         return;
3032     }
3033
3034     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3035     slice_data_bo = decode_state->slice_datas[0]->bo;
3036
3037     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3038     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3039     intel_batchbuffer_emit_mi_flush(batch);
3040     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3041     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3042     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3043     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3044     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3045     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3046     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3047     intel_batchbuffer_end_atomic(batch);
3048     intel_batchbuffer_flush(batch);
3049 }
3050
3051 static VAStatus
3052 gen8_mfd_decode_picture(VADriverContextP ctx, 
3053                         VAProfile profile, 
3054                         union codec_state *codec_state,
3055                         struct hw_context *hw_context)
3057 {
3058     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3059     struct decode_state *decode_state = &codec_state->decode;
3060     VAStatus vaStatus;
3061
3062     assert(gen7_mfd_context);
3063
3064     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3065
3066     if (vaStatus != VA_STATUS_SUCCESS)
3067         goto out;
3068
3069     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3070
3071     switch (profile) {
3072     case VAProfileMPEG2Simple:
3073     case VAProfileMPEG2Main:
3074         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3075         break;
3076         
3077     case VAProfileH264ConstrainedBaseline:
3078     case VAProfileH264Main:
3079     case VAProfileH264High:
3080     case VAProfileH264StereoHigh:
3081     case VAProfileH264MultiviewHigh:
3082         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3083         break;
3084
3085     case VAProfileVC1Simple:
3086     case VAProfileVC1Main:
3087     case VAProfileVC1Advanced:
3088         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3089         break;
3090
3091     case VAProfileJPEGBaseline:
3092         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3093         break;
3094
3095     case VAProfileVP8Version0_3:
3096         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3097         break;
3098
3099     default:
3100         assert(0);
3101         break;
3102     }
3103
3104     vaStatus = VA_STATUS_SUCCESS;
3105
3106 out:
3107     return vaStatus;
3108 }
3109
3110 static void
3111 gen8_mfd_context_destroy(void *hw_context)
3112 {
3113     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3114
3115     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3116     gen7_mfd_context->post_deblocking_output.bo = NULL;
3117
3118     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3119     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3120
3121     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3122     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3123
3124     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3125     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3126
3127     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3128     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3129
3130     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3131     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3132
3133     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3134     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3135
3136     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3137     gen7_mfd_context->segmentation_buffer.bo = NULL;
3138
3139     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3140
3141     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3142     free(gen7_mfd_context);
3143 }
3144
3145 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3146                                     struct gen7_mfd_context *gen7_mfd_context)
3147 {
3148     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3149     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3150     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3151     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3152 }
3153
3154 struct hw_context *
3155 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3156 {
3157     struct intel_driver_data *intel = intel_driver_data(ctx);
3158     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3159     int i;
3160
3161     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3162     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3163     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3164
3165     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3166         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3167         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3168     }
3169
3170     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3171     gen7_mfd_context->segmentation_buffer.valid = 0;
3172
3173     switch (obj_config->profile) {
3174     case VAProfileMPEG2Simple:
3175     case VAProfileMPEG2Main:
3176         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3177         break;
3178
3179     case VAProfileH264ConstrainedBaseline:
3180     case VAProfileH264Main:
3181     case VAProfileH264High:
3182     case VAProfileH264StereoHigh:
3183     case VAProfileH264MultiviewHigh:
3184         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3185         break;
3186     default:
3187         break;
3188     }
3189     return (struct hw_context *)gen7_mfd_context;
3190 }
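     /* A rough sketch of how the rest of the driver is expected to drive this
      * decoder context (the caller-side names here are hypothetical; the real
      * call sites live elsewhere in the driver, e.g. i965_drv_video.c):
      *
      *     struct hw_context *hw_ctx = gen8_dec_hw_context_init(ctx, obj_config);
      *     VAStatus status = hw_ctx->run(ctx, obj_config->profile, codec_state, hw_ctx);
      *     ...
      *     hw_ctx->destroy(hw_ctx);
      */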