decoder: h264: expose the set of supported MVC profiles.
src/gen8_mfd.c (platform/upstream/libva-intel-driver.git)
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
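/*
 * Zig-zag scan index -> raster-order position. gen8_mfd_mpeg2_qm_state() uses
 * this table to linearize the MPEG-2 quantiser matrices received from VA-API
 * before they are loaded with MFX_QM_STATE.
 */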
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         assert((obj_surface->size & 0x3f) == 0);
78         obj_surface->private_data = gen7_avc_surface;
79     }
80
81     /* DMV buffers now relate to the whole frame, irrespective of
82        field coding modes */
83     if (gen7_avc_surface->dmv_top == NULL) {
84         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
85                                                  "direct mv w/r buffer",
86                                                  width_in_mbs * height_in_mbs * 128,
87                                                  0x1000);
88         assert(gen7_avc_surface->dmv_top);
89     }
90 }
91
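/*
 * MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode for the codec
 * selected by standard_select, and enable either the pre- or the
 * post-deblocking output path depending on what the per-codec init code
 * marked as valid.
 */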
92 static void
93 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
94                           struct decode_state *decode_state,
95                           int standard_select,
96                           struct gen7_mfd_context *gen7_mfd_context)
97 {
98     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
99
100     assert(standard_select == MFX_FORMAT_MPEG2 ||
101            standard_select == MFX_FORMAT_AVC ||
102            standard_select == MFX_FORMAT_VC1 ||
103            standard_select == MFX_FORMAT_JPEG ||
104            standard_select == MFX_FORMAT_VP8);
105
106     BEGIN_BCS_BATCH(batch, 5);
107     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
108     OUT_BCS_BATCH(batch,
109                   (MFX_LONG_MODE << 17) | /* currently only the long format is supported */
110                   (MFD_MODE_VLD << 15) | /* VLD mode */
111                   (0 << 10) | /* disable Stream-Out */
112                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
113                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
114                   (0 << 5)  | /* not in stitch mode */
115                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
116                   (standard_select << 0));
117     OUT_BCS_BATCH(batch,
118                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
119                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
120                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
121                   (0 << 1)  |
122                   (0 << 0));
123     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
124     OUT_BCS_BATCH(batch, 0); /* reserved */
125     ADVANCE_BCS_BATCH(batch);
126 }
127
128 static void
129 gen8_mfd_surface_state(VADriverContextP ctx,
130                        struct decode_state *decode_state,
131                        int standard_select,
132                        struct gen7_mfd_context *gen7_mfd_context)
133 {
134     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
135     struct object_surface *obj_surface = decode_state->render_object;
136     unsigned int y_cb_offset;
137     unsigned int y_cr_offset;
138     unsigned int surface_format;
139
140     assert(obj_surface);
141
142     y_cb_offset = obj_surface->y_cb_offset;
143     y_cr_offset = obj_surface->y_cr_offset;
144
145     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
146         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
147
148     BEGIN_BCS_BATCH(batch, 6);
149     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
150     OUT_BCS_BATCH(batch, 0);
151     OUT_BCS_BATCH(batch,
152                   ((obj_surface->orig_height - 1) << 18) |
153                   ((obj_surface->orig_width - 1) << 4));
154     OUT_BCS_BATCH(batch,
155                   (surface_format << 28) | /* 420 planar YUV surface */
156                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
157                   (0 << 22) | /* surface object control state, ignored */
158                   ((obj_surface->width - 1) << 3) | /* pitch */
159                   (0 << 2)  | /* must be 0 */
160                   (1 << 1)  | /* must be tiled */
161                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
162     OUT_BCS_BATCH(batch,
163                   (0 << 16) | /* X offset for U(Cb), must be 0 */
164                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
165     OUT_BCS_BATCH(batch,
166                   (0 << 16) | /* X offset for V(Cr), must be 0 */
167                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codecs, non-zero for JPEG */
168     ADVANCE_BCS_BATCH(batch);
169 }
170
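/*
 * MFX_PIPE_BUF_ADDR_STATE (61 dwords on Gen8, every base address being a
 * 64-bit pair): pre-/post-deblocking destinations, the intra and
 * deblocking-filter row-store scratch buffers, and the 16 reference picture
 * base addresses in DW 19..50 (two dwords each).
 */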
171 static void
172 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
173                              struct decode_state *decode_state,
174                              int standard_select,
175                              struct gen7_mfd_context *gen7_mfd_context)
176 {
177     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
178     int i;
179
180     BEGIN_BCS_BATCH(batch, 61);
181     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
182     /* Pre-deblock 1-3 */
183     if (gen7_mfd_context->pre_deblocking_output.valid)
184         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
185                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
186                       0);
187     else
188         OUT_BCS_BATCH(batch, 0);
189
190     OUT_BCS_BATCH(batch, 0);
191     OUT_BCS_BATCH(batch, 0);
192     /* Post-deblocking 4-6 */
193     if (gen7_mfd_context->post_deblocking_output.valid)
194         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
195                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
196                       0);
197     else
198         OUT_BCS_BATCH(batch, 0);
199
200     OUT_BCS_BATCH(batch, 0);
201     OUT_BCS_BATCH(batch, 0);
202
203     /* uncompressed-video & stream out 7-12 */
204     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
205     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
206     OUT_BCS_BATCH(batch, 0);
207     OUT_BCS_BATCH(batch, 0);
208     OUT_BCS_BATCH(batch, 0);
209     OUT_BCS_BATCH(batch, 0);
210
211     /* intra row-store scratch 13-15 */
212     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
213         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
214                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
215                       0);
216     else
217         OUT_BCS_BATCH(batch, 0);
218
219     OUT_BCS_BATCH(batch, 0);
220     OUT_BCS_BATCH(batch, 0);
221     /* deblocking-filter-row-store 16-18 */
222     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
223         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
224                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
225                       0);
226     else
227         OUT_BCS_BATCH(batch, 0);
228     OUT_BCS_BATCH(batch, 0);
229     OUT_BCS_BATCH(batch, 0);
230
231     /* DW 19..50 */
232     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
233         struct object_surface *obj_surface;
234
235         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
236             gen7_mfd_context->reference_surface[i].obj_surface &&
237             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
238             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
239
240             OUT_BCS_RELOC(batch, obj_surface->bo,
241                           I915_GEM_DOMAIN_INSTRUCTION, 0,
242                           0);
243         } else {
244             OUT_BCS_BATCH(batch, 0);
245         }
246         
247         OUT_BCS_BATCH(batch, 0);
248     }
249     
250     /* reference property 51 */
251     OUT_BCS_BATCH(batch, 0);  
252         
253     /* Macroblock status & ILDB 52-57 */
254     OUT_BCS_BATCH(batch, 0);
255     OUT_BCS_BATCH(batch, 0);
256     OUT_BCS_BATCH(batch, 0);
257     OUT_BCS_BATCH(batch, 0);
258     OUT_BCS_BATCH(batch, 0);
259     OUT_BCS_BATCH(batch, 0);
260
261     /* the second Macroblock status 58-60 */    
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265
266     ADVANCE_BCS_BATCH(batch);
267 }
268
269 static void
270 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
271                                  dri_bo *slice_data_bo,
272                                  int standard_select,
273                                  struct gen7_mfd_context *gen7_mfd_context)
274 {
275     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
276
277     BEGIN_BCS_BATCH(batch, 26);
278     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
279         /* MFX In BS 1-5 */
280     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
281     OUT_BCS_BATCH(batch, 0);
282     OUT_BCS_BATCH(batch, 0);
283         /* Upper bound 4-5 */   
284     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, upper bound of up to 2G */
285     OUT_BCS_BATCH(batch, 0);
286
287         /* MFX indirect MV 6-10 */
288     OUT_BCS_BATCH(batch, 0);
289     OUT_BCS_BATCH(batch, 0);
290     OUT_BCS_BATCH(batch, 0);
291     OUT_BCS_BATCH(batch, 0);
292     OUT_BCS_BATCH(batch, 0);
293         
294         /* MFX IT_COFF 11-15 */
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299     OUT_BCS_BATCH(batch, 0);
300
301         /* MFX IT_DBLK 16-20 */
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306     OUT_BCS_BATCH(batch, 0);
307
308         /* MFX PAK_BSE object for encoder 21-25 */
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313     OUT_BCS_BATCH(batch, 0);
314
315     ADVANCE_BCS_BATCH(batch);
316 }
317
318 static void
319 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
320                                  struct decode_state *decode_state,
321                                  int standard_select,
322                                  struct gen7_mfd_context *gen7_mfd_context)
323 {
324     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
325
326     BEGIN_BCS_BATCH(batch, 10);
327     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
328
329     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
330         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
331                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
332                       0);
333     else
334         OUT_BCS_BATCH(batch, 0);
335
336     OUT_BCS_BATCH(batch, 0);
337     OUT_BCS_BATCH(batch, 0);
338         /* MPR Row Store Scratch buffer 4-6 */
339     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
340         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
341                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
342                       0);
343     else
344         OUT_BCS_BATCH(batch, 0);
345
346     OUT_BCS_BATCH(batch, 0);
347     OUT_BCS_BATCH(batch, 0);
348
349         /* Bitplane 7-9 */ 
350     if (gen7_mfd_context->bitplane_read_buffer.valid)
351         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
352                       I915_GEM_DOMAIN_INSTRUCTION, 0,
353                       0);
354     else
355         OUT_BCS_BATCH(batch, 0);
356     OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0);
358     ADVANCE_BCS_BATCH(batch);
359 }
360
361 static void
362 gen8_mfd_qm_state(VADriverContextP ctx,
363                   int qm_type,
364                   unsigned char *qm,
365                   int qm_length,
366                   struct gen7_mfd_context *gen7_mfd_context)
367 {
368     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
369     unsigned int qm_buffer[16];
370
371     assert(qm_length <= 16 * 4);
372     memcpy(qm_buffer, qm, qm_length);
373
374     BEGIN_BCS_BATCH(batch, 18);
375     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
376     OUT_BCS_BATCH(batch, qm_type << 0);
377     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
378     ADVANCE_BCS_BATCH(batch);
379 }
380
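/*
 * MFX_AVC_IMG_STATE: frame geometry in macroblocks, the chroma QP index
 * offsets, and the per-picture coding flags (weighted prediction, CABAC,
 * constrained intra prediction, transform 8x8, MBAFF, field/frame) taken from
 * the VA picture parameters.
 */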
381 static void
382 gen8_mfd_avc_img_state(VADriverContextP ctx,
383                        struct decode_state *decode_state,
384                        struct gen7_mfd_context *gen7_mfd_context)
385 {
386     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
387     int img_struct;
388     int mbaff_frame_flag;
389     unsigned int width_in_mbs, height_in_mbs;
390     VAPictureParameterBufferH264 *pic_param;
391
392     assert(decode_state->pic_param && decode_state->pic_param->buffer);
393     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
394     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
395
396     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
397         img_struct = 1;
398     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
399         img_struct = 3;
400     else
401         img_struct = 0;
402
403     if ((img_struct & 0x1) == 0x1) {
404         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
405     } else {
406         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
407     }
408
409     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
410         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
411         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
412     } else {
413         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
414     }
415
416     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
417                         !pic_param->pic_fields.bits.field_pic_flag);
418
419     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
420     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
421
422     /* the MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
423     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
424            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
425     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
426
427     BEGIN_BCS_BATCH(batch, 17);
428     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
429     OUT_BCS_BATCH(batch, 
430                   (width_in_mbs * height_in_mbs - 1));
431     OUT_BCS_BATCH(batch, 
432                   ((height_in_mbs - 1) << 16) | 
433                   ((width_in_mbs - 1) << 0));
434     OUT_BCS_BATCH(batch, 
435                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
436                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
437                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
438                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
439                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
440                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
441                   (img_struct << 8));
442     OUT_BCS_BATCH(batch,
443                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
444                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
445                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
446                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
447                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
448                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
449                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
450                   (mbaff_frame_flag << 1) |
451                   (pic_param->pic_fields.bits.field_pic_flag << 0));
452     OUT_BCS_BATCH(batch, 0);
453     OUT_BCS_BATCH(batch, 0);
454     OUT_BCS_BATCH(batch, 0);
455     OUT_BCS_BATCH(batch, 0);
456     OUT_BCS_BATCH(batch, 0);
457     OUT_BCS_BATCH(batch, 0);
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     ADVANCE_BCS_BATCH(batch);
465 }
466
467 static void
468 gen8_mfd_avc_qm_state(VADriverContextP ctx,
469                       struct decode_state *decode_state,
470                       struct gen7_mfd_context *gen7_mfd_context)
471 {
472     VAIQMatrixBufferH264 *iq_matrix;
473     VAPictureParameterBufferH264 *pic_param;
474
475     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
476         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
477     else
478         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
479
480     assert(decode_state->pic_param && decode_state->pic_param->buffer);
481     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
482
483     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
484     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
485
486     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
487         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
488         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
489     }
490 }
491
492 static inline void
493 gen8_mfd_avc_picid_state(VADriverContextP ctx,
494     struct decode_state *decode_state,
495     struct gen7_mfd_context *gen7_mfd_context)
496 {
497     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
498         gen7_mfd_context->reference_surface);
499 }
500
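/*
 * MFX_AVC_DIRECTMODE_STATE: hands the hardware the direct-MV (DMV) buffer of
 * each reference surface and of the current picture, followed by the
 * top/bottom picture order counts (POC) of the references and of the current
 * picture, which are needed for direct-mode prediction in B slices.
 */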
501 static void
502 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
503                               struct decode_state *decode_state,
504                               VAPictureParameterBufferH264 *pic_param,
505                               VASliceParameterBufferH264 *slice_param,
506                               struct gen7_mfd_context *gen7_mfd_context)
507 {
508     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
509     struct object_surface *obj_surface;
510     GenAvcSurface *gen7_avc_surface;
511     VAPictureH264 *va_pic;
512     int i;
513
514     BEGIN_BCS_BATCH(batch, 71);
515     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
516
517     /* reference surfaces 0..15 */
518     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
519         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
520             gen7_mfd_context->reference_surface[i].obj_surface &&
521             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
522
523             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
524             gen7_avc_surface = obj_surface->private_data;
525
526             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
527                           I915_GEM_DOMAIN_INSTRUCTION, 0,
528                           0);
529             OUT_BCS_BATCH(batch, 0);
530         } else {
531             OUT_BCS_BATCH(batch, 0);
532             OUT_BCS_BATCH(batch, 0);
533         }
534     }
535     
536     OUT_BCS_BATCH(batch, 0);
537
538     /* the current decoding frame/field */
539     va_pic = &pic_param->CurrPic;
540     obj_surface = decode_state->render_object;
541     assert(obj_surface->bo && obj_surface->private_data);
542     gen7_avc_surface = obj_surface->private_data;
543
544     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
546                   0);
547
548     OUT_BCS_BATCH(batch, 0);
549     OUT_BCS_BATCH(batch, 0);
550
551     /* POC List */
552     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
553         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
554
555         if (obj_surface) {
556             const VAPictureH264 * const va_pic = avc_find_picture(
557                 obj_surface->base.id, pic_param->ReferenceFrames,
558                 ARRAY_ELEMS(pic_param->ReferenceFrames));
559
560             assert(va_pic != NULL);
561             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
562             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
563         } else {
564             OUT_BCS_BATCH(batch, 0);
565             OUT_BCS_BATCH(batch, 0);
566         }
567     }
568
569     va_pic = &pic_param->CurrPic;
570     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
571     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
572
573     ADVANCE_BCS_BATCH(batch);
574 }
575
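/*
 * MFX_AVC_SLICE_STATE: in MBAFF pictures first_mb_in_slice counts macroblock
 * pairs, so it is shifted left by one to obtain a macroblock address; the
 * slice origin is then (first_mb_in_slice % width_in_mbs,
 * first_mb_in_slice / width_in_mbs). Illustrative example: a 1920-pixel-wide
 * stream has width_in_mbs = 120, so first_mb_in_slice = 3600 maps to
 * slice_hor_pos = 0 and slice_ver_pos = 30.
 */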
576 static void
577 gen8_mfd_avc_slice_state(VADriverContextP ctx,
578                          VAPictureParameterBufferH264 *pic_param,
579                          VASliceParameterBufferH264 *slice_param,
580                          VASliceParameterBufferH264 *next_slice_param,
581                          struct gen7_mfd_context *gen7_mfd_context)
582 {
583     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
584     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
585     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
586     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
587     int num_ref_idx_l0, num_ref_idx_l1;
588     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
589                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
590     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
591     int slice_type;
592
593     if (slice_param->slice_type == SLICE_TYPE_I ||
594         slice_param->slice_type == SLICE_TYPE_SI) {
595         slice_type = SLICE_TYPE_I;
596     } else if (slice_param->slice_type == SLICE_TYPE_P ||
597                slice_param->slice_type == SLICE_TYPE_SP) {
598         slice_type = SLICE_TYPE_P;
599     } else { 
600         assert(slice_param->slice_type == SLICE_TYPE_B);
601         slice_type = SLICE_TYPE_B;
602     }
603
604     if (slice_type == SLICE_TYPE_I) {
605         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
606         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
607         num_ref_idx_l0 = 0;
608         num_ref_idx_l1 = 0;
609     } else if (slice_type == SLICE_TYPE_P) {
610         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
611         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
612         num_ref_idx_l1 = 0;
613     } else {
614         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
615         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
616     }
617
618     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
619     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
620     slice_ver_pos = first_mb_in_slice / width_in_mbs;
621
622     if (next_slice_param) {
623         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
624         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
625         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
626     } else {
627         next_slice_hor_pos = 0;
628         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
629     }
630
631     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
632     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
633     OUT_BCS_BATCH(batch, slice_type);
634     OUT_BCS_BATCH(batch, 
635                   (num_ref_idx_l1 << 24) |
636                   (num_ref_idx_l0 << 16) |
637                   (slice_param->chroma_log2_weight_denom << 8) |
638                   (slice_param->luma_log2_weight_denom << 0));
639     OUT_BCS_BATCH(batch, 
640                   (slice_param->direct_spatial_mv_pred_flag << 29) |
641                   (slice_param->disable_deblocking_filter_idc << 27) |
642                   (slice_param->cabac_init_idc << 24) |
643                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
644                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
645                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
646     OUT_BCS_BATCH(batch, 
647                   (slice_ver_pos << 24) |
648                   (slice_hor_pos << 16) | 
649                   (first_mb_in_slice << 0));
650     OUT_BCS_BATCH(batch,
651                   (next_slice_ver_pos << 16) |
652                   (next_slice_hor_pos << 0));
653     OUT_BCS_BATCH(batch, 
654                   (next_slice_param == NULL) << 19); /* last slice flag */
655     OUT_BCS_BATCH(batch, 0);
656     OUT_BCS_BATCH(batch, 0);
657     OUT_BCS_BATCH(batch, 0);
658     OUT_BCS_BATCH(batch, 0);
659     ADVANCE_BCS_BATCH(batch);
660 }
661
662 static inline void
663 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
664                            VAPictureParameterBufferH264 *pic_param,
665                            VASliceParameterBufferH264 *slice_param,
666                            struct gen7_mfd_context *gen7_mfd_context)
667 {
668     gen6_send_avc_ref_idx_state(
669         gen7_mfd_context->base.batch,
670         slice_param,
671         gen7_mfd_context->reference_surface
672     );
673 }
674
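/*
 * MFX_AVC_WEIGHTOFFSET_STATE: one command per active weight table -- L0 only
 * for explicitly weighted P/SP slices, L0 and L1 for B slices with
 * weighted_bipred_idc == 1 (explicit). Each of the 32 entries packs six
 * 16-bit values: luma weight/offset and the Cb/Cr weight/offset pairs.
 */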
675 static void
676 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
677                                 VAPictureParameterBufferH264 *pic_param,
678                                 VASliceParameterBufferH264 *slice_param,
679                                 struct gen7_mfd_context *gen7_mfd_context)
680 {
681     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
682     int i, j, num_weight_offset_table = 0;
683     short weightoffsets[32 * 6];
684
685     if ((slice_param->slice_type == SLICE_TYPE_P ||
686          slice_param->slice_type == SLICE_TYPE_SP) &&
687         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
688         num_weight_offset_table = 1;
689     }
690     
691     if ((slice_param->slice_type == SLICE_TYPE_B) &&
692         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
693         num_weight_offset_table = 2;
694     }
695
696     for (i = 0; i < num_weight_offset_table; i++) {
697         BEGIN_BCS_BATCH(batch, 98);
698         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
699         OUT_BCS_BATCH(batch, i);
700
701         if (i == 0) {
702             for (j = 0; j < 32; j++) {
703                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
704                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
705                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
706                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
707                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
708                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
709             }
710         } else {
711             for (j = 0; j < 32; j++) {
712                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
713                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
714                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
715                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
716                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
717                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
718             }
719         }
720
721         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
722         ADVANCE_BCS_BATCH(batch);
723     }
724 }
725
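/*
 * MFD_AVC_BSD_OBJECT: points the BSD unit at one slice in the indirect
 * bitstream buffer. avc_get_first_mb_bit_offset() returns the bit position of
 * the first macroblock, which DW4 splits into a byte offset plus a residual
 * 0..7 bit offset so the hardware starts decoding at the first macroblock.
 */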
726 static void
727 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
728                         VAPictureParameterBufferH264 *pic_param,
729                         VASliceParameterBufferH264 *slice_param,
730                         dri_bo *slice_data_bo,
731                         VASliceParameterBufferH264 *next_slice_param,
732                         struct gen7_mfd_context *gen7_mfd_context)
733 {
734     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
735     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
736                                                             slice_param,
737                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
738
739     /* the input bitstream format on GEN7 differs from GEN6 */
740     BEGIN_BCS_BATCH(batch, 6);
741     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
742     OUT_BCS_BATCH(batch, 
743                   (slice_param->slice_data_size));
744     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
745     OUT_BCS_BATCH(batch,
746                   (0 << 31) |
747                   (0 << 14) |
748                   (0 << 12) |
749                   (0 << 10) |
750                   (0 << 8));
751     OUT_BCS_BATCH(batch,
752                   ((slice_data_bit_offset >> 3) << 16) |
753                   (1 << 7)  |
754                   (0 << 5)  |
755                   (0 << 4)  |
756                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
757                   (slice_data_bit_offset & 0x7));
758     OUT_BCS_BATCH(batch, 0);
759     ADVANCE_BCS_BATCH(batch);
760 }
761
762 static inline void
763 gen8_mfd_avc_context_init(
764     VADriverContextP         ctx,
765     struct gen7_mfd_context *gen7_mfd_context
766 )
767 {
768     /* Initialize flat scaling lists */
769     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
770 }
771
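/*
 * Per-frame AVC setup: scan the slice headers to decide whether the in-loop
 * deblocked (post-deblocking) output is needed, refresh the reference frame
 * store, make sure the render surface has a buffer object and a DMV buffer
 * attached, and (re)allocate the row-store scratch buffers, which are sized
 * by the picture width in macroblocks.
 */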
772 static void
773 gen8_mfd_avc_decode_init(VADriverContextP ctx,
774                          struct decode_state *decode_state,
775                          struct gen7_mfd_context *gen7_mfd_context)
776 {
777     VAPictureParameterBufferH264 *pic_param;
778     VASliceParameterBufferH264 *slice_param;
779     struct i965_driver_data *i965 = i965_driver_data(ctx);
780     struct object_surface *obj_surface;
781     dri_bo *bo;
782     int i, j, enable_avc_ildb = 0;
783     unsigned int width_in_mbs, height_in_mbs;
784
785     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
786         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
787         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
788
789         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
790             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
791             assert((slice_param->slice_type == SLICE_TYPE_I) ||
792                    (slice_param->slice_type == SLICE_TYPE_SI) ||
793                    (slice_param->slice_type == SLICE_TYPE_P) ||
794                    (slice_param->slice_type == SLICE_TYPE_SP) ||
795                    (slice_param->slice_type == SLICE_TYPE_B));
796
797             if (slice_param->disable_deblocking_filter_idc != 1) {
798                 enable_avc_ildb = 1;
799                 break;
800             }
801
802             slice_param++;
803         }
804     }
805
806     assert(decode_state->pic_param && decode_state->pic_param->buffer);
807     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
808     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
809         gen7_mfd_context->reference_surface);
810     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
811     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
812     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
813     assert(height_in_mbs > 0 && height_in_mbs <= 256);
814
815     /* Current decoded picture */
816     obj_surface = decode_state->render_object;
817     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
818     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
819
820     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
821     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
822
823     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
824     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
825     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
826     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
827
828     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
829     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
830     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
831     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
832
833     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
834     bo = dri_bo_alloc(i965->intel.bufmgr,
835                       "intra row store",
836                       width_in_mbs * 64,
837                       0x1000);
838     assert(bo);
839     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
840     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
841
842     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
843     bo = dri_bo_alloc(i965->intel.bufmgr,
844                       "deblocking filter row store",
845                       width_in_mbs * 64 * 4,
846                       0x1000);
847     assert(bo);
848     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
849     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
850
851     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
852     bo = dri_bo_alloc(i965->intel.bufmgr,
853                       "bsd mpc row store",
854                       width_in_mbs * 64 * 2,
855                       0x1000);
856     assert(bo);
857     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
858     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
859
860     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
861     bo = dri_bo_alloc(i965->intel.bufmgr,
862                       "mpr row store",
863                       width_in_mbs * 64 * 2,
864                       0x1000);
865     assert(bo);
866     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
867     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
868
869     gen7_mfd_context->bitplane_read_buffer.valid = 0;
870 }
871
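/*
 * Top-level AVC decode entry point for a frame. The batch emitted below is:
 * MI_FLUSH, MFX_PIPE_MODE_SELECT, MFX_SURFACE_STATE, MFX_PIPE_BUF_ADDR_STATE,
 * MFX_BSP_BUF_BASE_ADDR_STATE, the AVC QM/PICID/IMG states, then for each
 * slice-data buffer MFX_IND_OBJ_BASE_ADDR_STATE followed by per-slice
 * DIRECTMODE, REF_IDX, WEIGHTOFFSET, SLICE_STATE and BSD_OBJECT commands.
 *
 * For reference, an application reaches this hook through the usual VA-API
 * sequence, roughly as follows (illustrative sketch only; buffer and variable
 * names are made up and error handling is omitted):
 *
 *     vaBeginPicture(dpy, context, render_target);
 *     vaRenderPicture(dpy, context, &pic_param_buf, 1);
 *     vaRenderPicture(dpy, context, &iq_matrix_buf, 1);
 *     vaRenderPicture(dpy, context, slice_bufs, num_slice_bufs);
 *     vaEndPicture(dpy, context);   -- the driver flushes the MFD batch here
 */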
872 static void
873 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
874                             struct decode_state *decode_state,
875                             struct gen7_mfd_context *gen7_mfd_context)
876 {
877     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
878     VAPictureParameterBufferH264 *pic_param;
879     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
880     dri_bo *slice_data_bo;
881     int i, j;
882
883     assert(decode_state->pic_param && decode_state->pic_param->buffer);
884     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
885     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
886
887     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
888     intel_batchbuffer_emit_mi_flush(batch);
889     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
890     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
891     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
892     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
893     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
894     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
895     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
896
897     for (j = 0; j < decode_state->num_slice_params; j++) {
898         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
899         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
900         slice_data_bo = decode_state->slice_datas[j]->bo;
901         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
902
903         if (j == decode_state->num_slice_params - 1)
904             next_slice_group_param = NULL;
905         else
906             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
907
908         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
909             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
910             assert((slice_param->slice_type == SLICE_TYPE_I) ||
911                    (slice_param->slice_type == SLICE_TYPE_SI) ||
912                    (slice_param->slice_type == SLICE_TYPE_P) ||
913                    (slice_param->slice_type == SLICE_TYPE_SP) ||
914                    (slice_param->slice_type == SLICE_TYPE_B));
915
916             if (i < decode_state->slice_params[j]->num_elements - 1)
917                 next_slice_param = slice_param + 1;
918             else
919                 next_slice_param = next_slice_group_param;
920
921             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
922             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
923             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
924             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
925             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
926             slice_param++;
927         }
928     }
929
930     intel_batchbuffer_end_atomic(batch);
931     intel_batchbuffer_flush(batch);
932 }
933
934 static void
935 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
936                            struct decode_state *decode_state,
937                            struct gen7_mfd_context *gen7_mfd_context)
938 {
939     VAPictureParameterBufferMPEG2 *pic_param;
940     struct i965_driver_data *i965 = i965_driver_data(ctx);
941     struct object_surface *obj_surface;
942     dri_bo *bo;
943     unsigned int width_in_mbs;
944
945     assert(decode_state->pic_param && decode_state->pic_param->buffer);
946     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
947     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
948
949     mpeg2_set_reference_surfaces(
950         ctx,
951         gen7_mfd_context->reference_surface,
952         decode_state,
953         pic_param
954     );
955
956     /* Current decoded picture */
957     obj_surface = decode_state->render_object;
958     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
959
960     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
961     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
962     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
963     gen7_mfd_context->pre_deblocking_output.valid = 1;
964
965     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
966     bo = dri_bo_alloc(i965->intel.bufmgr,
967                       "bsd mpc row store",
968                       width_in_mbs * 96,
969                       0x1000);
970     assert(bo);
971     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
972     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
973
974     gen7_mfd_context->post_deblocking_output.valid = 0;
975     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
976     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
977     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
978     gen7_mfd_context->bitplane_read_buffer.valid = 0;
979 }
980
981 static void
982 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
983                          struct decode_state *decode_state,
984                          struct gen7_mfd_context *gen7_mfd_context)
985 {
986     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
987     VAPictureParameterBufferMPEG2 *pic_param;
988     unsigned int slice_concealment_disable_bit = 0;
989
990     assert(decode_state->pic_param && decode_state->pic_param->buffer);
991     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
992
993     slice_concealment_disable_bit = 1;
994
995     BEGIN_BCS_BATCH(batch, 13);
996     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
997     OUT_BCS_BATCH(batch,
998                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
999                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1000                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1001                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1002                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1003                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1004                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1005                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1006                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1007                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1008                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1009                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1010     OUT_BCS_BATCH(batch,
1011                   pic_param->picture_coding_type << 9);
1012     OUT_BCS_BATCH(batch,
1013                   (slice_concealment_disable_bit << 31) |
1014                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1015                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1016     OUT_BCS_BATCH(batch, 0);
1017     OUT_BCS_BATCH(batch, 0);
1018     OUT_BCS_BATCH(batch, 0);
1019     OUT_BCS_BATCH(batch, 0);
1020     OUT_BCS_BATCH(batch, 0);
1021     OUT_BCS_BATCH(batch, 0);
1022     OUT_BCS_BATCH(batch, 0);
1023     OUT_BCS_BATCH(batch, 0);
1024     OUT_BCS_BATCH(batch, 0);
1025     ADVANCE_BCS_BATCH(batch);
1026 }
1027
1028 static void
1029 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1030                         struct decode_state *decode_state,
1031                         struct gen7_mfd_context *gen7_mfd_context)
1032 {
1033     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1034     int i, j;
1035
1036     /* Update internal QM state */
1037     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1038         VAIQMatrixBufferMPEG2 * const iq_matrix =
1039             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1040
1041         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1042             iq_matrix->load_intra_quantiser_matrix) {
1043             gen_iq_matrix->load_intra_quantiser_matrix =
1044                 iq_matrix->load_intra_quantiser_matrix;
1045             if (iq_matrix->load_intra_quantiser_matrix) {
1046                 for (j = 0; j < 64; j++)
1047                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1048                         iq_matrix->intra_quantiser_matrix[j];
1049             }
1050         }
1051
1052         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1053             iq_matrix->load_non_intra_quantiser_matrix) {
1054             gen_iq_matrix->load_non_intra_quantiser_matrix =
1055                 iq_matrix->load_non_intra_quantiser_matrix;
1056             if (iq_matrix->load_non_intra_quantiser_matrix) {
1057                 for (j = 0; j < 64; j++)
1058                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1059                         iq_matrix->non_intra_quantiser_matrix[j];
1060             }
1061         }
1062     }
1063
1064     /* Commit QM state to HW */
1065     for (i = 0; i < 2; i++) {
1066         unsigned char *qm = NULL;
1067         int qm_type;
1068
1069         if (i == 0) {
1070             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1071                 qm = gen_iq_matrix->intra_quantiser_matrix;
1072                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1073             }
1074         } else {
1075             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1076                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1077                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1078             }
1079         }
1080
1081         if (!qm)
1082             continue;
1083
1084         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1085     }
1086 }
1087
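/*
 * MFD_MPEG2_BSD_OBJECT: one command per slice. mb_count is the distance in
 * macroblocks between this slice's origin and the next slice's origin (or the
 * bottom of the picture for the last slice); e.g. with width_in_mbs = 45 (a
 * 720-pixel-wide stream), a slice at row 10, column 0 followed by one at
 * row 11, column 0 gives mb_count = 45. When the
 * wa_mpeg2_slice_vertical_position workaround is active, the reported
 * slice_vertical_position of field pictures is halved to compensate for
 * applications that pass it in frame units.
 */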
1088 static void
1089 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1090                           VAPictureParameterBufferMPEG2 *pic_param,
1091                           VASliceParameterBufferMPEG2 *slice_param,
1092                           VASliceParameterBufferMPEG2 *next_slice_param,
1093                           struct gen7_mfd_context *gen7_mfd_context)
1094 {
1095     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1096     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1097     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1098
1099     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1100         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1101         is_field_pic = 1;
1102     is_field_pic_wa = is_field_pic &&
1103         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1104
1105     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1106     hpos0 = slice_param->slice_horizontal_position;
1107
1108     if (next_slice_param == NULL) {
1109         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1110         hpos1 = 0;
1111     } else {
1112         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1113         hpos1 = next_slice_param->slice_horizontal_position;
1114     }
1115
1116     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1117
1118     BEGIN_BCS_BATCH(batch, 5);
1119     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1120     OUT_BCS_BATCH(batch, 
1121                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1122     OUT_BCS_BATCH(batch, 
1123                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1124     OUT_BCS_BATCH(batch,
1125                   hpos0 << 24 |
1126                   vpos0 << 16 |
1127                   mb_count << 8 |
1128                   (next_slice_param == NULL) << 5 |
1129                   (next_slice_param == NULL) << 3 |
1130                   (slice_param->macroblock_offset & 0x7));
1131     OUT_BCS_BATCH(batch,
1132                   (slice_param->quantiser_scale_code << 24) |
1133                   (vpos1 << 8 | hpos1));
1134     ADVANCE_BCS_BATCH(batch);
1135 }
1136
1137 static void
1138 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1139                               struct decode_state *decode_state,
1140                               struct gen7_mfd_context *gen7_mfd_context)
1141 {
1142     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1143     VAPictureParameterBufferMPEG2 *pic_param;
1144     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1145     dri_bo *slice_data_bo;
1146     int i, j;
1147
1148     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1149     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1150
1151     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1152     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1153     intel_batchbuffer_emit_mi_flush(batch);
1154     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1155     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1156     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1157     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1158     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1159     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1160
1161     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1162         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1163             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1164
1165     for (j = 0; j < decode_state->num_slice_params; j++) {
1166         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1167         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1168         slice_data_bo = decode_state->slice_datas[j]->bo;
1169         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1170
1171         if (j == decode_state->num_slice_params - 1)
1172             next_slice_group_param = NULL;
1173         else
1174             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1175
1176         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1177             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1178
1179             if (i < decode_state->slice_params[j]->num_elements - 1)
1180                 next_slice_param = slice_param + 1;
1181             else
1182                 next_slice_param = next_slice_group_param;
1183
1184             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1185             slice_param++;
1186         }
1187     }
1188
1189     intel_batchbuffer_end_atomic(batch);
1190     intel_batchbuffer_flush(batch);
1191 }
1192
1193 static const int va_to_gen7_vc1_pic_type[5] = {
1194     GEN7_VC1_I_PICTURE,
1195     GEN7_VC1_P_PICTURE,
1196     GEN7_VC1_B_PICTURE,
1197     GEN7_VC1_BI_PICTURE,
1198     GEN7_VC1_P_PICTURE,
1199 };
1200
1201 static const int va_to_gen7_vc1_mv[4] = {
1202     1, /* 1-MV */
1203     2, /* 1-MV half-pel */
1204     3, /* 1-MV half-pel bilinear */
1205     0, /* Mixed MV */
1206 };
1207
1208 static const int b_picture_scale_factor[21] = {
1209     128, 85,  170, 64,  192,
1210     51,  102, 153, 204, 43,
1211     215, 37,  74,  111, 148,
1212     185, 222, 32,  96,  160, 
1213     224,
1214 };
1215
1216 static const int va_to_gen7_vc1_condover[3] = {
1217     0,
1218     2,
1219     3
1220 };
1221
1222 static const int va_to_gen7_vc1_profile[4] = {
1223     GEN7_VC1_SIMPLE_PROFILE,
1224     GEN7_VC1_MAIN_PROFILE,
1225     GEN7_VC1_RESERVED_PROFILE,
1226     GEN7_VC1_ADVANCED_PROFILE
1227 };
1228
1229 static void 
1230 gen8_mfd_free_vc1_surface(void **data)
1231 {
1232     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1233
1234     if (!gen7_vc1_surface)
1235         return;
1236
1237     dri_bo_unreference(gen7_vc1_surface->dmv);
1238     free(gen7_vc1_surface);
1239     *data = NULL;
1240 }
1241
1242 static void
1243 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1244                           VAPictureParameterBufferVC1 *pic_param,
1245                           struct object_surface *obj_surface)
1246 {
1247     struct i965_driver_data *i965 = i965_driver_data(ctx);
1248     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1249     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1250     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1251
1252     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1253
1254     if (!gen7_vc1_surface) {
1255         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1256         assert((obj_surface->size & 0x3f) == 0);
1257         obj_surface->private_data = gen7_vc1_surface;
1258     }
1259
1260     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1261
1262     if (gen7_vc1_surface->dmv == NULL) {
1263         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1264                                              "direct mv w/r buffer",
1265                                              width_in_mbs * height_in_mbs * 64,
1266                                              0x1000);
1267     }
1268 }
1269
1270 static void
1271 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1272                          struct decode_state *decode_state,
1273                          struct gen7_mfd_context *gen7_mfd_context)
1274 {
1275     VAPictureParameterBufferVC1 *pic_param;
1276     struct i965_driver_data *i965 = i965_driver_data(ctx);
1277     struct object_surface *obj_surface;
1278     dri_bo *bo;
1279     int width_in_mbs;
1280     int picture_type;
1281
1282     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1283     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1284     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1285     picture_type = pic_param->picture_fields.bits.picture_type;
1286  
1287     intel_update_vc1_frame_store_index(ctx,
1288                                        decode_state,
1289                                        pic_param,
1290                                        gen7_mfd_context->reference_surface);
1291
1292     /* Current decoded picture */
1293     obj_surface = decode_state->render_object;
1294     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1295     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1296
1297     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1298     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1299     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1300     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1301
1302     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1303     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1304     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1305     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1306
1307     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1308     bo = dri_bo_alloc(i965->intel.bufmgr,
1309                       "intra row store",
1310                       width_in_mbs * 64,
1311                       0x1000);
1312     assert(bo);
1313     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1314     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1315
1316     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1317     bo = dri_bo_alloc(i965->intel.bufmgr,
1318                       "deblocking filter row store",
1319                       width_in_mbs * 7 * 64,
1320                       0x1000);
1321     assert(bo);
1322     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1323     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1324
1325     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1326     bo = dri_bo_alloc(i965->intel.bufmgr,
1327                       "bsd mpc row store",
1328                       width_in_mbs * 96,
1329                       0x1000);
1330     assert(bo);
1331     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1332     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1333
1334     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1335
1336     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1337     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1338     
1339     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1340         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1341         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1342         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1343         int src_w, src_h;
1344         uint8_t *src = NULL, *dst = NULL;
1345
1346         assert(decode_state->bit_plane->buffer);
1347         src = decode_state->bit_plane->buffer;
1348
1349         bo = dri_bo_alloc(i965->intel.bufmgr,
1350                           "VC-1 Bitplane",
1351                           bitplane_width * height_in_mbs,
1352                           0x1000);
1353         assert(bo);
1354         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1355
1356         dri_bo_map(bo, True);
1357         assert(bo->virtual);
1358         dst = bo->virtual;
1359
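        /*
         * Repack the VA-API bitplane into the layout expected by the MFX
         * bitplane read buffer: the source packs two 4-bit entries per byte
         * with the even raster-indexed macroblock in the high nibble.  The
         * loop below rebuilds each destination byte so the even macroblock
         * lands in the low nibble and the odd one in the high nibble,
         * advances the destination by bitplane_width bytes per row, and ORs
         * 0x2 into every entry when the picture is skipped.
         */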
1360         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1361             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1362                 int src_index, dst_index;
1363                 int src_shift;
1364                 uint8_t src_value;
1365
1366                 src_index = (src_h * width_in_mbs + src_w) / 2;
1367                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1368                 src_value = ((src[src_index] >> src_shift) & 0xf);
1369
1370                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1371                     src_value |= 0x2;
1372                 }
1373
1374                 dst_index = src_w / 2;
1375                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1376             }
1377
1378             if (src_w & 1)
1379                 dst[src_w / 2] >>= 4;
1380
1381             dst += bitplane_width;
1382         }
1383
1384         dri_bo_unmap(bo);
1385     } else
1386         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1387 }
1388
1389 static void
1390 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1391                        struct decode_state *decode_state,
1392                        struct gen7_mfd_context *gen7_mfd_context)
1393 {
1394     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1395     VAPictureParameterBufferVC1 *pic_param;
1396     struct object_surface *obj_surface;
1397     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1398     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1399     int unified_mv_mode;
1400     int ref_field_pic_polarity = 0;
1401     int scale_factor = 0;
1402     int trans_ac_y = 0;
1403     int dmv_surface_valid = 0;
1404     int brfd = 0;
1405     int fcm = 0;
1406     int picture_type;
1407     int profile;
1408     int overlap;
1409     int interpolation_mode = 0;
1410
1411     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1412     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1413
1414     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1415     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1416     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1417     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1418     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1419     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1420     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1421     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1422
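    /*
     * Derive the MFX ALTPQUANT configuration from the VC-1 DQUANT syntax
     * elements: dquant == 0 disables alternate quantisation, dquant == 2
     * applies ALTPQUANT to all four edges (mask 0xf), and dquant == 1
     * selects the edges via DQPROFILE/DQDBEDGE/DQSBEDGE.  E.g. dquant == 1,
     * dq_frame == 1, dq_profile == 1 and dq_db_edge == 0 yields
     * alt_pquant_config == 1 and alt_pquant_edge_mask == 0x3.
     */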
1423     if (dquant == 0) {
1424         alt_pquant_config = 0;
1425         alt_pquant_edge_mask = 0;
1426     } else if (dquant == 2) {
1427         alt_pquant_config = 1;
1428         alt_pquant_edge_mask = 0xf;
1429     } else {
1430         assert(dquant == 1);
1431         if (dquantfrm == 0) {
1432             alt_pquant_config = 0;
1433             alt_pquant_edge_mask = 0;
1434             alt_pq = 0;
1435         } else {
1436             assert(dquantfrm == 1);
1437             alt_pquant_config = 1;
1438
1439             switch (dqprofile) {
1440             case 3:
1441                 if (dqbilevel == 0) {
1442                     alt_pquant_config = 2;
1443                     alt_pquant_edge_mask = 0;
1444                 } else {
1445                     assert(dqbilevel == 1);
1446                     alt_pquant_config = 3;
1447                     alt_pquant_edge_mask = 0;
1448                 }
1449                 break;
1450                 
1451             case 0:
1452                 alt_pquant_edge_mask = 0xf;
1453                 break;
1454
1455             case 1:
1456                 if (dqdbedge == 3)
1457                     alt_pquant_edge_mask = 0x9;
1458                 else
1459                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1460
1461                 break;
1462
1463             case 2:
1464                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1465                 break;
1466
1467             default:
1468                 assert(0);
1469             }
1470         }
1471     }
1472
1473     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1474         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1475         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1476     } else {
1477         assert(pic_param->mv_fields.bits.mv_mode < 4);
1478         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1479     }
1480
1481     if (pic_param->sequence_fields.bits.interlace == 1 &&
1482         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1483         /* FIXME: calculate reference field picture polarity */
1484         assert(0);
1485         ref_field_pic_polarity = 0;
1486     }
1487
1488     if (pic_param->b_picture_fraction < 21)
1489         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1490
1491     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1492     
1493     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1494         picture_type == GEN7_VC1_I_PICTURE)
1495         picture_type = GEN7_VC1_BI_PICTURE;
1496
1497     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1498         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1499     else {
1500         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1501
1502         /*
1503          * 8.3.6.2.1 Transform Type Selection
1504          * If variable-sized transform coding is not enabled,
1505          * then the 8x8 transform shall be used for all blocks.
1506          * It is also an MFX_VC1_PIC_STATE requirement.
1507          */
1508         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1509             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1510             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1511         }
1512     }
1513
1514     if (picture_type == GEN7_VC1_B_PICTURE) {
1515         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1516
1517         obj_surface = decode_state->reference_objects[1];
1518
1519         if (obj_surface)
1520             gen7_vc1_surface = obj_surface->private_data;
1521
1522         if (!gen7_vc1_surface || 
1523             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1524              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1525             dmv_surface_valid = 0;
1526         else
1527             dmv_surface_valid = 1;
1528     }
1529
1530     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1531
1532     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1533         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1534     else {
1535         if (pic_param->picture_fields.bits.top_field_first)
1536             fcm = 2;
1537         else
1538             fcm = 3;
1539     }
1540
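    /*
     * BRFD derivation, as a worked example of the code below: with
     * b_picture_fraction == 0 the table above gives scale_factor == 128, so
     * for reference_distance == 2, brfd = (128 * 2) >> 8 = 1 and the final
     * value is 2 - 1 - 1 = 0; negative results are clamped to 0.
     */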
1541     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1542         brfd = pic_param->reference_fields.bits.reference_distance;
1543         brfd = (scale_factor * brfd) >> 8;
1544         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1545
1546         if (brfd < 0)
1547             brfd = 0;
1548     }
1549
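    /*
     * Overlap smoothing: for simple/main profile it is derived from
     * PQUANT >= 9 on non-B pictures; for the advanced profile, P pictures
     * need PQUANT >= 9, while I/BI pictures enable it either through
     * PQUANT >= 9 or through a mapped CONDOVER value of 2 or 3.
     */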
1550     overlap = 0;
1551     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1552         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1553             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1554             overlap = 1;
1555         }
1556     } else {
1557         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1558             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1559             overlap = 1;
1560         }
1561         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1562             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1563             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1564                 overlap = 1;
1565             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1566                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1567                 overlap = 1;
1568             }
1569         }
1570     }
1571
1572     assert(pic_param->conditional_overlap_flag < 3);
1573     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1574
1575     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1576         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1577          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1578         interpolation_mode = 9; /* Half-pel bilinear */
1579     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1580              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1581               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1582         interpolation_mode = 1; /* Half-pel bicubic */
1583     else
1584         interpolation_mode = 0; /* Quarter-pel bicubic */
1585
1586     BEGIN_BCS_BATCH(batch, 6);
1587     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1588     OUT_BCS_BATCH(batch,
1589                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1590                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1591     OUT_BCS_BATCH(batch,
1592                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1593                   dmv_surface_valid << 15 |
1594                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1595                   pic_param->rounding_control << 13 |
1596                   pic_param->sequence_fields.bits.syncmarker << 12 |
1597                   interpolation_mode << 8 |
1598                   0 << 7 | /* FIXME: scale up or down ??? */
1599                   pic_param->range_reduction_frame << 6 |
1600                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1601                   overlap << 4 |
1602                   !pic_param->picture_fields.bits.is_first_field << 3 |
1603                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1604     OUT_BCS_BATCH(batch,
1605                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1606                   picture_type << 26 |
1607                   fcm << 24 |
1608                   alt_pq << 16 |
1609                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1610                   scale_factor << 0);
1611     OUT_BCS_BATCH(batch,
1612                   unified_mv_mode << 28 |
1613                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1614                   pic_param->fast_uvmc_flag << 26 |
1615                   ref_field_pic_polarity << 25 |
1616                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1617                   pic_param->reference_fields.bits.reference_distance << 20 |
1618                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1619                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1620                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1621                   alt_pquant_edge_mask << 4 |
1622                   alt_pquant_config << 2 |
1623                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1624                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1625     OUT_BCS_BATCH(batch,
1626                   !!pic_param->bitplane_present.value << 31 |
1627                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1628                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1629                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1630                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1631                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1632                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1633                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1634                   pic_param->mv_fields.bits.mv_table << 20 |
1635                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1636                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1637                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1638                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1639                   pic_param->mb_mode_table << 8 |
1640                   trans_ac_y << 6 |
1641                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1642                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1643                   pic_param->cbp_table << 0);
1644     ADVANCE_BCS_BATCH(batch);
1645 }
1646
1647 static void
1648 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1649                              struct decode_state *decode_state,
1650                              struct gen7_mfd_context *gen7_mfd_context)
1651 {
1652     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1653     VAPictureParameterBufferVC1 *pic_param;
1654     int intensitycomp_single;
1655
1656     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1657     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1658
1661     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1662
1663     BEGIN_BCS_BATCH(batch, 6);
1664     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1665     OUT_BCS_BATCH(batch,
1666                   0 << 14 | /* FIXME: double ??? */
1667                   0 << 12 |
1668                   intensitycomp_single << 10 |
1669                   intensitycomp_single << 8 |
1670                   0 << 4 | /* FIXME: interlace mode */
1671                   0);
1672     OUT_BCS_BATCH(batch,
1673                   pic_param->luma_shift << 16 |
1674                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1675     OUT_BCS_BATCH(batch, 0);
1676     OUT_BCS_BATCH(batch, 0);
1677     OUT_BCS_BATCH(batch, 0);
1678     ADVANCE_BCS_BATCH(batch);
1679 }
1680
1681 static void
1682 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1683                               struct decode_state *decode_state,
1684                               struct gen7_mfd_context *gen7_mfd_context)
1685 {
1686     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1687     struct object_surface *obj_surface;
1688     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1689
1690     obj_surface = decode_state->render_object;
1691
1692     if (obj_surface && obj_surface->private_data) {
1693         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1694     }
1695
1696     obj_surface = decode_state->reference_objects[1];
1697
1698     if (obj_surface && obj_surface->private_data) {
1699         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1700     }
1701
1702     BEGIN_BCS_BATCH(batch, 7);
1703     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1704
1705     if (dmv_write_buffer)
1706         OUT_BCS_RELOC(batch, dmv_write_buffer,
1707                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1708                       0);
1709     else
1710         OUT_BCS_BATCH(batch, 0);
1711
1712     OUT_BCS_BATCH(batch, 0);
1713     OUT_BCS_BATCH(batch, 0);
1714
1715     if (dmv_read_buffer)
1716         OUT_BCS_RELOC(batch, dmv_read_buffer,
1717                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1718                       0);
1719     else
1720         OUT_BCS_BATCH(batch, 0);
1721     
1722     OUT_BCS_BATCH(batch, 0);
1723     OUT_BCS_BATCH(batch, 0);
1724                   
1725     ADVANCE_BCS_BATCH(batch);
1726 }
1727
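/*
 * For the VC-1 advanced profile (profile == 3) the slice data handed to the
 * hardware still contains the 0x00 0x00 0x03 start-code emulation prevention
 * bytes, so the macroblock bit offset is re-expressed against the escaped
 * byte stream: every escape sequence found in the slice header adds one byte
 * (8 bits) to the returned offset.  E.g. a 32-bit offset over a 4-byte header
 * that contains a single 00 00 03 escape comes back as 40.
 */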
1728 static int
1729 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1730 {
1731     int out_slice_data_bit_offset;
1732     int slice_header_size = in_slice_data_bit_offset / 8;
1733     int i, j;
1734
1735     if (profile != 3)
1736         out_slice_data_bit_offset = in_slice_data_bit_offset;
1737     else {
1738         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1739             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1740                 i++, j += 2;
1741             }
1742         }
1743
1744         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1745     }
1746
1747     return out_slice_data_bit_offset;
1748 }
1749
1750 static void
1751 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1752                         VAPictureParameterBufferVC1 *pic_param,
1753                         VASliceParameterBufferVC1 *slice_param,
1754                         VASliceParameterBufferVC1 *next_slice_param,
1755                         dri_bo *slice_data_bo,
1756                         struct gen7_mfd_context *gen7_mfd_context)
1757 {
1758     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1759     int next_slice_start_vert_pos;
1760     int macroblock_offset;
1761     uint8_t *slice_data = NULL;
1762
1763     dri_bo_map(slice_data_bo, 0);
1764     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1765     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1766                                                                slice_param->macroblock_offset,
1767                                                                pic_param->sequence_fields.bits.profile);
1768     dri_bo_unmap(slice_data_bo);
1769
1770     if (next_slice_param)
1771         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1772     else
1773         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1774
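    /*
     * MFD_VC1_BSD_OBJECT addresses the first macroblock by splitting
     * macroblock_offset: the whole bytes (macroblock_offset >> 3) are folded
     * into the indirect data size/offset, and only the residual bit position
     * (macroblock_offset & 0x7) is programmed in the last DWORD.
     */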
1775     BEGIN_BCS_BATCH(batch, 5);
1776     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1777     OUT_BCS_BATCH(batch, 
1778                   slice_param->slice_data_size - (macroblock_offset >> 3));
1779     OUT_BCS_BATCH(batch, 
1780                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1781     OUT_BCS_BATCH(batch,
1782                   slice_param->slice_vertical_position << 16 |
1783                   next_slice_start_vert_pos << 0);
1784     OUT_BCS_BATCH(batch,
1785                   (macroblock_offset & 0x7));
1786     ADVANCE_BCS_BATCH(batch);
1787 }
1788
1789 static void
1790 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1791                             struct decode_state *decode_state,
1792                             struct gen7_mfd_context *gen7_mfd_context)
1793 {
1794     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1795     VAPictureParameterBufferVC1 *pic_param;
1796     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1797     dri_bo *slice_data_bo;
1798     int i, j;
1799
1800     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1801     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1802
1803     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1804     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1805     intel_batchbuffer_emit_mi_flush(batch);
1806     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1807     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1808     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1809     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1810     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1811     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1812     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1813
1814     for (j = 0; j < decode_state->num_slice_params; j++) {
1815         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1816         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1817         slice_data_bo = decode_state->slice_datas[j]->bo;
1818         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1819
1820         if (j == decode_state->num_slice_params - 1)
1821             next_slice_group_param = NULL;
1822         else
1823             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1824
1825         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1826             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1827
1828             if (i < decode_state->slice_params[j]->num_elements - 1)
1829                 next_slice_param = slice_param + 1;
1830             else
1831                 next_slice_param = next_slice_group_param;
1832
1833             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1834             slice_param++;
1835         }
1836     }
1837
1838     intel_batchbuffer_end_atomic(batch);
1839     intel_batchbuffer_flush(batch);
1840 }
1841
1842 static void
1843 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1844                           struct decode_state *decode_state,
1845                           struct gen7_mfd_context *gen7_mfd_context)
1846 {
1847     struct object_surface *obj_surface;
1848     VAPictureParameterBufferJPEGBaseline *pic_param;
1849     int subsampling = SUBSAMPLE_YUV420;
1850     int fourcc = VA_FOURCC_IMC3;
1851
1852     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1853
1854     if (pic_param->num_components == 1)
1855         subsampling = SUBSAMPLE_YUV400;
1856     else if (pic_param->num_components == 3) {
1857         int h1 = pic_param->components[0].h_sampling_factor;
1858         int h2 = pic_param->components[1].h_sampling_factor;
1859         int h3 = pic_param->components[2].h_sampling_factor;
1860         int v1 = pic_param->components[0].v_sampling_factor;
1861         int v2 = pic_param->components[1].v_sampling_factor;
1862         int v3 = pic_param->components[2].v_sampling_factor;
1863
1864         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1865             v1 == 2 && v2 == 1 && v3 == 1) {
1866             subsampling = SUBSAMPLE_YUV420;
1867             fourcc = VA_FOURCC_IMC3;
1868         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1869                    v1 == 1 && v2 == 1 && v3 == 1) {
1870             subsampling = SUBSAMPLE_YUV422H;
1871             fourcc = VA_FOURCC_422H;
1872         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1873                    v1 == 1 && v2 == 1 && v3 == 1) {
1874             subsampling = SUBSAMPLE_YUV444;
1875             fourcc = VA_FOURCC_444P;
1876         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1877                    v1 == 1 && v2 == 1 && v3 == 1) {
1878             subsampling = SUBSAMPLE_YUV411;
1879             fourcc = VA_FOURCC_411P;
1880         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1881                    v1 == 2 && v2 == 1 && v3 == 1) {
1882             subsampling = SUBSAMPLE_YUV422V;
1883             fourcc = VA_FOURCC_422V;
1884         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1885                    v1 == 2 && v2 == 2 && v3 == 2) {
1886             subsampling = SUBSAMPLE_YUV422H;
1887             fourcc = VA_FOURCC_422H;
1888         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1889                    v1 == 2 && v2 == 1 && v3 == 1) {
1890             subsampling = SUBSAMPLE_YUV422V;
1891             fourcc = VA_FOURCC_422V;
1892         } else
1893             assert(0);
1894     }
1895     else {
1896         assert(0);
1897     }
1898
1899     /* Current decoded picture */
1900     obj_surface = decode_state->render_object;
1901     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1902
1903     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1904     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1905     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1906     gen7_mfd_context->pre_deblocking_output.valid = 1;
1907
1908     gen7_mfd_context->post_deblocking_output.bo = NULL;
1909     gen7_mfd_context->post_deblocking_output.valid = 0;
1910
1911     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1912     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1913
1914     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1915     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1916
1917     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1918     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1919
1920     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1921     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1922
1923     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1924     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1925 }
1926
1927 static const int va_to_gen7_jpeg_rotation[4] = {
1928     GEN7_JPEG_ROTATION_0,
1929     GEN7_JPEG_ROTATION_90,
1930     GEN7_JPEG_ROTATION_180,
1931     GEN7_JPEG_ROTATION_270
1932 };
1933
1934 static void
1935 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1936                         struct decode_state *decode_state,
1937                         struct gen7_mfd_context *gen7_mfd_context)
1938 {
1939     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1940     VAPictureParameterBufferJPEGBaseline *pic_param;
1941     int chroma_type = GEN7_YUV420;
1942     int frame_width_in_blks;
1943     int frame_height_in_blks;
1944
1945     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1946     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1947
1948     if (pic_param->num_components == 1)
1949         chroma_type = GEN7_YUV400;
1950     else if (pic_param->num_components == 3) {
1951         int h1 = pic_param->components[0].h_sampling_factor;
1952         int h2 = pic_param->components[1].h_sampling_factor;
1953         int h3 = pic_param->components[2].h_sampling_factor;
1954         int v1 = pic_param->components[0].v_sampling_factor;
1955         int v2 = pic_param->components[1].v_sampling_factor;
1956         int v3 = pic_param->components[2].v_sampling_factor;
1957
1958         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1959             v1 == 2 && v2 == 1 && v3 == 1)
1960             chroma_type = GEN7_YUV420;
1961         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1962                  v1 == 1 && v2 == 1 && v3 == 1)
1963             chroma_type = GEN7_YUV422H_2Y;
1964         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1965                  v1 == 1 && v2 == 1 && v3 == 1)
1966             chroma_type = GEN7_YUV444;
1967         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1968                  v1 == 1 && v2 == 1 && v3 == 1)
1969             chroma_type = GEN7_YUV411;
1970         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1971                  v1 == 2 && v2 == 1 && v3 == 1)
1972             chroma_type = GEN7_YUV422V_2Y;
1973         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1974                  v1 == 2 && v2 == 2 && v3 == 2)
1975             chroma_type = GEN7_YUV422H_4Y;
1976         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1977                  v1 == 2 && v2 == 1 && v3 == 1)
1978             chroma_type = GEN7_YUV422V_4Y;
1979         else
1980             assert(0);
1981     }
1982
1983     if (chroma_type == GEN7_YUV400 ||
1984         chroma_type == GEN7_YUV444 ||
1985         chroma_type == GEN7_YUV422V_2Y) {
1986         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
1987         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
1988     } else if (chroma_type == GEN7_YUV411) {
1989         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
1990         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
1991     } else {
1992         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
1993         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
1994     }
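    /*
     * Worked example: a 640x480 4:2:0 picture takes the last branch above,
     * so frame_width_in_blks = ((640 + 15) / 16) * 2 = 80 and
     * frame_height_in_blks = ((480 + 15) / 16) * 2 = 60.
     */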
1995
1996     BEGIN_BCS_BATCH(batch, 3);
1997     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
1998     OUT_BCS_BATCH(batch,
1999                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2000                   (chroma_type << 0));
2001     OUT_BCS_BATCH(batch,
2002                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2003                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2004     ADVANCE_BCS_BATCH(batch);
2005 }
2006
2007 static const int va_to_gen7_jpeg_hufftable[2] = {
2008     MFX_HUFFTABLE_ID_Y,
2009     MFX_HUFFTABLE_ID_UV
2010 };
2011
2012 static void
2013 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2014                                struct decode_state *decode_state,
2015                                struct gen7_mfd_context *gen7_mfd_context,
2016                                int num_tables)
2017 {
2018     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2019     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2020     int index;
2021
2022     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2023         return;
2024
2025     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2026
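    /*
     * Each MFX_JPEG_HUFF_TABLE_STATE load below is 53 DWORDs: the command
     * header, the table id, and 12 + 12 + 16 + 164 bytes (51 DWORDs) of DC
     * code counts, DC values, AC code counts and AC values.  The 164-byte AC
     * write presumably covers the 162 AC values plus two bytes of padding in
     * the VA-API buffer to stay DWORD aligned.
     */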
2027     for (index = 0; index < num_tables; index++) {
2028         int id = va_to_gen7_jpeg_hufftable[index];
2029         if (!huffman_table->load_huffman_table[index])
2030             continue;
2031         BEGIN_BCS_BATCH(batch, 53);
2032         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2033         OUT_BCS_BATCH(batch, id);
2034         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2035         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2036         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2037         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2038         ADVANCE_BCS_BATCH(batch);
2039     }
2040 }
2041
2042 static const int va_to_gen7_jpeg_qm[5] = {
2043     -1,
2044     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2045     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2046     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2047     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2048 };
2049
2050 static void
2051 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2052                        struct decode_state *decode_state,
2053                        struct gen7_mfd_context *gen7_mfd_context)
2054 {
2055     VAPictureParameterBufferJPEGBaseline *pic_param;
2056     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2057     int index;
2058
2059     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2060         return;
2061
2062     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2063     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2064
2065     assert(pic_param->num_components <= 3);
2066
2067     for (index = 0; index < pic_param->num_components; index++) {
2068         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2069         int qm_type;
2070         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2071         unsigned char raster_qm[64];
2072         int j;
2073
2074         if (id > 4 || id < 1)
2075             continue;
2076
2077         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2078             continue;
2079
2080         qm_type = va_to_gen7_jpeg_qm[id];
2081
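        /*
         * The quantiser tables arrive in zig-zag scan order (the order of
         * the JPEG DQT segment), while gen8_mfd_qm_state() is fed the table
         * in raster order here, so invert the scan through zigzag_direct[]:
         * raster_qm[0] = qm[0], raster_qm[1] = qm[1], raster_qm[8] = qm[2],
         * and so on.
         */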
2082         for (j = 0; j < 64; j++)
2083             raster_qm[zigzag_direct[j]] = qm[j];
2084
2085         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2086     }
2087 }
2088
2089 static void
2090 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2091                          VAPictureParameterBufferJPEGBaseline *pic_param,
2092                          VASliceParameterBufferJPEGBaseline *slice_param,
2093                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2094                          dri_bo *slice_data_bo,
2095                          struct gen7_mfd_context *gen7_mfd_context)
2096 {
2097     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2098     int scan_component_mask = 0;
2099     int i;
2100
2101     assert(slice_param->num_components > 0);
2102     assert(slice_param->num_components < 4);
2103     assert(slice_param->num_components <= pic_param->num_components);
2104
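    /*
     * Build the scan component mask for MFD_JPEG_BSD_OBJECT: bit n is set
     * when the scan contains the (n+1)-th picture component, so a fully
     * interleaved three-component scan ends up with a mask of 0x7.
     */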
2105     for (i = 0; i < slice_param->num_components; i++) {
2106         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2107         case 1:
2108             scan_component_mask |= (1 << 0);
2109             break;
2110         case 2:
2111             scan_component_mask |= (1 << 1);
2112             break;
2113         case 3:
2114             scan_component_mask |= (1 << 2);
2115             break;
2116         default:
2117             assert(0);
2118             break;
2119         }
2120     }
2121
2122     BEGIN_BCS_BATCH(batch, 6);
2123     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2124     OUT_BCS_BATCH(batch, 
2125                   slice_param->slice_data_size);
2126     OUT_BCS_BATCH(batch, 
2127                   slice_param->slice_data_offset);
2128     OUT_BCS_BATCH(batch,
2129                   slice_param->slice_horizontal_position << 16 |
2130                   slice_param->slice_vertical_position << 0);
2131     OUT_BCS_BATCH(batch,
2132                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2133                   (scan_component_mask << 27) |                 /* scan components */
2134                   (0 << 26) |   /* disable interrupt allowed */
2135                   (slice_param->num_mcus << 0));                /* MCU count */
2136     OUT_BCS_BATCH(batch,
2137                   (slice_param->restart_interval << 0));    /* RestartInterval */
2138     ADVANCE_BCS_BATCH(batch);
2139 }
2140
2141 /* Workaround for JPEG decoding on Ivybridge */
2142 #ifdef JPEG_WA
2143
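/*
 * The workaround decodes a tiny pre-canned 16x16 AVC intra clip through the
 * MFX engine before each JPEG picture (see gen8_mfd_jpeg_decode_picture),
 * presumably to leave the pipeline in a known state.  The initialiser below
 * fills the fields in declaration order: width, height, the clip's bitstream
 * bytes, data_size, data_bit_offset and qp.
 */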
2144 static struct {
2145     int width;
2146     int height;
2147     unsigned char data[32];
2148     int data_size;
2149     int data_bit_offset;
2150     int qp;
2151 } gen7_jpeg_wa_clip = {
2152     16,
2153     16,
2154     {
2155         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2156         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2157     },
2158     14,
2159     40,
2160     28,
2161 };
2162
2163 static void
2164 gen8_jpeg_wa_init(VADriverContextP ctx,
2165                   struct gen7_mfd_context *gen7_mfd_context)
2166 {
2167     struct i965_driver_data *i965 = i965_driver_data(ctx);
2168     VAStatus status;
2169     struct object_surface *obj_surface;
2170
2171     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2172         i965_DestroySurfaces(ctx,
2173                              &gen7_mfd_context->jpeg_wa_surface_id,
2174                              1);
2175
2176     status = i965_CreateSurfaces(ctx,
2177                                  gen7_jpeg_wa_clip.width,
2178                                  gen7_jpeg_wa_clip.height,
2179                                  VA_RT_FORMAT_YUV420,
2180                                  1,
2181                                  &gen7_mfd_context->jpeg_wa_surface_id);
2182     assert(status == VA_STATUS_SUCCESS);
2183
2184     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2185     assert(obj_surface);
2186     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2187     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2188
2189     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2190         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2191                                                                "JPEG WA data",
2192                                                                0x1000,
2193                                                                0x1000);
2194         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2195                        0,
2196                        gen7_jpeg_wa_clip.data_size,
2197                        gen7_jpeg_wa_clip.data);
2198     }
2199 }
2200
2201 static void
2202 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2203                               struct gen7_mfd_context *gen7_mfd_context)
2204 {
2205     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2206
2207     BEGIN_BCS_BATCH(batch, 5);
2208     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2209     OUT_BCS_BATCH(batch,
2210                   (MFX_LONG_MODE << 17) | /* currently only the long format is supported */
2211                   (MFD_MODE_VLD << 15) | /* VLD mode */
2212                   (0 << 10) | /* disable Stream-Out */
2213                   (0 << 9)  | /* Post Deblocking Output */
2214                   (1 << 8)  | /* Pre Deblocking Output */
2215                   (0 << 5)  | /* not in stitch mode */
2216                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2217                   (MFX_FORMAT_AVC << 0));
2218     OUT_BCS_BATCH(batch,
2219                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2220                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2221                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2222                   (0 << 1)  |
2223                   (0 << 0));
2224     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2225     OUT_BCS_BATCH(batch, 0); /* reserved */
2226     ADVANCE_BCS_BATCH(batch);
2227 }
2228
2229 static void
2230 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2231                            struct gen7_mfd_context *gen7_mfd_context)
2232 {
2233     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2234     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2235
2236     BEGIN_BCS_BATCH(batch, 6);
2237     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2238     OUT_BCS_BATCH(batch, 0);
2239     OUT_BCS_BATCH(batch,
2240                   ((obj_surface->orig_width - 1) << 18) |
2241                   ((obj_surface->orig_height - 1) << 4));
2242     OUT_BCS_BATCH(batch,
2243                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2244                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2245                   (0 << 22) | /* surface object control state, ignored */
2246                   ((obj_surface->width - 1) << 3) | /* pitch */
2247                   (0 << 2)  | /* must be 0 */
2248                   (1 << 1)  | /* must be tiled */
2249                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2250     OUT_BCS_BATCH(batch,
2251                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2252                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2253     OUT_BCS_BATCH(batch,
2254                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2255                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2256     ADVANCE_BCS_BATCH(batch);
2257 }
2258
2259 static void
2260 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2261                                  struct gen7_mfd_context *gen7_mfd_context)
2262 {
2263     struct i965_driver_data *i965 = i965_driver_data(ctx);
2264     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2265     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2266     dri_bo *intra_bo;
2267     int i;
2268
2269     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2270                             "intra row store",
2271                             128 * 64,
2272                             0x1000);
2273
2274     BEGIN_BCS_BATCH(batch, 61);
2275     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2276     OUT_BCS_RELOC(batch,
2277                   obj_surface->bo,
2278                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2279                   0);
2280     OUT_BCS_BATCH(batch, 0);
2281     OUT_BCS_BATCH(batch, 0);
2282
2283
2284     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2285     OUT_BCS_BATCH(batch, 0);
2286     OUT_BCS_BATCH(batch, 0);
2287
2288     /* uncompressed-video & stream out 7-12 */
2289     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2290     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2291     OUT_BCS_BATCH(batch, 0);
2292     OUT_BCS_BATCH(batch, 0);
2293     OUT_BCS_BATCH(batch, 0);
2294     OUT_BCS_BATCH(batch, 0);
2295
2296     /* the DW 13-15 is for intra row store scratch */
2297     OUT_BCS_RELOC(batch,
2298                   intra_bo,
2299                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2300                   0);
2301     OUT_BCS_BATCH(batch, 0);
2302     OUT_BCS_BATCH(batch, 0);
2303
2304     /* the DW 16-18 is for deblocking filter */
2305     OUT_BCS_BATCH(batch, 0);
2306     OUT_BCS_BATCH(batch, 0);
2307     OUT_BCS_BATCH(batch, 0);
2308
2309     /* DW 19..50 */
2310     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2311         OUT_BCS_BATCH(batch, 0);
2312         OUT_BCS_BATCH(batch, 0);
2313     }
2314     OUT_BCS_BATCH(batch, 0);
2315
2316     /* the DW52-54 is for mb status address */
2317     OUT_BCS_BATCH(batch, 0);
2318     OUT_BCS_BATCH(batch, 0);
2319     OUT_BCS_BATCH(batch, 0);
2320     /* the DW56-60 is for ILDB & second ILDB address */
2321     OUT_BCS_BATCH(batch, 0);
2322     OUT_BCS_BATCH(batch, 0);
2323     OUT_BCS_BATCH(batch, 0);
2324     OUT_BCS_BATCH(batch, 0);
2325     OUT_BCS_BATCH(batch, 0);
2326     OUT_BCS_BATCH(batch, 0);
2327
2328     ADVANCE_BCS_BATCH(batch);
2329
2330     dri_bo_unreference(intra_bo);
2331 }
2332
2333 static void
2334 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2335                                      struct gen7_mfd_context *gen7_mfd_context)
2336 {
2337     struct i965_driver_data *i965 = i965_driver_data(ctx);
2338     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2339     dri_bo *bsd_mpc_bo, *mpr_bo;
2340
2341     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2342                               "bsd mpc row store",
2343                               11520, /* 1.5 * 120 * 64 */
2344                               0x1000);
2345
2346     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2347                           "mpr row store",
2348                           7680, /* 1.0 * 120 * 64 */
2349                           0x1000);
2350
2351     BEGIN_BCS_BATCH(batch, 10);
2352     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2353
2354     OUT_BCS_RELOC(batch,
2355                   bsd_mpc_bo,
2356                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2357                   0);
2358
2359     OUT_BCS_BATCH(batch, 0);
2360     OUT_BCS_BATCH(batch, 0);
2361
2362     OUT_BCS_RELOC(batch,
2363                   mpr_bo,
2364                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2365                   0);
2366     OUT_BCS_BATCH(batch, 0);
2367     OUT_BCS_BATCH(batch, 0);
2368
2369     OUT_BCS_BATCH(batch, 0);
2370     OUT_BCS_BATCH(batch, 0);
2371     OUT_BCS_BATCH(batch, 0);
2372
2373     ADVANCE_BCS_BATCH(batch);
2374
2375     dri_bo_unreference(bsd_mpc_bo);
2376     dri_bo_unreference(mpr_bo);
2377 }
2378
2379 static void
2380 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2381                           struct gen7_mfd_context *gen7_mfd_context)
2382 {
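    /* Nothing to do: no MFX_QM_STATE is emitted for the workaround clip. */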
2383
2384 }
2385
2386 static void
2387 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2388                            struct gen7_mfd_context *gen7_mfd_context)
2389 {
2390     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2391     int img_struct = 0;
2392     int mbaff_frame_flag = 0;
2393     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2394
2395     BEGIN_BCS_BATCH(batch, 16);
2396     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2397     OUT_BCS_BATCH(batch, 
2398                   width_in_mbs * height_in_mbs);
2399     OUT_BCS_BATCH(batch, 
2400                   ((height_in_mbs - 1) << 16) | 
2401                   ((width_in_mbs - 1) << 0));
2402     OUT_BCS_BATCH(batch, 
2403                   (0 << 24) |
2404                   (0 << 16) |
2405                   (0 << 14) |
2406                   (0 << 13) |
2407                   (0 << 12) | /* differ from GEN6 */
2408                   (0 << 10) |
2409                   (img_struct << 8));
2410     OUT_BCS_BATCH(batch,
2411                   (1 << 10) | /* 4:2:0 */
2412                   (1 << 7) |  /* CABAC */
2413                   (0 << 6) |
2414                   (0 << 5) |
2415                   (0 << 4) |
2416                   (0 << 3) |
2417                   (1 << 2) |
2418                   (mbaff_frame_flag << 1) |
2419                   (0 << 0));
2420     OUT_BCS_BATCH(batch, 0);
2421     OUT_BCS_BATCH(batch, 0);
2422     OUT_BCS_BATCH(batch, 0);
2423     OUT_BCS_BATCH(batch, 0);
2424     OUT_BCS_BATCH(batch, 0);
2425     OUT_BCS_BATCH(batch, 0);
2426     OUT_BCS_BATCH(batch, 0);
2427     OUT_BCS_BATCH(batch, 0);
2428     OUT_BCS_BATCH(batch, 0);
2429     OUT_BCS_BATCH(batch, 0);
2430     OUT_BCS_BATCH(batch, 0);
2431     ADVANCE_BCS_BATCH(batch);
2432 }
2433
2434 static void
2435 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2436                                   struct gen7_mfd_context *gen7_mfd_context)
2437 {
2438     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2439     int i;
2440
2441     BEGIN_BCS_BATCH(batch, 71);
2442     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2443
2444     /* reference surfaces 0..15 */
2445     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2446         OUT_BCS_BATCH(batch, 0); /* top */
2447         OUT_BCS_BATCH(batch, 0); /* bottom */
2448     }
2449
2450     OUT_BCS_BATCH(batch, 0);
2451
2452     /* the current decoding frame/field */
2453     OUT_BCS_BATCH(batch, 0); /* top */
2454     OUT_BCS_BATCH(batch, 0);
2455     OUT_BCS_BATCH(batch, 0);
2456
2457     /* POC List */
2458     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2459         OUT_BCS_BATCH(batch, 0);
2460         OUT_BCS_BATCH(batch, 0);
2461     }
2462
2463     OUT_BCS_BATCH(batch, 0);
2464     OUT_BCS_BATCH(batch, 0);
2465
2466     ADVANCE_BCS_BATCH(batch);
2467 }
2468
2469 static void
2470 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2471                                      struct gen7_mfd_context *gen7_mfd_context)
2472 {
2473     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2474
2475     BEGIN_BCS_BATCH(batch, 11);
2476     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2477     OUT_BCS_RELOC(batch,
2478                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2479                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2480                   0);
2481     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2482     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2483     OUT_BCS_BATCH(batch, 0);
2484     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2485     OUT_BCS_BATCH(batch, 0);
2486     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2487     OUT_BCS_BATCH(batch, 0);
2488     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2489     OUT_BCS_BATCH(batch, 0);
2490     ADVANCE_BCS_BATCH(batch);
2491 }
2492
2493 static void
2494 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2495                             struct gen7_mfd_context *gen7_mfd_context)
2496 {
2497     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2498
2499     /* the input bitstream format on GEN7 differs from GEN6 */
2500     BEGIN_BCS_BATCH(batch, 6);
2501     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2502     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2503     OUT_BCS_BATCH(batch, 0);
2504     OUT_BCS_BATCH(batch,
2505                   (0 << 31) |
2506                   (0 << 14) |
2507                   (0 << 12) |
2508                   (0 << 10) |
2509                   (0 << 8));
2510     OUT_BCS_BATCH(batch,
2511                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2512                   (0 << 5)  |
2513                   (0 << 4)  |
2514                   (1 << 3) | /* LastSlice Flag */
2515                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2516     OUT_BCS_BATCH(batch, 0);
2517     ADVANCE_BCS_BATCH(batch);
2518 }
2519
2520 static void
2521 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2522                              struct gen7_mfd_context *gen7_mfd_context)
2523 {
2524     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2525     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2526     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2527     int first_mb_in_slice = 0;
2528     int slice_type = SLICE_TYPE_I;
2529
2530     BEGIN_BCS_BATCH(batch, 11);
2531     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2532     OUT_BCS_BATCH(batch, slice_type);
2533     OUT_BCS_BATCH(batch, 
2534                   (num_ref_idx_l1 << 24) |
2535                   (num_ref_idx_l0 << 16) |
2536                   (0 << 8) |
2537                   (0 << 0));
2538     OUT_BCS_BATCH(batch, 
2539                   (0 << 29) |
2540                   (1 << 27) |   /* disable Deblocking */
2541                   (0 << 24) |
2542                   (gen7_jpeg_wa_clip.qp << 16) |
2543                   (0 << 8) |
2544                   (0 << 0));
2545     OUT_BCS_BATCH(batch, 
2546                   (slice_ver_pos << 24) |
2547                   (slice_hor_pos << 16) | 
2548                   (first_mb_in_slice << 0));
2549     OUT_BCS_BATCH(batch,
2550                   (next_slice_ver_pos << 16) |
2551                   (next_slice_hor_pos << 0));
2552     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2553     OUT_BCS_BATCH(batch, 0);
2554     OUT_BCS_BATCH(batch, 0);
2555     OUT_BCS_BATCH(batch, 0);
2556     OUT_BCS_BATCH(batch, 0);
2557     ADVANCE_BCS_BATCH(batch);
2558 }
2559
2560 static void
2561 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2562                  struct gen7_mfd_context *gen7_mfd_context)
2563 {
2564     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2565     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2566     intel_batchbuffer_emit_mi_flush(batch);
2567     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2568     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2569     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2570     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2571     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2572     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2573     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2574
2575     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2576     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2577     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2578 }
2579
2580 #endif
2581
2582 void
2583 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2584                              struct decode_state *decode_state,
2585                              struct gen7_mfd_context *gen7_mfd_context)
2586 {
2587     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2588     VAPictureParameterBufferJPEGBaseline *pic_param;
2589     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2590     dri_bo *slice_data_bo;
2591     int i, j, max_selector = 0;
2592
2593     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2594     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2595
2596     /* Only Baseline DCT is currently supported */
2597     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2598     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2599 #ifdef JPEG_WA
2600     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2601 #endif
2602     intel_batchbuffer_emit_mi_flush(batch);
2603     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2604     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2605     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2606     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2607     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2608
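         /* The first pass over the scan (slice) parameters only determines the
            highest Huffman table selector in use, so that the right number of
            tables is programmed below; the second pass emits the BSD objects. */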
2609     for (j = 0; j < decode_state->num_slice_params; j++) {
2610         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2611         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2612         slice_data_bo = decode_state->slice_datas[j]->bo;
2613         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2614
2615         if (j == decode_state->num_slice_params - 1)
2616             next_slice_group_param = NULL;
2617         else
2618             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2619
2620         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2621             int component;
2622
2623             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2624
2625             if (i < decode_state->slice_params[j]->num_elements - 1)
2626                 next_slice_param = slice_param + 1;
2627             else
2628                 next_slice_param = next_slice_group_param;
2629
2630             for (component = 0; component < slice_param->num_components; component++) {
2631                 if (max_selector < slice_param->components[component].dc_table_selector)
2632                     max_selector = slice_param->components[component].dc_table_selector;
2633
2634                 if (max_selector < slice_param->components[component].ac_table_selector)
2635                     max_selector = slice_param->components[component].ac_table_selector;
2636             }
2637
2638             slice_param++;
2639         }
2640     }
2641
2642     assert(max_selector < 2);
2643     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2644
2645     for (j = 0; j < decode_state->num_slice_params; j++) {
2646         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2647         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2648         slice_data_bo = decode_state->slice_datas[j]->bo;
2649         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2650
2651         if (j == decode_state->num_slice_params - 1)
2652             next_slice_group_param = NULL;
2653         else
2654             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2655
2656         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2657             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2658
2659             if (i < decode_state->slice_params[j]->num_elements - 1)
2660                 next_slice_param = slice_param + 1;
2661             else
2662                 next_slice_param = next_slice_group_param;
2663
2664             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2665             slice_param++;
2666         }
2667     }
2668
2669     intel_batchbuffer_end_atomic(batch);
2670     intel_batchbuffer_flush(batch);
2671 }
2672
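     /* VP8 dequantization lookup tables: a 7-bit quantization index (0..127)
        selects the actual DC/AC quantizer step (e.g. index 0 maps to a DC step
        of 4, index 127 to a DC step of 157 and an AC step of 284); see the VP8
        specification, RFC 6386. */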
2673 static const int vp8_dc_qlookup[128] =
2674 {
2675       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2676      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2677      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2678      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2679      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2680      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2681      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2682     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2683 };
2684
2685 static const int vp8_ac_qlookup[128] =
2686 {
2687       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2688      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2689      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2690      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2691      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2692     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2693     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2694     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2695 };
2696
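     /* Clamp a quantization index to the 0..127 range covered by the lookup
        tables above. */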
2697 static inline unsigned int vp8_clip_quantization_index(int index)
2698 {
2699     if (index > 127)
2700         return 127;
2701     else if (index < 0)
2702         return 0;
2703
2704     return index;
2705 }
2706
2707 static void
2708 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2709                           struct decode_state *decode_state,
2710                           struct gen7_mfd_context *gen7_mfd_context)
2711 {
2712     struct object_surface *obj_surface;
2713     struct i965_driver_data *i965 = i965_driver_data(ctx);
2714     dri_bo *bo;
2715     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2716     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2717     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2718
2719     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2720     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2721
2722     intel_update_vp8_frame_store_index(ctx,
2723                                        decode_state,
2724                                        pic_param,
2725                                        gen7_mfd_context->reference_surface);
2726
2727     /* Current decoded picture */
2728     obj_surface = decode_state->render_object;
2729     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2730
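         /* Only one of the two outputs is valid for a given frame: with in-loop
            deblocking enabled the post-deblocking surface is written, otherwise
            the pre-deblocking one; both alias the render target BO. */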
2731     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2732     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2733     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2734     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2735
2736     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2737     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2738     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2739     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2740
2741     intel_ensure_vp8_segmentation_buffer(ctx,
2742         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
2743
2744     /* Row-store scratch buffers, sized the same way as for the AVC path */
2745     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2746     bo = dri_bo_alloc(i965->intel.bufmgr,
2747                       "intra row store",
2748                       width_in_mbs * 64,
2749                       0x1000);
2750     assert(bo);
2751     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2752     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2753
2754     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2755     bo = dri_bo_alloc(i965->intel.bufmgr,
2756                       "deblocking filter row store",
2757                       width_in_mbs * 64 * 4,
2758                       0x1000);
2759     assert(bo);
2760     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2761     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2762
2763     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2764     bo = dri_bo_alloc(i965->intel.bufmgr,
2765                       "bsd mpc row store",
2766                       width_in_mbs * 64 * 2,
2767                       0x1000);
2768     assert(bo);
2769     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2770     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2771
2772     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2773     bo = dri_bo_alloc(i965->intel.bufmgr,
2774                       "mpr row store",
2775                       width_in_mbs * 64 * 2,
2776                       0x1000);
2777     assert(bo);
2778     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2779     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2780
2781     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2782 }
2783
2784 static void
2785 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2786                        struct decode_state *decode_state,
2787                        struct gen7_mfd_context *gen7_mfd_context)
2788 {
2789     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2790     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2791     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2792     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2793     dri_bo *probs_bo = decode_state->probability_data->bo;
2794     int i, j, log2num;
2795     unsigned int quantization_value[4][6];
2796
2797     /* There is no safe way to error out if the segmentation buffer
2798        could not be allocated, so instead of aborting simply decode
2799        something, even if the result may look totally inaccurate. */
2800     const unsigned int enable_segmentation =
2801         pic_param->pic_fields.bits.segmentation_enabled &&
2802         gen7_mfd_context->segmentation_buffer.valid;
2803         
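         /* num_of_partitions counts the control (mode/MV) partition plus the DCT
            token partitions, so the hardware gets log2 of the token partition
            count (1, 2, 4 or 8 partitions -> 0..3). */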
2804     log2num = (int)log2(slice_param->num_of_partitions - 1);
2805
2806     BEGIN_BCS_BATCH(batch, 38);
2807     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2808     OUT_BCS_BATCH(batch,
2809                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2810                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2811     OUT_BCS_BATCH(batch,
2812                   log2num << 24 |
2813                   pic_param->pic_fields.bits.sharpness_level << 16 |
2814                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2815                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2816                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2817                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2818                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2819                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2820                   (enable_segmentation &&
2821                    !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
2822                   (enable_segmentation &&
2823                    pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
2824                   (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 | /* 0 indicates an intra frame in the VP8 stream/spec (section 9.1) */
2825                   pic_param->pic_fields.bits.filter_type << 4 |
2826                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2827                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2828
2829     OUT_BCS_BATCH(batch,
2830                   pic_param->loop_filter_level[3] << 24 |
2831                   pic_param->loop_filter_level[2] << 16 |
2832                   pic_param->loop_filter_level[1] <<  8 |
2833                   pic_param->loop_filter_level[0] <<  0);
2834
2835     /* Quantizer values for the 4 segments, DW4-DW15 */
2836     for (i = 0; i < 4; i++) {
2837         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])]; /* yac */
2838         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])]; /* ydc */
2839         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2840         /* 101581 >> 16 is equivalent to 155 / 100 */
2841         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2842         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])]; /* uvdc */
2843         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])]; /* uvac */
2844
2845         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2846         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2847
2848         OUT_BCS_BATCH(batch,
2849                       quantization_value[i][0] << 16 | /* Y1AC */
2850                       quantization_value[i][1] <<  0); /* Y1DC */
2851         OUT_BCS_BATCH(batch,
2852                       quantization_value[i][5] << 16 | /* UVAC */
2853                       quantization_value[i][4] <<  0); /* UVDC */
2854         OUT_BCS_BATCH(batch,
2855                       quantization_value[i][3] << 16 | /* Y2AC */
2856                       quantization_value[i][2] <<  0); /* Y2DC */
2857     }
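         /* For illustration: a segment whose quantization indices are all 0 ends
            up with Y1AC = Y1DC = 4, Y2DC = 2 * 4 = 8,
            Y2AC = max(8, (101581 * 4) >> 16) = 8, and UVDC = UVAC = 4. */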
2858
2859     /* CoeffProbability table for non-key frame, DW16-DW18 */
2860     if (probs_bo) {
2861         OUT_BCS_RELOC(batch, probs_bo,
2862                       0, I915_GEM_DOMAIN_INSTRUCTION,
2863                       0);
2864         OUT_BCS_BATCH(batch, 0);
2865         OUT_BCS_BATCH(batch, 0);
2866     } else {
2867         OUT_BCS_BATCH(batch, 0);
2868         OUT_BCS_BATCH(batch, 0);
2869         OUT_BCS_BATCH(batch, 0);
2870     }
2871
2872     OUT_BCS_BATCH(batch,
2873                   pic_param->mb_segment_tree_probs[2] << 16 |
2874                   pic_param->mb_segment_tree_probs[1] <<  8 |
2875                   pic_param->mb_segment_tree_probs[0] <<  0);
2876
2877     OUT_BCS_BATCH(batch,
2878                   pic_param->prob_skip_false << 24 |
2879                   pic_param->prob_intra      << 16 |
2880                   pic_param->prob_last       <<  8 |
2881                   pic_param->prob_gf         <<  0);
2882
2883     OUT_BCS_BATCH(batch,
2884                   pic_param->y_mode_probs[3] << 24 |
2885                   pic_param->y_mode_probs[2] << 16 |
2886                   pic_param->y_mode_probs[1] <<  8 |
2887                   pic_param->y_mode_probs[0] <<  0);
2888
2889     OUT_BCS_BATCH(batch,
2890                   pic_param->uv_mode_probs[2] << 16 |
2891                   pic_param->uv_mode_probs[1] <<  8 |
2892                   pic_param->uv_mode_probs[0] <<  0);
2893     
2894     /* MV update value, DW23-DW32 */
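         /* mv_probs holds 2 x 19 probabilities (row and column MV components);
            four are packed per DWORD, so the 20th byte of each component is
            zero-padded. */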
2895     for (i = 0; i < 2; i++) {
2896         for (j = 0; j < 20; j += 4) {
2897             OUT_BCS_BATCH(batch,
2898                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2899                           pic_param->mv_probs[i][j + 2] << 16 |
2900                           pic_param->mv_probs[i][j + 1] <<  8 |
2901                           pic_param->mv_probs[i][j + 0] <<  0);
2902         }
2903     }
2904
2905     OUT_BCS_BATCH(batch,
2906                   (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
2907                   (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
2908                   (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
2909                   (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);
2910
2911     OUT_BCS_BATCH(batch,
2912                   (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
2913                   (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
2914                   (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
2915                   (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);
2916
2917     /* segmentation id stream base address, DW35-DW37 */
2918     if (enable_segmentation) {
2919         OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
2920                       0, I915_GEM_DOMAIN_INSTRUCTION,
2921                       0);
2922         OUT_BCS_BATCH(batch, 0);
2923         OUT_BCS_BATCH(batch, 0);
2924     }
2925     else {
2926         OUT_BCS_BATCH(batch, 0);
2927         OUT_BCS_BATCH(batch, 0);
2928         OUT_BCS_BATCH(batch, 0);
2929     }
2930     ADVANCE_BCS_BATCH(batch);
2931 }
2932
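     /* MFD_VP8_BSD_OBJECT carries the boolean-decoder state for the control
        partition (range, value and bit position) plus size/offset pairs for up
        to eight DCT token partitions; unused partition slots are zeroed. */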
2933 static void
2934 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2935                         VAPictureParameterBufferVP8 *pic_param,
2936                         VASliceParameterBufferVP8 *slice_param,
2937                         dri_bo *slice_data_bo,
2938                         struct gen7_mfd_context *gen7_mfd_context)
2939 {
2940     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2941     int i, log2num;
2942     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7) >> 3);
2943     unsigned int used_bits = 8 - pic_param->bool_coder_ctx.count;
2944     unsigned int partition_size_0 = slice_param->partition_size[0];
2945
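         /* used_bits is how many bits of the current boolean-decoder byte have
            already been consumed (bool_coder_ctx.count holds the bits still
            pending); a fully consumed byte means the hardware restarts at the
            next byte and partition 0 shrinks by one byte. */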
2946     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
2947     if (used_bits == 8) {
2948         used_bits = 0;
2949         offset += 1;
2950         partition_size_0 -= 1;
2951     }
2952
2953     assert(slice_param->num_of_partitions >= 2);
2954     assert(slice_param->num_of_partitions <= 9);
2955
2956     log2num = (int)log2(slice_param->num_of_partitions - 1);
2957
2958     BEGIN_BCS_BATCH(batch, 22);
2959     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2960     OUT_BCS_BATCH(batch,
2961                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
2962                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
2963                   log2num << 4 |
2964                   (slice_param->macroblock_offset & 0x7));
2965     OUT_BCS_BATCH(batch,
2966                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
2967                   0);
2968
2969     OUT_BCS_BATCH(batch, partition_size_0);
2970     OUT_BCS_BATCH(batch, offset);
2971     /* Partition sizes (in bytes) follow the first partition when there is more than one token partition. */
2972     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
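         /* Example: with num_of_partitions == 3 (two token partitions) a single
            3-byte size field follows partition 0, which is why
            3 * (num_of_partitions - 2) bytes are skipped above. */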
2973     for (i = 1; i < 9; i++) {
2974         if (i < slice_param->num_of_partitions) {
2975             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2976             OUT_BCS_BATCH(batch, offset);
2977         } else {
2978             OUT_BCS_BATCH(batch, 0);
2979             OUT_BCS_BATCH(batch, 0);
2980         }
2981
2982         offset += slice_param->partition_size[i];
2983     }
2984
2985     OUT_BCS_BATCH(batch,
2986                   1 << 31 | /* concealment method */
2987                   0);
2988
2989     ADVANCE_BCS_BATCH(batch);
2990 }
2991
2992 void
2993 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
2994                             struct decode_state *decode_state,
2995                             struct gen7_mfd_context *gen7_mfd_context)
2996 {
2997     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2998     VAPictureParameterBufferVP8 *pic_param;
2999     VASliceParameterBufferVP8 *slice_param;
3000     dri_bo *slice_data_bo;
3001
3002     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3003     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3004
3005     /* one slice per frame */
3006     if (decode_state->num_slice_params != 1 ||
3007         (!decode_state->slice_params ||
3008          !decode_state->slice_params[0] ||
3009          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
3010         (!decode_state->slice_datas ||
3011          !decode_state->slice_datas[0] ||
3012          !decode_state->slice_datas[0]->bo) ||
3013         !decode_state->probability_data) {
3014         WARN_ONCE("Wrong parameters for VP8 decoding\n");
3015
3016         return;
3017     }
3018
3019     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3020     slice_data_bo = decode_state->slice_datas[0]->bo;
3021
3022     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3023     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3024     intel_batchbuffer_emit_mi_flush(batch);
3025     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3026     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3027     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3028     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3029     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3030     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3031     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3032     intel_batchbuffer_end_atomic(batch);
3033     intel_batchbuffer_flush(batch);
3034 }
3035
3036 static VAStatus
3037 gen8_mfd_decode_picture(VADriverContextP ctx, 
3038                         VAProfile profile, 
3039                         union codec_state *codec_state,
3040                         struct hw_context *hw_context)
3041
3042 {
3043     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3044     struct decode_state *decode_state = &codec_state->decode;
3045     VAStatus vaStatus;
3046
3047     assert(gen7_mfd_context);
3048
3049     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3050
3051     if (vaStatus != VA_STATUS_SUCCESS)
3052         goto out;
3053
3054     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3055
3056     switch (profile) {
3057     case VAProfileMPEG2Simple:
3058     case VAProfileMPEG2Main:
3059         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3060         break;
3061         
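         /* All H.264 profiles, including the MVC profiles (Stereo High and
            Multiview High), share the common AVC decode path. */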
3062     case VAProfileH264ConstrainedBaseline:
3063     case VAProfileH264Main:
3064     case VAProfileH264High:
3065     case VAProfileH264StereoHigh:
3066     case VAProfileH264MultiviewHigh:
3067         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3068         break;
3069
3070     case VAProfileVC1Simple:
3071     case VAProfileVC1Main:
3072     case VAProfileVC1Advanced:
3073         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3074         break;
3075
3076     case VAProfileJPEGBaseline:
3077         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3078         break;
3079
3080     case VAProfileVP8Version0_3:
3081         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3082         break;
3083
3084     default:
3085         assert(0);
3086         break;
3087     }
3088
3089     vaStatus = VA_STATUS_SUCCESS;
3090
3091 out:
3092     return vaStatus;
3093 }
3094
3095 static void
3096 gen8_mfd_context_destroy(void *hw_context)
3097 {
3098     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3099
3100     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3101     gen7_mfd_context->post_deblocking_output.bo = NULL;
3102
3103     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3104     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3105
3106     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3107     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3108
3109     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3110     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3111
3112     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3113     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3114
3115     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3116     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3117
3118     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3119     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3120
3121     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
3122     gen7_mfd_context->segmentation_buffer.bo = NULL;
3123
3124     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3125
3126     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3127     free(gen7_mfd_context);
3128 }
3129
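     /* -1 flags each MPEG-2 quantiser matrix as not yet provided (the VA
        load_* fields are otherwise 0/1 booleans). */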
3130 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3131                                     struct gen7_mfd_context *gen7_mfd_context)
3132 {
3133     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3134     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3135     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3136     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3137 }
3138
3139 struct hw_context *
3140 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3141 {
3142     struct intel_driver_data *intel = intel_driver_data(ctx);
3143     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3144     int i;
3145
3146     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3147     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3148     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3149
3150     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3151         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3152         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3153     }
3154
3155     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3156     gen7_mfd_context->segmentation_buffer.valid = 0;
3157
3158     switch (obj_config->profile) {
3159     case VAProfileMPEG2Simple:
3160     case VAProfileMPEG2Main:
3161         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3162         break;
3163
3164     case VAProfileH264ConstrainedBaseline:
3165     case VAProfileH264Main:
3166     case VAProfileH264High:
3167     case VAProfileH264StereoHigh:
3168     case VAProfileH264MultiviewHigh:
3169         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3170         break;
3171     default:
3172         break;
3173     }
3174     return (struct hw_context *)gen7_mfd_context;
3175 }