VEBOX/bdw: set downsample method
[platform/upstream/libva-intel-driver.git] / src / gen8_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao  Yakui  <yakui.zhao@intel.com>
27  *
28  */
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <assert.h>
34 #include <math.h>
35 #include <va/va_dec_jpeg.h>
36 #include <va/va_dec_vp8.h>
37
38 #include "intel_batchbuffer.h"
39 #include "intel_driver.h"
40
41 #include "i965_defines.h"
42 #include "i965_drv_video.h"
43 #include "i965_decoder_utils.h"
44
45 #include "gen7_mfd.h"
46 #include "intel_media.h"
47
48 #define B0_STEP_REV             2
49 #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
50
51 static const uint32_t zigzag_direct[64] = {
52     0,   1,  8, 16,  9,  2,  3, 10,
53     17, 24, 32, 25, 18, 11,  4,  5,
54     12, 19, 26, 33, 40, 48, 41, 34,
55     27, 20, 13,  6,  7, 14, 21, 28,
56     35, 42, 49, 56, 57, 50, 43, 36,
57     29, 22, 15, 23, 30, 37, 44, 51,
58     58, 59, 52, 45, 38, 31, 39, 46,
59     53, 60, 61, 54, 47, 55, 62, 63
60 };
61
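/* Allocate the per-surface private data (GenAvcSurface) for an AVC picture:
 * a top-field direct-MV write/read buffer sized at 128 bytes per macroblock
 * and, when field pictures are decoded without direct_8x8_inference, a
 * bottom-field buffer as well. */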
62 static void
63 gen8_mfd_init_avc_surface(VADriverContextP ctx, 
64                           VAPictureParameterBufferH264 *pic_param,
65                           struct object_surface *obj_surface)
66 {
67     struct i965_driver_data *i965 = i965_driver_data(ctx);
68     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
69     int width_in_mbs, height_in_mbs;
70
71     obj_surface->free_private_data = gen_free_avc_surface;
72     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
73     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
74
75     if (!gen7_avc_surface) {
76         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
77         assert((obj_surface->size & 0x3f) == 0);
78         obj_surface->private_data = gen7_avc_surface;
79     }
80
81     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
82                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
83
84     if (gen7_avc_surface->dmv_top == NULL) {
85         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
86                                                  "direct mv w/r buffer",
87                                                  width_in_mbs * height_in_mbs * 128,
88                                                  0x1000);
89         assert(gen7_avc_surface->dmv_top);
90     }
91
92     if (gen7_avc_surface->dmv_bottom_flag &&
93         gen7_avc_surface->dmv_bottom == NULL) {
94         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
95                                                     "direct mv w/r buffer",
96                                                     width_in_mbs * height_in_mbs * 128,                                                    
97                                                     0x1000);
98         assert(gen7_avc_surface->dmv_bottom);
99     }
100 }
101
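/* MFX_PIPE_MODE_SELECT: put the MFX engine into long-format VLD decode mode
 * for the selected codec, with stream-out disabled and the pre-/post-deblocking
 * outputs enabled according to the per-codec decode_init setup. */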
102 static void
103 gen8_mfd_pipe_mode_select(VADriverContextP ctx,
104                           struct decode_state *decode_state,
105                           int standard_select,
106                           struct gen7_mfd_context *gen7_mfd_context)
107 {
108     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
109
110     assert(standard_select == MFX_FORMAT_MPEG2 ||
111            standard_select == MFX_FORMAT_AVC ||
112            standard_select == MFX_FORMAT_VC1 ||
113            standard_select == MFX_FORMAT_JPEG ||
114            standard_select == MFX_FORMAT_VP8);
115
116     BEGIN_BCS_BATCH(batch, 5);
117     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
118     OUT_BCS_BATCH(batch,
119                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
120                   (MFD_MODE_VLD << 15) | /* VLD mode */
121                   (0 << 10) | /* disable Stream-Out */
122                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
123                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
124                   (0 << 5)  | /* not in stitch mode */
125                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
126                   (standard_select << 0));
127     OUT_BCS_BATCH(batch,
128                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
129                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
130                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
131                   (0 << 1)  |
132                   (0 << 0));
133     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
134     OUT_BCS_BATCH(batch, 0); /* reserved */
135     ADVANCE_BCS_BATCH(batch);
136 }
137
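/* MFX_SURFACE_STATE: describe the destination surface (planar 4:2:0 8-bit,
 * Y-major tiling, pitch, and the Y offsets of the Cb/Cr planes). Chroma is
 * interleaved for all codecs except JPEG. */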
138 static void
139 gen8_mfd_surface_state(VADriverContextP ctx,
140                        struct decode_state *decode_state,
141                        int standard_select,
142                        struct gen7_mfd_context *gen7_mfd_context)
143 {
144     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
145     struct object_surface *obj_surface = decode_state->render_object;
146     unsigned int y_cb_offset;
147     unsigned int y_cr_offset;
148
149     assert(obj_surface);
150
151     y_cb_offset = obj_surface->y_cb_offset;
152     y_cr_offset = obj_surface->y_cr_offset;
153
154     BEGIN_BCS_BATCH(batch, 6);
155     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
156     OUT_BCS_BATCH(batch, 0);
157     OUT_BCS_BATCH(batch,
158                   ((obj_surface->orig_height - 1) << 18) |
159                   ((obj_surface->orig_width - 1) << 4));
160     OUT_BCS_BATCH(batch,
161                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
162                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
163                   (0 << 22) | /* surface object control state, ignored */
164                   ((obj_surface->width - 1) << 3) | /* pitch */
165                   (0 << 2)  | /* must be 0 */
166                   (1 << 1)  | /* must be tiled */
167                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
168     OUT_BCS_BATCH(batch,
169                   (0 << 16) | /* X offset for U(Cb), must be 0 */
170                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
171     OUT_BCS_BATCH(batch,
172                   (0 << 16) | /* X offset for V(Cr), must be 0 */
173                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
173                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
174     ADVANCE_BCS_BATCH(batch);
175 }
176
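/* MFX_PIPE_BUF_ADDR_STATE (61 DWords): pre-/post-deblocking destinations,
 * intra and deblocking-filter row-store scratch buffers, and the 16 reference
 * picture addresses (DW 19..50). Unused entries are programmed as 0. */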
177 static void
178 gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
179                              struct decode_state *decode_state,
180                              int standard_select,
181                              struct gen7_mfd_context *gen7_mfd_context)
182 {
183     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
184     int i;
185
186     BEGIN_BCS_BATCH(batch, 61);
187     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
188         /* Pre-deblock 1-3 */
189     if (gen7_mfd_context->pre_deblocking_output.valid)
190         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
191                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
192                       0);
193     else
194         OUT_BCS_BATCH(batch, 0);
195
196         OUT_BCS_BATCH(batch, 0);
197         OUT_BCS_BATCH(batch, 0);
198         /* Post-deblocking 4-6 */
199     if (gen7_mfd_context->post_deblocking_output.valid)
200         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
201                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
202                       0);
203     else
204         OUT_BCS_BATCH(batch, 0);
205
206         OUT_BCS_BATCH(batch, 0);
207         OUT_BCS_BATCH(batch, 0);
208
209         /* uncompressed-video & stream out 7-12 */
210     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
211     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
212         OUT_BCS_BATCH(batch, 0);
213         OUT_BCS_BATCH(batch, 0);
214         OUT_BCS_BATCH(batch, 0);
215         OUT_BCS_BATCH(batch, 0);
216
217         /* intra row-store scratch 13-15 */
218     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
219         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
220                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
221                       0);
222     else
223         OUT_BCS_BATCH(batch, 0);
224
225         OUT_BCS_BATCH(batch, 0);
226         OUT_BCS_BATCH(batch, 0);
227         /* deblocking-filter-row-store 16-18 */
228     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
229         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
230                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
231                       0);
232     else
233         OUT_BCS_BATCH(batch, 0);
234         OUT_BCS_BATCH(batch, 0);
235         OUT_BCS_BATCH(batch, 0);
236
237     /* DW 19..50 */
238     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
239         struct object_surface *obj_surface;
240
241         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
242             gen7_mfd_context->reference_surface[i].obj_surface &&
243             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
244             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
245
246             OUT_BCS_RELOC(batch, obj_surface->bo,
247                           I915_GEM_DOMAIN_INSTRUCTION, 0,
248                           0);
249         } else {
250             OUT_BCS_BATCH(batch, 0);
251         }
252         
253         OUT_BCS_BATCH(batch, 0);
254     }
255     
256     /* reference property 51 */
257     OUT_BCS_BATCH(batch, 0);  
258         
259     /* Macroblock status & ILDB 52-57 */
260     OUT_BCS_BATCH(batch, 0);
261     OUT_BCS_BATCH(batch, 0);
262     OUT_BCS_BATCH(batch, 0);
263     OUT_BCS_BATCH(batch, 0);
264     OUT_BCS_BATCH(batch, 0);
265     OUT_BCS_BATCH(batch, 0);
266
267     /* the second Macroblock status 58-60 */    
268     OUT_BCS_BATCH(batch, 0);
269     OUT_BCS_BATCH(batch, 0);
270     OUT_BCS_BATCH(batch, 0);
271
272     ADVANCE_BCS_BATCH(batch);
273 }
274
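/* MFX_IND_OBJ_BASE_ADDR_STATE: point the indirect bitstream object at the
 * slice data BO (with a 2G upper bound); the MV, IT-COEFF, IT-DBLK and
 * PAK-BSE objects are unused for VLD decoding and left as 0. */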
275 static void
276 gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
277                                  dri_bo *slice_data_bo,
278                                  int standard_select,
279                                  struct gen7_mfd_context *gen7_mfd_context)
280 {
281     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
282
283     BEGIN_BCS_BATCH(batch, 26);
284     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
285         /* MFX In BS 1-5 */
286     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
287     OUT_BCS_BATCH(batch, 0);
288     OUT_BCS_BATCH(batch, 0);
289         /* Upper bound 4-5 */   
290     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
291     OUT_BCS_BATCH(batch, 0);
292
293         /* MFX indirect MV 6-10 */
294     OUT_BCS_BATCH(batch, 0);
295     OUT_BCS_BATCH(batch, 0);
296     OUT_BCS_BATCH(batch, 0);
297     OUT_BCS_BATCH(batch, 0);
298     OUT_BCS_BATCH(batch, 0);
299         
300         /* MFX IT_COFF 11-15 */
301     OUT_BCS_BATCH(batch, 0);
302     OUT_BCS_BATCH(batch, 0);
303     OUT_BCS_BATCH(batch, 0);
304     OUT_BCS_BATCH(batch, 0);
305     OUT_BCS_BATCH(batch, 0);
306
307         /* MFX IT_DBLK 16-20 */
308     OUT_BCS_BATCH(batch, 0);
309     OUT_BCS_BATCH(batch, 0);
310     OUT_BCS_BATCH(batch, 0);
311     OUT_BCS_BATCH(batch, 0);
312     OUT_BCS_BATCH(batch, 0);
313
314         /* MFX PAK_BSE object for encoder 21-25 */
315     OUT_BCS_BATCH(batch, 0);
316     OUT_BCS_BATCH(batch, 0);
317     OUT_BCS_BATCH(batch, 0);
318     OUT_BCS_BATCH(batch, 0);
319     OUT_BCS_BATCH(batch, 0);
320
321     ADVANCE_BCS_BATCH(batch);
322 }
323
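/* MFX_BSP_BUF_BASE_ADDR_STATE: BSD/MPC and MPR row-store scratch buffers,
 * plus the VC-1 bitplane read buffer when one is valid. */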
324 static void
325 gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
326                                  struct decode_state *decode_state,
327                                  int standard_select,
328                                  struct gen7_mfd_context *gen7_mfd_context)
329 {
330     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
331
332     BEGIN_BCS_BATCH(batch, 10);
333     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
334
335     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
336         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
337                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
338                       0);
339     else
340         OUT_BCS_BATCH(batch, 0);
341
342     OUT_BCS_BATCH(batch, 0);
343     OUT_BCS_BATCH(batch, 0);
344         /* MPR Row Store Scratch buffer 4-6 */
345     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
346         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
347                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
348                       0);
349     else
350         OUT_BCS_BATCH(batch, 0);
351
352     OUT_BCS_BATCH(batch, 0);
353     OUT_BCS_BATCH(batch, 0);
354
355         /* Bitplane 7-9 */ 
356     if (gen7_mfd_context->bitplane_read_buffer.valid)
357         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
358                       I915_GEM_DOMAIN_INSTRUCTION, 0,
359                       0);
360     else
361         OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0);
364     ADVANCE_BCS_BATCH(batch);
365 }
366
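/* Load a single quantizer matrix (at most 64 bytes, emitted as 16 DWords)
 * via MFX_QM_STATE. */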
367 static void
368 gen8_mfd_qm_state(VADriverContextP ctx,
369                   int qm_type,
370                   unsigned char *qm,
371                   int qm_length,
372                   struct gen7_mfd_context *gen7_mfd_context)
373 {
374     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
375     unsigned int qm_buffer[16];
376
377     assert(qm_length <= 16 * 4);
        memset(qm_buffer, 0, sizeof(qm_buffer)); /* zero-fill: qm_length may be less than the 16 DWords emitted below */
378     memcpy(qm_buffer, qm, qm_length);
379
380     BEGIN_BCS_BATCH(batch, 18);
381     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
382     OUT_BCS_BATCH(batch, qm_type << 0);
383     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
384     ADVANCE_BCS_BATCH(batch);
385 }
386
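/* MFX_AVC_IMG_STATE: per-picture AVC parameters taken from
 * VAPictureParameterBufferH264: frame size in macroblocks, chroma QP index
 * offsets, picture structure, MBAFF flag and the sequence/picture flags. */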
387 static void
388 gen8_mfd_avc_img_state(VADriverContextP ctx,
389                        struct decode_state *decode_state,
390                        struct gen7_mfd_context *gen7_mfd_context)
391 {
392     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
393     int img_struct;
394     int mbaff_frame_flag;
395     unsigned int width_in_mbs, height_in_mbs;
396     VAPictureParameterBufferH264 *pic_param;
397
398     assert(decode_state->pic_param && decode_state->pic_param->buffer);
399     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
400     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
401
402     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
403         img_struct = 1;
404     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
405         img_struct = 3;
406     else
407         img_struct = 0;
408
409     if ((img_struct & 0x1) == 0x1) {
410         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
411     } else {
412         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
413     }
414
415     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
416         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
417         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
418     } else {
419         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
420     }
421
422     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
423                         !pic_param->pic_fields.bits.field_pic_flag);
424
425     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
426     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
427
428     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
429     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
430            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
431     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
432
433     BEGIN_BCS_BATCH(batch, 17);
434     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
435     OUT_BCS_BATCH(batch, 
436                   (width_in_mbs * height_in_mbs - 1));
437     OUT_BCS_BATCH(batch, 
438                   ((height_in_mbs - 1) << 16) | 
439                   ((width_in_mbs - 1) << 0));
440     OUT_BCS_BATCH(batch, 
441                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
442                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
443                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
444                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
445                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
446                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
447                   (img_struct << 8));
448     OUT_BCS_BATCH(batch,
449                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
450                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
451                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
452                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
453                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
454                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
455                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
456                   (mbaff_frame_flag << 1) |
457                   (pic_param->pic_fields.bits.field_pic_flag << 0));
458     OUT_BCS_BATCH(batch, 0);
459     OUT_BCS_BATCH(batch, 0);
460     OUT_BCS_BATCH(batch, 0);
461     OUT_BCS_BATCH(batch, 0);
462     OUT_BCS_BATCH(batch, 0);
463     OUT_BCS_BATCH(batch, 0);
464     OUT_BCS_BATCH(batch, 0);
465     OUT_BCS_BATCH(batch, 0);
466     OUT_BCS_BATCH(batch, 0);
467     OUT_BCS_BATCH(batch, 0);
468     OUT_BCS_BATCH(batch, 0);
469     OUT_BCS_BATCH(batch, 0);
470     ADVANCE_BCS_BATCH(batch);
471 }
472
473 static void
474 gen8_mfd_avc_qm_state(VADriverContextP ctx,
475                       struct decode_state *decode_state,
476                       struct gen7_mfd_context *gen7_mfd_context)
477 {
478     VAIQMatrixBufferH264 *iq_matrix;
479     VAPictureParameterBufferH264 *pic_param;
480
481     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
482         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
483     else
484         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
485
486     assert(decode_state->pic_param && decode_state->pic_param->buffer);
487     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
488
489     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
490     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
491
492     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
493         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
494         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
495     }
496 }
497
498 static void
499 gen8_mfd_avc_picid_state(VADriverContextP ctx,
500                          struct decode_state *decode_state,
501                          struct gen7_mfd_context *gen7_mfd_context)
502 {
503     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
504
505     BEGIN_BCS_BATCH(batch, 10);
506     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
507     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
508     OUT_BCS_BATCH(batch, 0);
509     OUT_BCS_BATCH(batch, 0);
510     OUT_BCS_BATCH(batch, 0);
511     OUT_BCS_BATCH(batch, 0);
512     OUT_BCS_BATCH(batch, 0);
513     OUT_BCS_BATCH(batch, 0);
514     OUT_BCS_BATCH(batch, 0);
515     OUT_BCS_BATCH(batch, 0);
516     ADVANCE_BCS_BATCH(batch);
517 }
518
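/* MFX_AVC_DIRECTMODE_STATE: direct-MV buffer addresses for the 16 reference
 * surfaces and the current picture, followed by the top/bottom POC values
 * used for direct-mode prediction. */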
519 static void
520 gen8_mfd_avc_directmode_state(VADriverContextP ctx,
521                               struct decode_state *decode_state,
522                               VAPictureParameterBufferH264 *pic_param,
523                               VASliceParameterBufferH264 *slice_param,
524                               struct gen7_mfd_context *gen7_mfd_context)
525 {
526     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
527     struct object_surface *obj_surface;
528     GenAvcSurface *gen7_avc_surface;
529     VAPictureH264 *va_pic;
530     int i, j;
531
532     BEGIN_BCS_BATCH(batch, 71);
533     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
534
535     /* reference surfaces 0..15 */
536     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
537         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
538             gen7_mfd_context->reference_surface[i].obj_surface &&
539             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
540
541             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
542             gen7_avc_surface = obj_surface->private_data;
543
544             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
545                           I915_GEM_DOMAIN_INSTRUCTION, 0,
546                           0);
547             OUT_BCS_BATCH(batch, 0);
548         } else {
549             OUT_BCS_BATCH(batch, 0);
550             OUT_BCS_BATCH(batch, 0);
551         }
552     }
553     
554     OUT_BCS_BATCH(batch, 0);
555
556     /* the current decoding frame/field */
557     va_pic = &pic_param->CurrPic;
558     obj_surface = decode_state->render_object;
559     assert(obj_surface->bo && obj_surface->private_data);
560     gen7_avc_surface = obj_surface->private_data;
561
562     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
563                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
564                   0);
565
566     OUT_BCS_BATCH(batch, 0);
567     OUT_BCS_BATCH(batch, 0);
568
569     /* POC List */
570     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
571         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
572             int found = 0;
573
574             assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
575
576             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
577                 va_pic = &pic_param->ReferenceFrames[j];
578                 
579                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
580                     continue;
581
582                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
583                     found = 1;
584                     break;
585                 }
586             }
587
588             assert(found == 1);
589             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
590             
591             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
592             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
593         } else {
594             OUT_BCS_BATCH(batch, 0);
595             OUT_BCS_BATCH(batch, 0);
596         }
597     }
598
599     va_pic = &pic_param->CurrPic;
600     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
601     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
602
603     ADVANCE_BCS_BATCH(batch);
604 }
605
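/* MFX_AVC_SLICE_STATE: slice type, active reference counts, QP and deblocking
 * parameters, plus the macroblock positions of this slice and of the next one
 * (or the picture end when this is the last slice). */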
606 static void
607 gen8_mfd_avc_slice_state(VADriverContextP ctx,
608                          VAPictureParameterBufferH264 *pic_param,
609                          VASliceParameterBufferH264 *slice_param,
610                          VASliceParameterBufferH264 *next_slice_param,
611                          struct gen7_mfd_context *gen7_mfd_context)
612 {
613     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
614     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
615     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
616     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
617     int num_ref_idx_l0, num_ref_idx_l1;
618     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
619                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
620     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
621     int slice_type;
622
623     if (slice_param->slice_type == SLICE_TYPE_I ||
624         slice_param->slice_type == SLICE_TYPE_SI) {
625         slice_type = SLICE_TYPE_I;
626     } else if (slice_param->slice_type == SLICE_TYPE_P ||
627                slice_param->slice_type == SLICE_TYPE_SP) {
628         slice_type = SLICE_TYPE_P;
629     } else { 
630         assert(slice_param->slice_type == SLICE_TYPE_B);
631         slice_type = SLICE_TYPE_B;
632     }
633
634     if (slice_type == SLICE_TYPE_I) {
635         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
636         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
637         num_ref_idx_l0 = 0;
638         num_ref_idx_l1 = 0;
639     } else if (slice_type == SLICE_TYPE_P) {
640         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
641         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
642         num_ref_idx_l1 = 0;
643     } else {
644         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
645         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
646     }
647
648     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
649     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
650     slice_ver_pos = first_mb_in_slice / width_in_mbs;
651
652     if (next_slice_param) {
653         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
654         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
655         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
656     } else {
657         next_slice_hor_pos = 0;
658         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
659     }
660
661     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
662     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
663     OUT_BCS_BATCH(batch, slice_type);
664     OUT_BCS_BATCH(batch, 
665                   (num_ref_idx_l1 << 24) |
666                   (num_ref_idx_l0 << 16) |
667                   (slice_param->chroma_log2_weight_denom << 8) |
668                   (slice_param->luma_log2_weight_denom << 0));
669     OUT_BCS_BATCH(batch, 
670                   (slice_param->direct_spatial_mv_pred_flag << 29) |
671                   (slice_param->disable_deblocking_filter_idc << 27) |
672                   (slice_param->cabac_init_idc << 24) |
673                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
674                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
675                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
676     OUT_BCS_BATCH(batch, 
677                   (slice_ver_pos << 24) |
678                   (slice_hor_pos << 16) | 
679                   (first_mb_in_slice << 0));
680     OUT_BCS_BATCH(batch,
681                   (next_slice_ver_pos << 16) |
682                   (next_slice_hor_pos << 0));
683     OUT_BCS_BATCH(batch, 
684                   (next_slice_param == NULL) << 19); /* last slice flag */
685     OUT_BCS_BATCH(batch, 0);
686     OUT_BCS_BATCH(batch, 0);
687     OUT_BCS_BATCH(batch, 0);
688     OUT_BCS_BATCH(batch, 0);
689     ADVANCE_BCS_BATCH(batch);
690 }
691
692 static inline void
693 gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
694                            VAPictureParameterBufferH264 *pic_param,
695                            VASliceParameterBufferH264 *slice_param,
696                            struct gen7_mfd_context *gen7_mfd_context)
697 {
698     gen6_send_avc_ref_idx_state(
699         gen7_mfd_context->base.batch,
700         slice_param,
701         gen7_mfd_context->reference_surface
702     );
703 }
704
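/* MFX_AVC_WEIGHTOFFSET_STATE: upload the explicit L0 (and, for B slices with
 * weighted_bipred_idc == 1, L1) weight/offset tables, six 16-bit entries
 * (luma and both chroma components) per reference index. */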
705 static void
706 gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
707                                 VAPictureParameterBufferH264 *pic_param,
708                                 VASliceParameterBufferH264 *slice_param,
709                                 struct gen7_mfd_context *gen7_mfd_context)
710 {
711     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
712     int i, j, num_weight_offset_table = 0;
713     short weightoffsets[32 * 6];
714
715     if ((slice_param->slice_type == SLICE_TYPE_P ||
716          slice_param->slice_type == SLICE_TYPE_SP) &&
717         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
718         num_weight_offset_table = 1;
719     }
720     
721     if ((slice_param->slice_type == SLICE_TYPE_B) &&
722         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
723         num_weight_offset_table = 2;
724     }
725
726     for (i = 0; i < num_weight_offset_table; i++) {
727         BEGIN_BCS_BATCH(batch, 98);
728         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
729         OUT_BCS_BATCH(batch, i);
730
731         if (i == 0) {
732             for (j = 0; j < 32; j++) {
733                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
734                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
735                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
736                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
737                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
738                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
739             }
740         } else {
741             for (j = 0; j < 32; j++) {
742                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
743                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
744                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
745                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
746                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
747                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
748             }
749         }
750
751         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
752         ADVANCE_BCS_BATCH(batch);
753     }
754 }
755
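/* MFD_AVC_BSD_OBJECT: kick off decoding of one slice, giving its data size
 * and offset within the bound bitstream BO, the byte/bit offset of the first
 * macroblock, and the last-slice flag. */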
756 static void
757 gen8_mfd_avc_bsd_object(VADriverContextP ctx,
758                         VAPictureParameterBufferH264 *pic_param,
759                         VASliceParameterBufferH264 *slice_param,
760                         dri_bo *slice_data_bo,
761                         VASliceParameterBufferH264 *next_slice_param,
762                         struct gen7_mfd_context *gen7_mfd_context)
763 {
764     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
765     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
766                                                             slice_param,
767                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
768
769     /* the input bitstream format on GEN7 differs from GEN6 */
770     BEGIN_BCS_BATCH(batch, 6);
771     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
772     OUT_BCS_BATCH(batch, 
773                   (slice_param->slice_data_size));
774     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
775     OUT_BCS_BATCH(batch,
776                   (0 << 31) |
777                   (0 << 14) |
778                   (0 << 12) |
779                   (0 << 10) |
780                   (0 << 8));
781     OUT_BCS_BATCH(batch,
782                   ((slice_data_bit_offset >> 3) << 16) |
783                   (1 << 7)  |
784                   (0 << 5)  |
785                   (0 << 4)  |
786                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
787                   (slice_data_bit_offset & 0x7));
788     OUT_BCS_BATCH(batch, 0);
789     ADVANCE_BCS_BATCH(batch);
790 }
791
792 static inline void
793 gen8_mfd_avc_context_init(
794     VADriverContextP         ctx,
795     struct gen7_mfd_context *gen7_mfd_context
796 )
797 {
798     /* Initialize flat scaling lists */
799     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
800 }
801
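/* Per-picture AVC setup: update the frame store index, (re)allocate the
 * render surface and its direct-MV buffers, select pre- vs post-deblocking
 * output depending on whether any slice enables the in-loop deblocking
 * filter, and allocate the row-store scratch buffers. */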
802 static void
803 gen8_mfd_avc_decode_init(VADriverContextP ctx,
804                          struct decode_state *decode_state,
805                          struct gen7_mfd_context *gen7_mfd_context)
806 {
807     VAPictureParameterBufferH264 *pic_param;
808     VASliceParameterBufferH264 *slice_param;
809     struct i965_driver_data *i965 = i965_driver_data(ctx);
810     struct object_surface *obj_surface;
811     dri_bo *bo;
812     int i, j, enable_avc_ildb = 0;
813     unsigned int width_in_mbs, height_in_mbs;
814
815     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
816         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
817         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
818
819         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
820             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
821             assert((slice_param->slice_type == SLICE_TYPE_I) ||
822                    (slice_param->slice_type == SLICE_TYPE_SI) ||
823                    (slice_param->slice_type == SLICE_TYPE_P) ||
824                    (slice_param->slice_type == SLICE_TYPE_SP) ||
825                    (slice_param->slice_type == SLICE_TYPE_B));
826
827             if (slice_param->disable_deblocking_filter_idc != 1) {
828                 enable_avc_ildb = 1;
829                 break;
830             }
831
832             slice_param++;
833         }
834     }
835
836     assert(decode_state->pic_param && decode_state->pic_param->buffer);
837     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
838     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
839     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
840     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
841     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
842     assert(height_in_mbs > 0 && height_in_mbs <= 256);
843
844     /* Current decoded picture */
845     obj_surface = decode_state->render_object;
846     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
847     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
848     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
849
850     /* initialize the UV components for the YUV400 (monochrome) case */
851     if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
852          unsigned int uv_offset = obj_surface->width * obj_surface->height; 
853          unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 
854
855          drm_intel_gem_bo_map_gtt(obj_surface->bo);
856          memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
857          drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
858     }
859
860     gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
861
862     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
863     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
864     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
865     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
866
867     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
868     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
869     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
870     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
871
872     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
873     bo = dri_bo_alloc(i965->intel.bufmgr,
874                       "intra row store",
875                       width_in_mbs * 64,
876                       0x1000);
877     assert(bo);
878     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
879     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
880
881     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
882     bo = dri_bo_alloc(i965->intel.bufmgr,
883                       "deblocking filter row store",
884                       width_in_mbs * 64 * 4,
885                       0x1000);
886     assert(bo);
887     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
888     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
889
890     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
891     bo = dri_bo_alloc(i965->intel.bufmgr,
892                       "bsd mpc row store",
893                       width_in_mbs * 64 * 2,
894                       0x1000);
895     assert(bo);
896     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
897     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
898
899     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
900     bo = dri_bo_alloc(i965->intel.bufmgr,
901                       "mpr row store",
902                       width_in_mbs * 64 * 2,
903                       0x1000);
904     assert(bo);
905     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
906     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
907
908     gen7_mfd_context->bitplane_read_buffer.valid = 0;
909 }
910
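/* Top-level AVC decode: emit the common MFX state once per picture, then
 * per-slice direct-mode, ref-idx, weight/offset, slice-state and BSD object
 * commands for every slice in every slice parameter buffer. */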
911 static void
912 gen8_mfd_avc_decode_picture(VADriverContextP ctx,
913                             struct decode_state *decode_state,
914                             struct gen7_mfd_context *gen7_mfd_context)
915 {
916     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
917     VAPictureParameterBufferH264 *pic_param;
918     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
919     dri_bo *slice_data_bo;
920     int i, j;
921
922     assert(decode_state->pic_param && decode_state->pic_param->buffer);
923     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
924     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
925
926     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
927     intel_batchbuffer_emit_mi_flush(batch);
928     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
929     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
930     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
931     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
932     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
933     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
934     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
935
936     for (j = 0; j < decode_state->num_slice_params; j++) {
937         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
938         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
939         slice_data_bo = decode_state->slice_datas[j]->bo;
940         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
941
942         if (j == decode_state->num_slice_params - 1)
943             next_slice_group_param = NULL;
944         else
945             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
946
947         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
948             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
949             assert((slice_param->slice_type == SLICE_TYPE_I) ||
950                    (slice_param->slice_type == SLICE_TYPE_SI) ||
951                    (slice_param->slice_type == SLICE_TYPE_P) ||
952                    (slice_param->slice_type == SLICE_TYPE_SP) ||
953                    (slice_param->slice_type == SLICE_TYPE_B));
954
955             if (i < decode_state->slice_params[j]->num_elements - 1)
956                 next_slice_param = slice_param + 1;
957             else
958                 next_slice_param = next_slice_group_param;
959
960             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
961             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
962             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
963             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
964             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
965             slice_param++;
966         }
967     }
968
969     intel_batchbuffer_end_atomic(batch);
970     intel_batchbuffer_flush(batch);
971 }
972
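/* Per-picture MPEG-2 setup: bind the reference surfaces, allocate the render
 * surface and the BSD/MPC row-store scratch buffer; only the pre-deblocking
 * output is used for MPEG-2. */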
973 static void
974 gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
975                            struct decode_state *decode_state,
976                            struct gen7_mfd_context *gen7_mfd_context)
977 {
978     VAPictureParameterBufferMPEG2 *pic_param;
979     struct i965_driver_data *i965 = i965_driver_data(ctx);
980     struct object_surface *obj_surface;
981     dri_bo *bo;
982     unsigned int width_in_mbs;
983
984     assert(decode_state->pic_param && decode_state->pic_param->buffer);
985     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
986     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
987
988     mpeg2_set_reference_surfaces(
989         ctx,
990         gen7_mfd_context->reference_surface,
991         decode_state,
992         pic_param
993     );
994
995     /* Current decoded picture */
996     obj_surface = decode_state->render_object;
997     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
998
999     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1000     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1001     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1002     gen7_mfd_context->pre_deblocking_output.valid = 1;
1003
1004     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1005     bo = dri_bo_alloc(i965->intel.bufmgr,
1006                       "bsd mpc row store",
1007                       width_in_mbs * 96,
1008                       0x1000);
1009     assert(bo);
1010     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1011     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1012
1013     gen7_mfd_context->post_deblocking_output.valid = 0;
1014     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1015     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1016     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1017     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1018 }
1019
1020 static void
1021 gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
1022                          struct decode_state *decode_state,
1023                          struct gen7_mfd_context *gen7_mfd_context)
1024 {
1025     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1026     VAPictureParameterBufferMPEG2 *pic_param;
1027     unsigned int slice_concealment_disable_bit = 0;
1028
1029     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1030     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1031
1032     slice_concealment_disable_bit = 1;
1033
1034     BEGIN_BCS_BATCH(batch, 13);
1035     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1036     OUT_BCS_BATCH(batch,
1037                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1038                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1039                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1040                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1041                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1042                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1043                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1044                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1045                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1046                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1047                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1048                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1049     OUT_BCS_BATCH(batch,
1050                   pic_param->picture_coding_type << 9);
1051     OUT_BCS_BATCH(batch,
1052                   (slice_concealment_disable_bit << 31) |
1053                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1054                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1055     OUT_BCS_BATCH(batch, 0);
1056     OUT_BCS_BATCH(batch, 0);
1057     OUT_BCS_BATCH(batch, 0);
1058     OUT_BCS_BATCH(batch, 0);
1059     OUT_BCS_BATCH(batch, 0);
1060     OUT_BCS_BATCH(batch, 0);
1061     OUT_BCS_BATCH(batch, 0);
1062     OUT_BCS_BATCH(batch, 0);
1063     OUT_BCS_BATCH(batch, 0);
1064     ADVANCE_BCS_BATCH(batch);
1065 }
1066
1067 static void
1068 gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
1069                         struct decode_state *decode_state,
1070                         struct gen7_mfd_context *gen7_mfd_context)
1071 {
1072     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1073     int i, j;
1074
1075     /* Update internal QM state */
1076     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1077         VAIQMatrixBufferMPEG2 * const iq_matrix =
1078             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1079
1080         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1081             iq_matrix->load_intra_quantiser_matrix) {
1082             gen_iq_matrix->load_intra_quantiser_matrix =
1083                 iq_matrix->load_intra_quantiser_matrix;
1084             if (iq_matrix->load_intra_quantiser_matrix) {
1085                 for (j = 0; j < 64; j++)
1086                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1087                         iq_matrix->intra_quantiser_matrix[j];
1088             }
1089         }
1090
1091         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1092             iq_matrix->load_non_intra_quantiser_matrix) {
1093             gen_iq_matrix->load_non_intra_quantiser_matrix =
1094                 iq_matrix->load_non_intra_quantiser_matrix;
1095             if (iq_matrix->load_non_intra_quantiser_matrix) {
1096                 for (j = 0; j < 64; j++)
1097                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1098                         iq_matrix->non_intra_quantiser_matrix[j];
1099             }
1100         }
1101     }
1102
1103     /* Commit QM state to HW */
1104     for (i = 0; i < 2; i++) {
1105         unsigned char *qm = NULL;
1106         int qm_type;
1107
1108         if (i == 0) {
1109             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1110                 qm = gen_iq_matrix->intra_quantiser_matrix;
1111                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1112             }
1113         } else {
1114             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1115                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1116                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1117             }
1118         }
1119
1120         if (!qm)
1121             continue;
1122
1123         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1124     }
1125 }
1126
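/* MFD_MPEG2_BSD_OBJECT: decode one MPEG-2 slice; the macroblock count is
 * derived from the positions of this slice and the next one, with the
 * slice_vertical_position workaround applied for field pictures. */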
1127 static void
1128 gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1129                           VAPictureParameterBufferMPEG2 *pic_param,
1130                           VASliceParameterBufferMPEG2 *slice_param,
1131                           VASliceParameterBufferMPEG2 *next_slice_param,
1132                           struct gen7_mfd_context *gen7_mfd_context)
1133 {
1134     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1135     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1136     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1137
1138     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1139         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1140         is_field_pic = 1;
1141     is_field_pic_wa = is_field_pic &&
1142         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1143
1144     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1145     hpos0 = slice_param->slice_horizontal_position;
1146
1147     if (next_slice_param == NULL) {
1148         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1149         hpos1 = 0;
1150     } else {
1151         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1152         hpos1 = next_slice_param->slice_horizontal_position;
1153     }
1154
1155     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1156
1157     BEGIN_BCS_BATCH(batch, 5);
1158     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1159     OUT_BCS_BATCH(batch, 
1160                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1161     OUT_BCS_BATCH(batch, 
1162                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1163     OUT_BCS_BATCH(batch,
1164                   hpos0 << 24 |
1165                   vpos0 << 16 |
1166                   mb_count << 8 |
1167                   (next_slice_param == NULL) << 5 |
1168                   (next_slice_param == NULL) << 3 |
1169                   (slice_param->macroblock_offset & 0x7));
1170     OUT_BCS_BATCH(batch,
1171                   (slice_param->quantiser_scale_code << 24) |
1172                   (vpos1 << 8 | hpos1));
1173     ADVANCE_BCS_BATCH(batch);
1174 }
1175
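/* Top-level MPEG-2 decode: emit the common MFX state plus picture and QM
 * state once per picture, then one BSD object per slice. */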
1176 static void
1177 gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1178                               struct decode_state *decode_state,
1179                               struct gen7_mfd_context *gen7_mfd_context)
1180 {
1181     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1182     VAPictureParameterBufferMPEG2 *pic_param;
1183     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1184     dri_bo *slice_data_bo;
1185     int i, j;
1186
1187     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1188     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1189
1190     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1191     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1192     intel_batchbuffer_emit_mi_flush(batch);
1193     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1194     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1195     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1196     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1197     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1198     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1199
1200     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1201         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1202             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1203
1204     for (j = 0; j < decode_state->num_slice_params; j++) {
1205         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1206         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1207         slice_data_bo = decode_state->slice_datas[j]->bo;
1208         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1209
1210         if (j == decode_state->num_slice_params - 1)
1211             next_slice_group_param = NULL;
1212         else
1213             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1214
1215         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1216             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1217
1218             if (i < decode_state->slice_params[j]->num_elements - 1)
1219                 next_slice_param = slice_param + 1;
1220             else
1221                 next_slice_param = next_slice_group_param;
1222
1223             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1224             slice_param++;
1225         }
1226     }
1227
1228     intel_batchbuffer_end_atomic(batch);
1229     intel_batchbuffer_flush(batch);
1230 }
1231
1232 static const int va_to_gen7_vc1_pic_type[5] = {
1233     GEN7_VC1_I_PICTURE,
1234     GEN7_VC1_P_PICTURE,
1235     GEN7_VC1_B_PICTURE,
1236     GEN7_VC1_BI_PICTURE,
1237     GEN7_VC1_P_PICTURE,
1238 };
1239
1240 static const int va_to_gen7_vc1_mv[4] = {
1241     1, /* 1-MV */
1242     2, /* 1-MV half-pel */
1243     3, /* 1-MV half-pel bilinear */
1244     0, /* Mixed MV */
1245 };
1246
1247 static const int b_picture_scale_factor[21] = {
1248     128, 85,  170, 64,  192,
1249     51,  102, 153, 204, 43,
1250     215, 37,  74,  111, 148,
1251     185, 222, 32,  96,  160, 
1252     224,
1253 };
1254
1255 static const int va_to_gen7_vc1_condover[3] = {
1256     0,
1257     2,
1258     3
1259 };
1260
1261 static const int va_to_gen7_vc1_profile[4] = {
1262     GEN7_VC1_SIMPLE_PROFILE,
1263     GEN7_VC1_MAIN_PROFILE,
1264     GEN7_VC1_RESERVED_PROFILE,
1265     GEN7_VC1_ADVANCED_PROFILE
1266 };
1267
1268 static void 
1269 gen8_mfd_free_vc1_surface(void **data)
1270 {
1271     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1272
1273     if (!gen7_vc1_surface)
1274         return;
1275
1276     dri_bo_unreference(gen7_vc1_surface->dmv);
1277     free(gen7_vc1_surface);
1278     *data = NULL;
1279 }
1280
1281 static void
1282 gen8_mfd_init_vc1_surface(VADriverContextP ctx, 
1283                           VAPictureParameterBufferVC1 *pic_param,
1284                           struct object_surface *obj_surface)
1285 {
1286     struct i965_driver_data *i965 = i965_driver_data(ctx);
1287     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1288     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1289     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1290
1291     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
1292
1293     if (!gen7_vc1_surface) {
1294         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1295         assert((obj_surface->size & 0x3f) == 0);
1296         obj_surface->private_data = gen7_vc1_surface;
1297     }
1298
1299     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1300
1301     if (gen7_vc1_surface->dmv == NULL) {
1302         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1303                                              "direct mv w/r buffer",
1304                                              width_in_mbs * height_in_mbs * 64,
1305                                              0x1000);
1306     }
1307 }
1308
1309 static void
1310 gen8_mfd_vc1_decode_init(VADriverContextP ctx,
1311                          struct decode_state *decode_state,
1312                          struct gen7_mfd_context *gen7_mfd_context)
1313 {
1314     VAPictureParameterBufferVC1 *pic_param;
1315     struct i965_driver_data *i965 = i965_driver_data(ctx);
1316     struct object_surface *obj_surface;
1317     dri_bo *bo;
1318     int width_in_mbs;
1319     int picture_type;
1320
1321     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1322     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1323     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1324     picture_type = pic_param->picture_fields.bits.picture_type;
1325  
1326     intel_update_vc1_frame_store_index(ctx,
1327                                        decode_state,
1328                                        pic_param,
1329                                        gen7_mfd_context->reference_surface);
1330
1331     /* Current decoded picture */
1332     obj_surface = decode_state->render_object;
1333     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1334     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1335
1336     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1337     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1338     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1339     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1340
1341     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1342     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1343     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1344     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1345
1346     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1347     bo = dri_bo_alloc(i965->intel.bufmgr,
1348                       "intra row store",
1349                       width_in_mbs * 64,
1350                       0x1000);
1351     assert(bo);
1352     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1353     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1354
1355     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1356     bo = dri_bo_alloc(i965->intel.bufmgr,
1357                       "deblocking filter row store",
1358                       width_in_mbs * 7 * 64,
1359                       0x1000);
1360     assert(bo);
1361     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1362     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1363
1364     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1365     bo = dri_bo_alloc(i965->intel.bufmgr,
1366                       "bsd mpc row store",
1367                       width_in_mbs * 96,
1368                       0x1000);
1369     assert(bo);
1370     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1371     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1372
1373     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1374
1375     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1376     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1377     
1378     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1379         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1380         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1381         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1382         int src_w, src_h;
1383         uint8_t *src = NULL, *dst = NULL;
1384
1385         assert(decode_state->bit_plane->buffer);
1386         src = decode_state->bit_plane->buffer;
1387
1388         bo = dri_bo_alloc(i965->intel.bufmgr,
1389                           "VC-1 Bitplane",
1390                           bitplane_width * height_in_mbs,
1391                           0x1000);
1392         assert(bo);
1393         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1394
1395         dri_bo_map(bo, True);
1396         assert(bo->virtual);
1397         dst = bo->virtual;
1398
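        /*
         * Repack the VA bitplane buffer into the nibble-per-macroblock layout
         * the MFD hardware expects: two macroblocks are packed per byte, row
         * by row, and bit 1 of every nibble is forced on for skipped pictures.
         */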
1399         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1400             for (src_w = 0; src_w < width_in_mbs; src_w++) {
1401                 int src_index, dst_index;
1402                 int src_shift;
1403                 uint8_t src_value;
1404
1405                 src_index = (src_h * width_in_mbs + src_w) / 2;
1406                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1407                 src_value = ((src[src_index] >> src_shift) & 0xf);
1408
1409                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1410                     src_value |= 0x2;
1411                 }
1412
1413                 dst_index = src_w / 2;
1414                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1415             }
1416
1417             if (src_w & 1)
1418                 dst[src_w / 2] >>= 4;
1419
1420             dst += bitplane_width;
1421         }
1422
1423         dri_bo_unmap(bo);
1424     } else
1425         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1426 }
1427
1428 static void
1429 gen8_mfd_vc1_pic_state(VADriverContextP ctx,
1430                        struct decode_state *decode_state,
1431                        struct gen7_mfd_context *gen7_mfd_context)
1432 {
1433     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1434     VAPictureParameterBufferVC1 *pic_param;
1435     struct object_surface *obj_surface;
1436     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1437     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1438     int unified_mv_mode;
1439     int ref_field_pic_polarity = 0;
1440     int scale_factor = 0;
1441     int trans_ac_y = 0;
1442     int dmv_surface_valid = 0;
1443     int brfd = 0;
1444     int fcm = 0;
1445     int picture_type;
1446     int profile;
1447     int overlap;
1448     int interpolation_mode = 0;
1449
1450     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1451     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1452
1453     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1454     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1455     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1456     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1457     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1458     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1459     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1460     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1461
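    /*
     * Map the VC-1 VOPDQUANT syntax elements (DQUANT, DQUANTFRM, DQPROFILE,
     * DQDBEDGE/DQSBEDGE, DQBILEVEL) onto the AltPQuant configuration and
     * edge-mask values programmed into MFD_VC1_LONG_PIC_STATE below.
     */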
1462     if (dquant == 0) {
1463         alt_pquant_config = 0;
1464         alt_pquant_edge_mask = 0;
1465     } else if (dquant == 2) {
1466         alt_pquant_config = 1;
1467         alt_pquant_edge_mask = 0xf;
1468     } else {
1469         assert(dquant == 1);
1470         if (dquantfrm == 0) {
1471             alt_pquant_config = 0;
1472             alt_pquant_edge_mask = 0;
1473             alt_pq = 0;
1474         } else {
1475             assert(dquantfrm == 1);
1476             alt_pquant_config = 1;
1477
1478             switch (dqprofile) {
1479             case 3:
1480                 if (dqbilevel == 0) {
1481                     alt_pquant_config = 2;
1482                     alt_pquant_edge_mask = 0;
1483                 } else {
1484                     assert(dqbilevel == 1);
1485                     alt_pquant_config = 3;
1486                     alt_pquant_edge_mask = 0;
1487                 }
1488                 break;
1489                 
1490             case 0:
1491                 alt_pquant_edge_mask = 0xf;
1492                 break;
1493
1494             case 1:
1495                 if (dqdbedge == 3)
1496                     alt_pquant_edge_mask = 0x9;
1497                 else
1498                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1499
1500                 break;
1501
1502             case 2:
1503                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1504                 break;
1505
1506             default:
1507                 assert(0);
1508             }
1509         }
1510     }
1511
1512     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1513         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1514         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1515     } else {
1516         assert(pic_param->mv_fields.bits.mv_mode < 4);
1517         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1518     }
1519
1520     if (pic_param->sequence_fields.bits.interlace == 1 &&
1521         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1522         /* FIXME: calculate reference field picture polarity */
1523         assert(0);
1524         ref_field_pic_polarity = 0;
1525     }
1526
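    /* Look up the B-picture scale factor signalled by BFRACTION; it is used
     * below to derive the backward reference frame distance (BRFD). */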
1527     if (pic_param->b_picture_fraction < 21)
1528         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1529
1530     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1531     
1532     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1533         picture_type == GEN7_VC1_I_PICTURE)
1534         picture_type = GEN7_VC1_BI_PICTURE;
1535
1536     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1537         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1538     else {
1539         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1540
1541         /*
1542          * 8.3.6.2.1 Transform Type Selection
1543          * If variable-sized transform coding is not enabled,
1544          * then the 8x8 transform shall be used for all blocks.
1545          * It is also an MFX_VC1_PIC_STATE requirement.
1546          */
1547         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1548             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1549             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1550         }
1551     }
1552
1553     if (picture_type == GEN7_VC1_B_PICTURE) {
1554         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1555
1556         obj_surface = decode_state->reference_objects[1];
1557
1558         if (obj_surface)
1559             gen7_vc1_surface = obj_surface->private_data;
1560
1561         if (!gen7_vc1_surface || 
1562             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1563              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1564             dmv_surface_valid = 0;
1565         else
1566             dmv_surface_valid = 1;
1567     }
1568
1569     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1570
1571     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1572         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1573     else {
1574         if (pic_param->picture_fields.bits.top_field_first)
1575             fcm = 2;
1576         else
1577             fcm = 3;
1578     }
1579
1580     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1581         brfd = pic_param->reference_fields.bits.reference_distance;
1582         brfd = (scale_factor * brfd) >> 8;
1583         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1584
1585         if (brfd < 0)
1586             brfd = 0;
1587     }
1588
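    /*
     * Overlap smoothing: for simple/main profile it is implied by PQUANT >= 9
     * on non-B pictures; for advanced profile it also depends on the picture
     * type and, for I/BI pictures, on the CONDOVER setting.
     */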
1589     overlap = 0;
1590     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1591         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1592             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1593             overlap = 1;
1594         }
1595     } else {
1596         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1597             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1598             overlap = 1;
1599         }
1600         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1601             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1602             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1603                 overlap = 1;
1604             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1605                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1606                 overlap = 1;
1607             }
1608         }
1609     }
1610
1611     assert(pic_param->conditional_overlap_flag < 3);
1612     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1613
1614     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1615         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1616          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1617         interpolation_mode = 9; /* Half-pel bilinear */
1618     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1619              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1620               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1621         interpolation_mode = 1; /* Half-pel bicubic */
1622     else
1623         interpolation_mode = 0; /* Quarter-pel bicubic */
1624
1625     BEGIN_BCS_BATCH(batch, 6);
1626     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1627     OUT_BCS_BATCH(batch,
1628                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1629                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1630     OUT_BCS_BATCH(batch,
1631                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1632                   dmv_surface_valid << 15 |
1633                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1634                   pic_param->rounding_control << 13 |
1635                   pic_param->sequence_fields.bits.syncmarker << 12 |
1636                   interpolation_mode << 8 |
1637                   0 << 7 | /* FIXME: scale up or down ??? */
1638                   pic_param->range_reduction_frame << 6 |
1639                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1640                   overlap << 4 |
1641                   !pic_param->picture_fields.bits.is_first_field << 3 |
1642                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1643     OUT_BCS_BATCH(batch,
1644                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1645                   picture_type << 26 |
1646                   fcm << 24 |
1647                   alt_pq << 16 |
1648                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1649                   scale_factor << 0);
1650     OUT_BCS_BATCH(batch,
1651                   unified_mv_mode << 28 |
1652                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1653                   pic_param->fast_uvmc_flag << 26 |
1654                   ref_field_pic_polarity << 25 |
1655                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1656                   pic_param->reference_fields.bits.reference_distance << 20 |
1657                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1658                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1659                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1660                   alt_pquant_edge_mask << 4 |
1661                   alt_pquant_config << 2 |
1662                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1663                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1664     OUT_BCS_BATCH(batch,
1665                   !!pic_param->bitplane_present.value << 31 |
1666                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1667                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1668                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1669                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1670                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1671                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1672                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1673                   pic_param->mv_fields.bits.mv_table << 20 |
1674                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1675                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1676                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1677                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1678                   pic_param->mb_mode_table << 8 |
1679                   trans_ac_y << 6 |
1680                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1681                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1682                   pic_param->cbp_table << 0);
1683     ADVANCE_BCS_BATCH(batch);
1684 }
1685
1686 static void
1687 gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1688                              struct decode_state *decode_state,
1689                              struct gen7_mfd_context *gen7_mfd_context)
1690 {
1691     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1692     VAPictureParameterBufferVC1 *pic_param;
1693     int intensitycomp_single;
1694
1695     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1696     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1697
1700     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1701
1702     BEGIN_BCS_BATCH(batch, 6);
1703     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1704     OUT_BCS_BATCH(batch,
1705                   0 << 14 | /* FIXME: double ??? */
1706                   0 << 12 |
1707                   intensitycomp_single << 10 |
1708                   intensitycomp_single << 8 |
1709                   0 << 4 | /* FIXME: interlace mode */
1710                   0);
1711     OUT_BCS_BATCH(batch,
1712                   pic_param->luma_shift << 16 |
1713                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1714     OUT_BCS_BATCH(batch, 0);
1715     OUT_BCS_BATCH(batch, 0);
1716     OUT_BCS_BATCH(batch, 0);
1717     ADVANCE_BCS_BATCH(batch);
1718 }
1719
1720 static void
1721 gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
1722                               struct decode_state *decode_state,
1723                               struct gen7_mfd_context *gen7_mfd_context)
1724 {
1725     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1726     struct object_surface *obj_surface;
1727     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1728
1729     obj_surface = decode_state->render_object;
1730
1731     if (obj_surface && obj_surface->private_data) {
1732         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1733     }
1734
1735     obj_surface = decode_state->reference_objects[1];
1736
1737     if (obj_surface && obj_surface->private_data) {
1738         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1739     }
1740
1741     BEGIN_BCS_BATCH(batch, 7);
1742     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1743
1744     if (dmv_write_buffer)
1745         OUT_BCS_RELOC(batch, dmv_write_buffer,
1746                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1747                       0);
1748     else
1749         OUT_BCS_BATCH(batch, 0);
1750
1751     OUT_BCS_BATCH(batch, 0);
1752     OUT_BCS_BATCH(batch, 0);
1753
1754     if (dmv_read_buffer)
1755         OUT_BCS_RELOC(batch, dmv_read_buffer,
1756                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1757                       0);
1758     else
1759         OUT_BCS_BATCH(batch, 0);
1760     
1761     OUT_BCS_BATCH(batch, 0);
1762     OUT_BCS_BATCH(batch, 0);
1763                   
1764     ADVANCE_BCS_BATCH(batch);
1765 }
1766
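/*
 * For the advanced profile (profile == 3) the buffer may contain start-code
 * emulation prevention bytes (0x00 0x00 0x03); skip them so that the returned
 * bit offset refers to the raw slice data handed to the hardware.
 */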
1767 static int
1768 gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1769 {
1770     int out_slice_data_bit_offset;
1771     int slice_header_size = in_slice_data_bit_offset / 8;
1772     int i, j;
1773
1774     if (profile != 3)
1775         out_slice_data_bit_offset = in_slice_data_bit_offset;
1776     else {
1777         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1778             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1779                 i++, j += 2;
1780             }
1781         }
1782
1783         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1784     }
1785
1786     return out_slice_data_bit_offset;
1787 }
1788
1789 static void
1790 gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
1791                         VAPictureParameterBufferVC1 *pic_param,
1792                         VASliceParameterBufferVC1 *slice_param,
1793                         VASliceParameterBufferVC1 *next_slice_param,
1794                         dri_bo *slice_data_bo,
1795                         struct gen7_mfd_context *gen7_mfd_context)
1796 {
1797     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1798     int next_slice_start_vert_pos;
1799     int macroblock_offset;
1800     uint8_t *slice_data = NULL;
1801
1802     dri_bo_map(slice_data_bo, 0);
1803     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1804     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1805                                                                slice_param->macroblock_offset,
1806                                                                pic_param->sequence_fields.bits.profile);
1807     dri_bo_unmap(slice_data_bo);
1808
1809     if (next_slice_param)
1810         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1811     else
1812         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1813
1814     BEGIN_BCS_BATCH(batch, 5);
1815     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1816     OUT_BCS_BATCH(batch, 
1817                   slice_param->slice_data_size - (macroblock_offset >> 3));
1818     OUT_BCS_BATCH(batch, 
1819                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1820     OUT_BCS_BATCH(batch,
1821                   slice_param->slice_vertical_position << 16 |
1822                   next_slice_start_vert_pos << 0);
1823     OUT_BCS_BATCH(batch,
1824                   (macroblock_offset & 0x7));
1825     ADVANCE_BCS_BATCH(batch);
1826 }
1827
1828 static void
1829 gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
1830                             struct decode_state *decode_state,
1831                             struct gen7_mfd_context *gen7_mfd_context)
1832 {
1833     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1834     VAPictureParameterBufferVC1 *pic_param;
1835     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1836     dri_bo *slice_data_bo;
1837     int i, j;
1838
1839     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1840     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1841
1842     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1843     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1844     intel_batchbuffer_emit_mi_flush(batch);
1845     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1846     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1847     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1848     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1849     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1850     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1851     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1852
1853     for (j = 0; j < decode_state->num_slice_params; j++) {
1854         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1855         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1856         slice_data_bo = decode_state->slice_datas[j]->bo;
1857         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1858
1859         if (j == decode_state->num_slice_params - 1)
1860             next_slice_group_param = NULL;
1861         else
1862             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1863
1864         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1865             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1866
1867             if (i < decode_state->slice_params[j]->num_elements - 1)
1868                 next_slice_param = slice_param + 1;
1869             else
1870                 next_slice_param = next_slice_group_param;
1871
1872             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1873             slice_param++;
1874         }
1875     }
1876
1877     intel_batchbuffer_end_atomic(batch);
1878     intel_batchbuffer_flush(batch);
1879 }
1880
1881 static void
1882 gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
1883                           struct decode_state *decode_state,
1884                           struct gen7_mfd_context *gen7_mfd_context)
1885 {
1886     struct object_surface *obj_surface;
1887     VAPictureParameterBufferJPEGBaseline *pic_param;
1888     int subsampling = SUBSAMPLE_YUV420;
1889     int fourcc = VA_FOURCC('I', 'M', 'C', '3');
1890
1891     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1892
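    /*
     * Derive the chroma subsampling (and hence the render-target fourcc) from
     * the per-component sampling factors in the frame header; only grayscale
     * and the common three-component layouts are supported.
     */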
1893     if (pic_param->num_components == 1)
1894         subsampling = SUBSAMPLE_YUV400;
1895     else if (pic_param->num_components == 3) {
1896         int h1 = pic_param->components[0].h_sampling_factor;
1897         int h2 = pic_param->components[1].h_sampling_factor;
1898         int h3 = pic_param->components[2].h_sampling_factor;
1899         int v1 = pic_param->components[0].v_sampling_factor;
1900         int v2 = pic_param->components[1].v_sampling_factor;
1901         int v3 = pic_param->components[2].v_sampling_factor;
1902
1903         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1904             v1 == 2 && v2 == 1 && v3 == 1) {
1905             subsampling = SUBSAMPLE_YUV420;
1906             fourcc = VA_FOURCC('I', 'M', 'C', '3');
1907         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1908                    v1 == 1 && v2 == 1 && v3 == 1) {
1909             subsampling = SUBSAMPLE_YUV422H;
1910             fourcc = VA_FOURCC('4', '2', '2', 'H');
1911         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1912                    v1 == 1 && v2 == 1 && v3 == 1) {
1913             subsampling = SUBSAMPLE_YUV444;
1914             fourcc = VA_FOURCC('4', '4', '4', 'P');
1915         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1916                    v1 == 1 && v2 == 1 && v3 == 1) {
1917             subsampling = SUBSAMPLE_YUV411;
1918             fourcc = VA_FOURCC('4', '1', '1', 'P');
1919         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1920                    v1 == 2 && v2 == 1 && v3 == 1) {
1921             subsampling = SUBSAMPLE_YUV422V;
1922             fourcc = VA_FOURCC('4', '2', '2', 'V');
1923         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1924                    v1 == 2 && v2 == 2 && v3 == 2) {
1925             subsampling = SUBSAMPLE_YUV422H;
1926             fourcc = VA_FOURCC('4', '2', '2', 'H');
1927         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1928                    v1 == 2 && v2 == 1 && v3 == 1) {
1929             subsampling = SUBSAMPLE_YUV422V;
1930             fourcc = VA_FOURCC('4', '2', '2', 'V');
1931         } else
1932             assert(0);
1933     }
1934     else {
1935         assert(0);
1936     }
1937
1938     /* Current decoded picture */
1939     obj_surface = decode_state->render_object;
1940     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1941
1942     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1943     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1944     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1945     gen7_mfd_context->pre_deblocking_output.valid = 1;
1946
1947     gen7_mfd_context->post_deblocking_output.bo = NULL;
1948     gen7_mfd_context->post_deblocking_output.valid = 0;
1949
1950     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1951     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1952
1953     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1954     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1955
1956     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1957     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1958
1959     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1960     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1961
1962     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1963     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1964 }
1965
1966 static const int va_to_gen7_jpeg_rotation[4] = {
1967     GEN7_JPEG_ROTATION_0,
1968     GEN7_JPEG_ROTATION_90,
1969     GEN7_JPEG_ROTATION_180,
1970     GEN7_JPEG_ROTATION_270
1971 };
1972
1973 static void
1974 gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
1975                         struct decode_state *decode_state,
1976                         struct gen7_mfd_context *gen7_mfd_context)
1977 {
1978     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1979     VAPictureParameterBufferJPEGBaseline *pic_param;
1980     int chroma_type = GEN7_YUV420;
1981     int frame_width_in_blks;
1982     int frame_height_in_blks;
1983
1984     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1985     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1986
1987     if (pic_param->num_components == 1)
1988         chroma_type = GEN7_YUV400;
1989     else if (pic_param->num_components == 3) {
1990         int h1 = pic_param->components[0].h_sampling_factor;
1991         int h2 = pic_param->components[1].h_sampling_factor;
1992         int h3 = pic_param->components[2].h_sampling_factor;
1993         int v1 = pic_param->components[0].v_sampling_factor;
1994         int v2 = pic_param->components[1].v_sampling_factor;
1995         int v3 = pic_param->components[2].v_sampling_factor;
1996
1997         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1998             v1 == 2 && v2 == 1 && v3 == 1)
1999             chroma_type = GEN7_YUV420;
2000         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2001                  v1 == 1 && v2 == 1 && v3 == 1)
2002             chroma_type = GEN7_YUV422H_2Y;
2003         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2004                  v1 == 1 && v2 == 1 && v3 == 1)
2005             chroma_type = GEN7_YUV444;
2006         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2007                  v1 == 1 && v2 == 1 && v3 == 1)
2008             chroma_type = GEN7_YUV411;
2009         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2010                  v1 == 2 && v2 == 1 && v3 == 1)
2011             chroma_type = GEN7_YUV422V_2Y;
2012         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2013                  v1 == 2 && v2 == 2 && v3 == 2)
2014             chroma_type = GEN7_YUV422H_4Y;
2015         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2016                  v1 == 2 && v2 == 1 && v3 == 1)
2017             chroma_type = GEN7_YUV422V_4Y;
2018         else
2019             assert(0);
2020     }
2021
2022     if (chroma_type == GEN7_YUV400 ||
2023         chroma_type == GEN7_YUV444 ||
2024         chroma_type == GEN7_YUV422V_2Y) {
2025         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2026         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2027     } else if (chroma_type == GEN7_YUV411) {
2028         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2029         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2030     } else {
2031         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2032         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2033     }
2034
2035     BEGIN_BCS_BATCH(batch, 3);
2036     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2037     OUT_BCS_BATCH(batch,
2038                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2039                   (chroma_type << 0));
2040     OUT_BCS_BATCH(batch,
2041                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2042                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2043     ADVANCE_BCS_BATCH(batch);
2044 }
2045
2046 static const int va_to_gen7_jpeg_hufftable[2] = {
2047     MFX_HUFFTABLE_ID_Y,
2048     MFX_HUFFTABLE_ID_UV
2049 };
2050
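/*
 * Each MFX_JPEG_HUFF_TABLE_STATE packet is 53 DWs: the command header and the
 * table id, followed by the DC code counts (12 bytes), DC values (12 bytes),
 * AC code counts (16 bytes) and AC values padded to 164 bytes, which together
 * fill the remaining 51 DWs.
 */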
2051 static void
2052 gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2053                                struct decode_state *decode_state,
2054                                struct gen7_mfd_context *gen7_mfd_context,
2055                                int num_tables)
2056 {
2057     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2058     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2059     int index;
2060
2061     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2062         return;
2063
2064     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2065
2066     for (index = 0; index < num_tables; index++) {
2067         int id = va_to_gen7_jpeg_hufftable[index];
2068         if (!huffman_table->load_huffman_table[index])
2069             continue;
2070         BEGIN_BCS_BATCH(batch, 53);
2071         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2072         OUT_BCS_BATCH(batch, id);
2073         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2074         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2075         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2076         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2077         ADVANCE_BCS_BATCH(batch);
2078     }
2079 }
2080
2081 static const int va_to_gen7_jpeg_qm[5] = {
2082     -1,
2083     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2084     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2085     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2086     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2087 };
2088
2089 static void
2090 gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
2091                        struct decode_state *decode_state,
2092                        struct gen7_mfd_context *gen7_mfd_context)
2093 {
2094     VAPictureParameterBufferJPEGBaseline *pic_param;
2095     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2096     int index;
2097
2098     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2099         return;
2100
2101     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2102     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2103
2104     assert(pic_param->num_components <= 3);
2105
2106     for (index = 0; index < pic_param->num_components; index++) {
2107         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2108         int qm_type;
2109         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2110         unsigned char raster_qm[64];
2111         int j;
2112
2113         if (id > 4 || id < 1)
2114             continue;
2115
2116         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2117             continue;
2118
2119         qm_type = va_to_gen7_jpeg_qm[id];
2120
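        /* The quantiser table is supplied in zig-zag scan order, while the
         * MFX QM state expects raster order, so remap it via zigzag_direct. */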
2121         for (j = 0; j < 64; j++)
2122             raster_qm[zigzag_direct[j]] = qm[j];
2123
2124         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2125     }
2126 }
2127
2128 static void
2129 gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
2130                          VAPictureParameterBufferJPEGBaseline *pic_param,
2131                          VASliceParameterBufferJPEGBaseline *slice_param,
2132                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2133                          dri_bo *slice_data_bo,
2134                          struct gen7_mfd_context *gen7_mfd_context)
2135 {
2136     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2137     int scan_component_mask = 0;
2138     int i;
2139
2140     assert(slice_param->num_components > 0);
2141     assert(slice_param->num_components < 4);
2142     assert(slice_param->num_components <= pic_param->num_components);
2143
2144     for (i = 0; i < slice_param->num_components; i++) {
2145         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2146         case 1:
2147             scan_component_mask |= (1 << 0);
2148             break;
2149         case 2:
2150             scan_component_mask |= (1 << 1);
2151             break;
2152         case 3:
2153             scan_component_mask |= (1 << 2);
2154             break;
2155         default:
2156             assert(0);
2157             break;
2158         }
2159     }
2160
2161     BEGIN_BCS_BATCH(batch, 6);
2162     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2163     OUT_BCS_BATCH(batch, 
2164                   slice_param->slice_data_size);
2165     OUT_BCS_BATCH(batch, 
2166                   slice_param->slice_data_offset);
2167     OUT_BCS_BATCH(batch,
2168                   slice_param->slice_horizontal_position << 16 |
2169                   slice_param->slice_vertical_position << 0);
2170     OUT_BCS_BATCH(batch,
2171                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2172                   (scan_component_mask << 27) |                 /* scan components */
2173                   (0 << 26) |   /* disable interrupt allowed */
2174                   (slice_param->num_mcus << 0));                /* MCU count */
2175     OUT_BCS_BATCH(batch,
2176                   (slice_param->restart_interval << 0));    /* RestartInterval */
2177     ADVANCE_BCS_BATCH(batch);
2178 }
2179
2180 /* Workaround for JPEG decoding, carried over from the Ivybridge (gen7) path */
2181 #ifdef JPEG_WA
2182
2183 VAStatus 
2184 i965_DestroySurfaces(VADriverContextP ctx,
2185                      VASurfaceID *surface_list,
2186                      int num_surfaces);
2187 VAStatus 
2188 i965_CreateSurfaces(VADriverContextP ctx,
2189                     int width,
2190                     int height,
2191                     int format,
2192                     int num_surfaces,
2193                     VASurfaceID *surfaces);
2194
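/*
 * A tiny pre-encoded 16x16 AVC intra clip; gen8_mfd_jpeg_wa() decodes it
 * before each JPEG picture as part of the JPEG decoding workaround.
 */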
2195 static struct {
2196     int width;
2197     int height;
2198     unsigned char data[32];
2199     int data_size;
2200     int data_bit_offset;
2201     int qp;
2202 } gen7_jpeg_wa_clip = {
2203     16,
2204     16,
2205     {
2206         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2207         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2208     },
2209     14,
2210     40,
2211     28,
2212 };
2213
2214 static void
2215 gen8_jpeg_wa_init(VADriverContextP ctx,
2216                   struct gen7_mfd_context *gen7_mfd_context)
2217 {
2218     struct i965_driver_data *i965 = i965_driver_data(ctx);
2219     VAStatus status;
2220     struct object_surface *obj_surface;
2221
2222     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2223         i965_DestroySurfaces(ctx,
2224                              &gen7_mfd_context->jpeg_wa_surface_id,
2225                              1);
2226
2227     status = i965_CreateSurfaces(ctx,
2228                                  gen7_jpeg_wa_clip.width,
2229                                  gen7_jpeg_wa_clip.height,
2230                                  VA_RT_FORMAT_YUV420,
2231                                  1,
2232                                  &gen7_mfd_context->jpeg_wa_surface_id);
2233     assert(status == VA_STATUS_SUCCESS);
2234
2235     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2236     assert(obj_surface);
2237     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2238     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2239
2240     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2241         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2242                                                                "JPEG WA data",
2243                                                                0x1000,
2244                                                                0x1000);
2245         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2246                        0,
2247                        gen7_jpeg_wa_clip.data_size,
2248                        gen7_jpeg_wa_clip.data);
2249     }
2250 }
2251
2252 static void
2253 gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2254                               struct gen7_mfd_context *gen7_mfd_context)
2255 {
2256     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2257
2258     BEGIN_BCS_BATCH(batch, 5);
2259     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2260     OUT_BCS_BATCH(batch,
2261                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2262                   (MFD_MODE_VLD << 15) | /* VLD mode */
2263                   (0 << 10) | /* disable Stream-Out */
2264                   (0 << 9)  | /* Post Deblocking Output */
2265                   (1 << 8)  | /* Pre Deblocking Output */
2266                   (0 << 5)  | /* not in stitch mode */
2267                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2268                   (MFX_FORMAT_AVC << 0));
2269     OUT_BCS_BATCH(batch,
2270                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2271                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2272                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2273                   (0 << 1)  |
2274                   (0 << 0));
2275     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2276     OUT_BCS_BATCH(batch, 0); /* reserved */
2277     ADVANCE_BCS_BATCH(batch);
2278 }
2279
2280 static void
2281 gen8_jpeg_wa_surface_state(VADriverContextP ctx,
2282                            struct gen7_mfd_context *gen7_mfd_context)
2283 {
2284     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2285     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2286
2287     BEGIN_BCS_BATCH(batch, 6);
2288     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2289     OUT_BCS_BATCH(batch, 0);
2290     OUT_BCS_BATCH(batch,
2291                   ((obj_surface->orig_width - 1) << 18) |
2292                   ((obj_surface->orig_height - 1) << 4));
2293     OUT_BCS_BATCH(batch,
2294                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2295                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2296                   (0 << 22) | /* surface object control state, ignored */
2297                   ((obj_surface->width - 1) << 3) | /* pitch */
2298                   (0 << 2)  | /* must be 0 */
2299                   (1 << 1)  | /* must be tiled */
2300                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2301     OUT_BCS_BATCH(batch,
2302                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2303                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2304     OUT_BCS_BATCH(batch,
2305                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2306                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2307     ADVANCE_BCS_BATCH(batch);
2308 }
2309
2310 static void
2311 gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2312                                  struct gen7_mfd_context *gen7_mfd_context)
2313 {
2314     struct i965_driver_data *i965 = i965_driver_data(ctx);
2315     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2316     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2317     dri_bo *intra_bo;
2318     int i;
2319
2320     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2321                             "intra row store",
2322                             128 * 64,
2323                             0x1000);
2324
2325     BEGIN_BCS_BATCH(batch, 61);
2326     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2327     OUT_BCS_RELOC(batch,
2328                   obj_surface->bo,
2329                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2330                   0);
1331     OUT_BCS_BATCH(batch, 0);
1332     OUT_BCS_BATCH(batch, 0);
1333
1334
1335     OUT_BCS_BATCH(batch, 0); /* post deblocking */
1336     OUT_BCS_BATCH(batch, 0);
1337     OUT_BCS_BATCH(batch, 0);
1338
1339     /* uncompressed-video & stream out 7-12 */
1340     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
1341     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
1342     OUT_BCS_BATCH(batch, 0);
1343     OUT_BCS_BATCH(batch, 0);
1344     OUT_BCS_BATCH(batch, 0);
1345     OUT_BCS_BATCH(batch, 0);
1346
1347     /* the DW 13-15 is for intra row store scratch */
1348     OUT_BCS_RELOC(batch,
1349                   intra_bo,
1350                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1351                   0);
1352     OUT_BCS_BATCH(batch, 0);
1353     OUT_BCS_BATCH(batch, 0);
1354
1355     /* the DW 16-18 is for deblocking filter */
1356     OUT_BCS_BATCH(batch, 0);
1357     OUT_BCS_BATCH(batch, 0);
1358     OUT_BCS_BATCH(batch, 0);
1359
1360     /* DW 19..50 */
1361     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
1362         OUT_BCS_BATCH(batch, 0);
1363         OUT_BCS_BATCH(batch, 0);
1364     }
1365     OUT_BCS_BATCH(batch, 0);
1366
1367     /* the DW 52-54 is for mb status address */
1368     OUT_BCS_BATCH(batch, 0);
1369     OUT_BCS_BATCH(batch, 0);
1370     OUT_BCS_BATCH(batch, 0);
1371     /* the DW 55-60 is for ILDB & second ILDB address */
1372     OUT_BCS_BATCH(batch, 0);
1373     OUT_BCS_BATCH(batch, 0);
1374     OUT_BCS_BATCH(batch, 0);
1375     OUT_BCS_BATCH(batch, 0);
1376     OUT_BCS_BATCH(batch, 0);
1377     OUT_BCS_BATCH(batch, 0);
2378
2379     ADVANCE_BCS_BATCH(batch);
2380
2381     dri_bo_unreference(intra_bo);
2382 }
2383
2384 static void
2385 gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2386                                      struct gen7_mfd_context *gen7_mfd_context)
2387 {
2388     struct i965_driver_data *i965 = i965_driver_data(ctx);
2389     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2390     dri_bo *bsd_mpc_bo, *mpr_bo;
2391
2392     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2393                               "bsd mpc row store",
2394                               11520, /* 1.5 * 120 * 64 */
2395                               0x1000);
2396
2397     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2398                           "mpr row store",
2399                           7680, /* 1.0 * 120 * 64 */
2400                           0x1000);
2401
2402     BEGIN_BCS_BATCH(batch, 10);
2403     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2404
2405     OUT_BCS_RELOC(batch,
2406                   bsd_mpc_bo,
2407                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2408                   0);
2409
2410     OUT_BCS_BATCH(batch, 0);
2411     OUT_BCS_BATCH(batch, 0);
2412
2413     OUT_BCS_RELOC(batch,
2414                   mpr_bo,
2415                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2416                   0);
2417     OUT_BCS_BATCH(batch, 0);
2418     OUT_BCS_BATCH(batch, 0);
2419
2420     OUT_BCS_BATCH(batch, 0);
2421     OUT_BCS_BATCH(batch, 0);
2422     OUT_BCS_BATCH(batch, 0);
2423
2424     ADVANCE_BCS_BATCH(batch);
2425
2426     dri_bo_unreference(bsd_mpc_bo);
2427     dri_bo_unreference(mpr_bo);
2428 }
2429
2430 static void
2431 gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2432                           struct gen7_mfd_context *gen7_mfd_context)
2433 {
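    /* No-op: no AVC scaling matrices are programmed for the workaround clip. */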
2434
2435 }
2436
2437 static void
2438 gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
2439                            struct gen7_mfd_context *gen7_mfd_context)
2440 {
2441     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2442     int img_struct = 0;
2443     int mbaff_frame_flag = 0;
2444     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2445
2446     BEGIN_BCS_BATCH(batch, 16);
2447     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2448     OUT_BCS_BATCH(batch, 
2449                   width_in_mbs * height_in_mbs);
2450     OUT_BCS_BATCH(batch, 
2451                   ((height_in_mbs - 1) << 16) | 
2452                   ((width_in_mbs - 1) << 0));
2453     OUT_BCS_BATCH(batch, 
2454                   (0 << 24) |
2455                   (0 << 16) |
2456                   (0 << 14) |
2457                   (0 << 13) |
2458                   (0 << 12) | /* differ from GEN6 */
2459                   (0 << 10) |
2460                   (img_struct << 8));
2461     OUT_BCS_BATCH(batch,
2462                   (1 << 10) | /* 4:2:0 */
2463                   (1 << 7) |  /* CABAC */
2464                   (0 << 6) |
2465                   (0 << 5) |
2466                   (0 << 4) |
2467                   (0 << 3) |
2468                   (1 << 2) |
2469                   (mbaff_frame_flag << 1) |
2470                   (0 << 0));
2471     OUT_BCS_BATCH(batch, 0);
2472     OUT_BCS_BATCH(batch, 0);
2473     OUT_BCS_BATCH(batch, 0);
2474     OUT_BCS_BATCH(batch, 0);
2475     OUT_BCS_BATCH(batch, 0);
2476     OUT_BCS_BATCH(batch, 0);
2477     OUT_BCS_BATCH(batch, 0);
2478     OUT_BCS_BATCH(batch, 0);
2479     OUT_BCS_BATCH(batch, 0);
2480     OUT_BCS_BATCH(batch, 0);
2481     OUT_BCS_BATCH(batch, 0);
2482     ADVANCE_BCS_BATCH(batch);
2483 }
2484
2485 static void
2486 gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2487                                   struct gen7_mfd_context *gen7_mfd_context)
2488 {
2489     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2490     int i;
2491
2492     BEGIN_BCS_BATCH(batch, 71);
2493     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2494
2495     /* reference surfaces 0..15 */
2496     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2497         OUT_BCS_BATCH(batch, 0); /* top */
2498         OUT_BCS_BATCH(batch, 0); /* bottom */
2499     }
2500         
2501         OUT_BCS_BATCH(batch, 0);
2502
2503     /* the current decoding frame/field */
2504     OUT_BCS_BATCH(batch, 0); /* top */
2505     OUT_BCS_BATCH(batch, 0);
2506     OUT_BCS_BATCH(batch, 0);
2507
2508     /* POC List */
2509     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2510         OUT_BCS_BATCH(batch, 0);
2511         OUT_BCS_BATCH(batch, 0);
2512     }
2513
2514     OUT_BCS_BATCH(batch, 0);
2515     OUT_BCS_BATCH(batch, 0);
2516
2517     ADVANCE_BCS_BATCH(batch);
2518 }
2519
2520 static void
2521 gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2522                                      struct gen7_mfd_context *gen7_mfd_context)
2523 {
2524     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2525
2526     BEGIN_BCS_BATCH(batch, 11);
2527     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2528     OUT_BCS_RELOC(batch,
2529                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2530                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2531                   0);
2532     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2533     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2534     OUT_BCS_BATCH(batch, 0);
2535     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2536     OUT_BCS_BATCH(batch, 0);
2537     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2538     OUT_BCS_BATCH(batch, 0);
2539     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2540     OUT_BCS_BATCH(batch, 0);
2541     ADVANCE_BCS_BATCH(batch);
2542 }
2543
2544 static void
2545 gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2546                             struct gen7_mfd_context *gen7_mfd_context)
2547 {
2548     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2549
2550     /* the input bitstream format on GEN7 differs from GEN6 */
2551     BEGIN_BCS_BATCH(batch, 6);
2552     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2553     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2554     OUT_BCS_BATCH(batch, 0);
2555     OUT_BCS_BATCH(batch,
2556                   (0 << 31) |
2557                   (0 << 14) |
2558                   (0 << 12) |
2559                   (0 << 10) |
2560                   (0 << 8));
2561     OUT_BCS_BATCH(batch,
2562                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2563                   (0 << 5)  |
2564                   (0 << 4)  |
2565                   (1 << 3) | /* LastSlice Flag */
2566                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2567     OUT_BCS_BATCH(batch, 0);
2568     ADVANCE_BCS_BATCH(batch);
2569 }
2570
2571 static void
2572 gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2573                              struct gen7_mfd_context *gen7_mfd_context)
2574 {
2575     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2576     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2577     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2578     int first_mb_in_slice = 0;
2579     int slice_type = SLICE_TYPE_I;
2580
2581     BEGIN_BCS_BATCH(batch, 11);
2582     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2583     OUT_BCS_BATCH(batch, slice_type);
2584     OUT_BCS_BATCH(batch, 
2585                   (num_ref_idx_l1 << 24) |
2586                   (num_ref_idx_l0 << 16) |
2587                   (0 << 8) |
2588                   (0 << 0));
2589     OUT_BCS_BATCH(batch, 
2590                   (0 << 29) |
2591                   (1 << 27) |   /* disable Deblocking */
2592                   (0 << 24) |
2593                   (gen7_jpeg_wa_clip.qp << 16) |
2594                   (0 << 8) |
2595                   (0 << 0));
2596     OUT_BCS_BATCH(batch, 
2597                   (slice_ver_pos << 24) |
2598                   (slice_hor_pos << 16) | 
2599                   (first_mb_in_slice << 0));
2600     OUT_BCS_BATCH(batch,
2601                   (next_slice_ver_pos << 16) |
2602                   (next_slice_hor_pos << 0));
2603     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2604     OUT_BCS_BATCH(batch, 0);
2605     OUT_BCS_BATCH(batch, 0);
2606     OUT_BCS_BATCH(batch, 0);
2607     OUT_BCS_BATCH(batch, 0);
2608     ADVANCE_BCS_BATCH(batch);
2609 }
2610
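/*
 * JPEG workaround: decode the small embedded AVC clip before the real
 * JPEG picture (enabled via JPEG_WA), presumably so the MFX engine
 * starts the JPEG decode from a known state.
 */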
2611 static void
2612 gen8_mfd_jpeg_wa(VADriverContextP ctx,
2613                  struct gen7_mfd_context *gen7_mfd_context)
2614 {
2615     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2616     gen8_jpeg_wa_init(ctx, gen7_mfd_context);
2617     intel_batchbuffer_emit_mi_flush(batch);
2618     gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2619     gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2620     gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2621     gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2622     gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2623     gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2624     gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2625
2626     gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2627     gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2628     gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2629 }
2630
2631 #endif
2632
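/*
 * Top-level JPEG baseline decode. The slice parameters are walked
 * twice: a first pass finds the largest Huffman table selector so the
 * right number of tables can be programmed, and a second pass emits
 * the per-scan BSD objects.
 */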
2633 void
2634 gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
2635                              struct decode_state *decode_state,
2636                              struct gen7_mfd_context *gen7_mfd_context)
2637 {
2638     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2639     VAPictureParameterBufferJPEGBaseline *pic_param;
2640     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2641     dri_bo *slice_data_bo;
2642     int i, j, max_selector = 0;
2643
2644     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2645     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2646
2647     /* Currently only support Baseline DCT */
2648     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2649     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2650 #ifdef JPEG_WA
2651     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
2652 #endif
2653     intel_batchbuffer_emit_mi_flush(batch);
2654     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2655     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2656     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2657     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2658     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2659
2660     for (j = 0; j < decode_state->num_slice_params; j++) {
2661         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2662         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2663         slice_data_bo = decode_state->slice_datas[j]->bo;
2664         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2665
2666         if (j == decode_state->num_slice_params - 1)
2667             next_slice_group_param = NULL;
2668         else
2669             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2670
2671         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2672             int component;
2673
2674             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2675
2676             if (i < decode_state->slice_params[j]->num_elements - 1)
2677                 next_slice_param = slice_param + 1;
2678             else
2679                 next_slice_param = next_slice_group_param;
2680
2681             for (component = 0; component < slice_param->num_components; component++) {
2682                 if (max_selector < slice_param->components[component].dc_table_selector)
2683                     max_selector = slice_param->components[component].dc_table_selector;
2684
2685                 if (max_selector < slice_param->components[component].ac_table_selector)
2686                     max_selector = slice_param->components[component].ac_table_selector;
2687             }
2688
2689             slice_param++;
2690         }
2691     }
2692
2693     assert(max_selector < 2);
2694     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2695
2696     for (j = 0; j < decode_state->num_slice_params; j++) {
2697         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2698         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2699         slice_data_bo = decode_state->slice_datas[j]->bo;
2700         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2701
2702         if (j == decode_state->num_slice_params - 1)
2703             next_slice_group_param = NULL;
2704         else
2705             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2706
2707         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2708             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2709
2710             if (i < decode_state->slice_params[j]->num_elements - 1)
2711                 next_slice_param = slice_param + 1;
2712             else
2713                 next_slice_param = next_slice_group_param;
2714
2715             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2716             slice_param++;
2717         }
2718     }
2719
2720     intel_batchbuffer_end_atomic(batch);
2721     intel_batchbuffer_flush(batch);
2722 }
2723
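/*
 * DC/AC dequantization lookup tables as defined by the VP8
 * specification (RFC 6386): they map the 7-bit quantization index to
 * the actual quantizer value, e.g. index 0 -> DC 4 / AC 4,
 * index 127 -> DC 157 / AC 284.
 */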
2724 static const int vp8_dc_qlookup[128] =
2725 {
2726       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
2727      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
2728      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
2729      44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
2730      59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
2731      75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
2732      91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
2733     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
2734 };
2735
2736 static const int vp8_ac_qlookup[128] =
2737 {
2738       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
2739      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
2740      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
2741      52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
2742      78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
2743     110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
2744     155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
2745     213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
2746 };
2747
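/* Clamp a quantization index to the valid [0, 127] range before table lookup. */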
2748 static inline unsigned int vp8_clip_quantization_index(int index)
2749 {
2750     if (index > 127)
2751         return 127;
2752     else if (index < 0)
2753         return 0;
2754
2755     return index;
2756 }
2757
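/*
 * Per-frame VP8 setup: bind the render target as either the pre- or
 * post-deblocking output depending on loop_filter_disable, and
 * (re)allocate the row-store scratch buffers sized from the frame
 * width in macroblocks.
 */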
2758 static void
2759 gen8_mfd_vp8_decode_init(VADriverContextP ctx,
2760                           struct decode_state *decode_state,
2761                           struct gen7_mfd_context *gen7_mfd_context)
2762 {
2763     struct object_surface *obj_surface;
2764     struct i965_driver_data *i965 = i965_driver_data(ctx);
2765     dri_bo *bo;
2766     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2767     int width_in_mbs = (pic_param->frame_width + 15) / 16;
2768     int height_in_mbs = (pic_param->frame_height + 15) / 16;
2769
2770     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
2771     assert(height_in_mbs > 0 && height_in_mbs <= 256);
2772
2773     /* Current decoded picture */
2774     obj_surface = decode_state->render_object;
2775     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
2776
2777     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2778     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
2779     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
2780     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
2781
2782     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2783     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2784     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2785     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
2786
2787     /* The same as AVC */
2788     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2789     bo = dri_bo_alloc(i965->intel.bufmgr,
2790                       "intra row store",
2791                       width_in_mbs * 64,
2792                       0x1000);
2793     assert(bo);
2794     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
2795     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
2796
2797     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2798     bo = dri_bo_alloc(i965->intel.bufmgr,
2799                       "deblocking filter row store",
2800                       width_in_mbs * 64 * 4,
2801                       0x1000);
2802     assert(bo);
2803     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
2804     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
2805
2806     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2807     bo = dri_bo_alloc(i965->intel.bufmgr,
2808                       "bsd mpc row store",
2809                       width_in_mbs * 64 * 2,
2810                       0x1000);
2811     assert(bo);
2812     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
2813     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
2814
2815     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2816     bo = dri_bo_alloc(i965->intel.bufmgr,
2817                       "mpr row store",
2818                       width_in_mbs * 64 * 2,
2819                       0x1000);
2820     assert(bo);
2821     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
2822     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
2823
2824     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2825 }
2826
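/*
 * Program MFX_VP8_PIC_STATE (38 DWs): frame size in MBs, frame-level
 * flags, per-segment quantizer values derived from the lookup tables
 * above, the coefficient probability buffer, mode/MV probabilities
 * and loop filter deltas.
 */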
2827 static void
2828 gen8_mfd_vp8_pic_state(VADriverContextP ctx,
2829                        struct decode_state *decode_state,
2830                        struct gen7_mfd_context *gen7_mfd_context)
2831 {
2832     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2833     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
2834     VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
2835     VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
2836     dri_bo *probs_bo = decode_state->probability_data->bo;
2837     int i, j, log2num;
2838     unsigned int quantization_value[4][6];
2839
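    /*
     * num_of_partitions includes the control partition (cf. the 2..9
     * asserts in gen8_mfd_vp8_bsd_object), so num_of_partitions - 1 is
     * the token partition count (1, 2, 4 or 8) and its log2 (0..3) is
     * what the hardware expects, e.g. 9 partitions -> 8 token
     * partitions -> log2num 3.
     */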
2840     log2num = (int)log2(slice_param->num_of_partitions - 1);
2841
2842     BEGIN_BCS_BATCH(batch, 38);
2843     OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
2844     OUT_BCS_BATCH(batch,
2845                   (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
2846                   (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
2847     OUT_BCS_BATCH(batch,
2848                   log2num << 24 |
2849                   pic_param->pic_fields.bits.sharpness_level << 16 |
2850                   pic_param->pic_fields.bits.sign_bias_alternate << 13 |
2851                   pic_param->pic_fields.bits.sign_bias_golden << 12 |
2852                   pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
2853                   pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
2854                   pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
2855                   pic_param->pic_fields.bits.segmentation_enabled << 8 |
2856                   0 << 7 | /* segmentation id streamin disabled */
2857                   0 << 6 | /* segmentation id streamout disabled */
2858                   pic_param->pic_fields.bits.key_frame << 5 |
2859                   pic_param->pic_fields.bits.filter_type << 4 |
2860                   (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
2861                   !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */
2862
2863     OUT_BCS_BATCH(batch,
2864                   pic_param->loop_filter_level[3] << 24 |
2865                   pic_param->loop_filter_level[2] << 16 |
2866                   pic_param->loop_filter_level[1] <<  8 |
2867                   pic_param->loop_filter_level[0] <<  0);
2868
2869     /* Quantizer values for the 4 segments, DW4-DW15 */
2870     for (i = 0; i < 4; i++) {
2871         quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])]; /* yac */
2872         quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])]; /* ydc */
2873         quantization_value[i][2] = 2 * vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])]; /* y2dc */
2874         /* 101581 >> 16 is equivalent to multiplying by 155/100: 1.55 * 65536 = 101580.8 */
2875         quantization_value[i][3] = (101581 * vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16; /* y2ac */
2876         quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])]; /* uvdc */
2877         quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])]; /* uvac */
2878
2879         quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
2880         quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);
2881
2882         OUT_BCS_BATCH(batch,
2883                       quantization_value[i][0] << 16 | /* Y1AC */
2884                       quantization_value[i][1] <<  0); /* Y1DC */
2885         OUT_BCS_BATCH(batch,
2886                       quantization_value[i][5] << 16 | /* UVAC */
2887                       quantization_value[i][4] <<  0); /* UVDC */
2888         OUT_BCS_BATCH(batch,
2889                       quantization_value[i][3] << 16 | /* Y2AC */
2890                       quantization_value[i][2] <<  0); /* Y2DC */
2891     }
2892
2893     /* CoeffProbability table for non-key frame, DW16-DW18 */
2894     if (probs_bo) {
2895         OUT_BCS_RELOC(batch, probs_bo,
2896                       0, I915_GEM_DOMAIN_INSTRUCTION,
2897                       0);
2898         OUT_BCS_BATCH(batch, 0);
2899         OUT_BCS_BATCH(batch, 0);
2900     } else {
2901         OUT_BCS_BATCH(batch, 0);
2902         OUT_BCS_BATCH(batch, 0);
2903         OUT_BCS_BATCH(batch, 0);
2904     }
2905
2906     OUT_BCS_BATCH(batch,
2907                   pic_param->mb_segment_tree_probs[2] << 16 |
2908                   pic_param->mb_segment_tree_probs[1] <<  8 |
2909                   pic_param->mb_segment_tree_probs[0] <<  0);
2910
2911     OUT_BCS_BATCH(batch,
2912                   pic_param->prob_skip_false << 24 |
2913                   pic_param->prob_intra      << 16 |
2914                   pic_param->prob_last       <<  8 |
2915                   pic_param->prob_gf         <<  0);
2916
2917     OUT_BCS_BATCH(batch,
2918                   pic_param->y_mode_probs[3] << 24 |
2919                   pic_param->y_mode_probs[2] << 16 |
2920                   pic_param->y_mode_probs[1] <<  8 |
2921                   pic_param->y_mode_probs[0] <<  0);
2922
2923     OUT_BCS_BATCH(batch,
2924                   pic_param->uv_mode_probs[2] << 16 |
2925                   pic_param->uv_mode_probs[1] <<  8 |
2926                   pic_param->uv_mode_probs[0] <<  0);
2927     
2928     /* MV update value, DW23-DW32 */
2929     for (i = 0; i < 2; i++) {
2930         for (j = 0; j < 20; j += 4) {
2931             OUT_BCS_BATCH(batch,
2932                           (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
2933                           pic_param->mv_probs[i][j + 2] << 16 |
2934                           pic_param->mv_probs[i][j + 1] <<  8 |
2935                           pic_param->mv_probs[i][j + 0] <<  0);
2936         }
2937     }
2938
2939     OUT_BCS_BATCH(batch,
2940                   pic_param->loop_filter_deltas_ref_frame[3] << 24 |
2941                   pic_param->loop_filter_deltas_ref_frame[2] << 16 |
2942                   pic_param->loop_filter_deltas_ref_frame[1] <<  8 |
2943                   pic_param->loop_filter_deltas_ref_frame[0] <<  0);
2944
2945     OUT_BCS_BATCH(batch,
2946                   pic_param->loop_filter_deltas_mode[3] << 24 |
2947                   pic_param->loop_filter_deltas_mode[2] << 16 |
2948                   pic_param->loop_filter_deltas_mode[1] <<  8 |
2949                   pic_param->loop_filter_deltas_mode[0] <<  0);
2950
2951     /* segmentation id stream base address, DW35-DW37 */
2952     OUT_BCS_BATCH(batch, 0);
2953     OUT_BCS_BATCH(batch, 0);
2954     OUT_BCS_BATCH(batch, 0);
2955     ADVANCE_BCS_BATCH(batch);
2956 }
2957
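/*
 * Program MFD_VP8_BSD_OBJECT: partition 0 bool-coder context plus a
 * (size, offset) pair for each of the up to 9 partitions; unused
 * slots are zeroed.
 */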
2958 static void
2959 gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
2960                         VAPictureParameterBufferVP8 *pic_param,
2961                         VASliceParameterBufferVP8 *slice_param,
2962                         dri_bo *slice_data_bo,
2963                         struct gen7_mfd_context *gen7_mfd_context)
2964 {
2965     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2966     int i, log2num;
2967     unsigned int offset = slice_param->slice_data_offset;
2968
2969     assert(slice_param->num_of_partitions >= 2);
2970     assert(slice_param->num_of_partitions <= 9);
2971
2972     log2num = (int)log2(slice_param->num_of_partitions - 1);
2973
2974     BEGIN_BCS_BATCH(batch, 22);
2975     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
2976     OUT_BCS_BATCH(batch,
2977                   pic_param->bool_coder_ctx.count << 16 | /* Partition 0 CPBAC Entropy Count */
2978                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 CPBAC Entropy Range */
2979                   log2num << 4 |
2980                   (slice_param->macroblock_offset & 0x7));
2981     OUT_BCS_BATCH(batch,
2982                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 CPBAC Entropy Value */
2983                   0);
2984
2985     for (i = 0; i < 9; i++) {
2986         if (i < slice_param->num_of_partitions) {
2987             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
2988             OUT_BCS_BATCH(batch, offset);
2989         } else {
2990             OUT_BCS_BATCH(batch, 0);
2991             OUT_BCS_BATCH(batch, 0);
2992         }
2993
2994         offset += slice_param->partition_size[i];
2995     }
2996
2997     OUT_BCS_BATCH(batch,
2998                   1 << 31 | /* concealment method */
2999                   0);
3000
3001     ADVANCE_BCS_BATCH(batch);
3002 }
3003
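/*
 * Top-level VP8 decode. A VP8 frame is submitted as exactly one slice
 * (see the asserts below), so a single BSD object covers the whole
 * frame.
 */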
3004 void
3005 gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
3006                             struct decode_state *decode_state,
3007                             struct gen7_mfd_context *gen7_mfd_context)
3008 {
3009     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3010     VAPictureParameterBufferVP8 *pic_param;
3011     VASliceParameterBufferVP8 *slice_param;
3012     dri_bo *slice_data_bo;
3013
3014     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3015     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
3016
3017     /* one slice per frame */
3018     assert(decode_state->num_slice_params == 1);
3019     assert(decode_state->slice_params[0]->num_elements == 1);
3020     assert(decode_state->slice_params && decode_state->slice_params[0]->buffer);
3021     assert(decode_state->slice_datas[0]->bo);
3022
3023     assert(decode_state->probability_data);
3024
3025     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
3026     slice_data_bo = decode_state->slice_datas[0]->bo;
3027
3028     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
3029     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3030     intel_batchbuffer_emit_mi_flush(batch);
3031     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3032     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3033     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3034     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
3035     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
3036     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
3037     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
3038     intel_batchbuffer_end_atomic(batch);
3039     intel_batchbuffer_flush(batch);
3040 }
3041
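/*
 * Entry point installed as base.run: sanity-check the input, then
 * dispatch to the per-codec decode routine based on the VA profile.
 */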
3042 static VAStatus
3043 gen8_mfd_decode_picture(VADriverContextP ctx, 
3044                         VAProfile profile, 
3045                         union codec_state *codec_state,
3046                         struct hw_context *hw_context)
3047
3048 {
3049     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3050     struct decode_state *decode_state = &codec_state->decode;
3051     VAStatus vaStatus;
3052
3053     assert(gen7_mfd_context);
3054
3055     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3056
3057     if (vaStatus != VA_STATUS_SUCCESS)
3058         goto out;
3059
3060     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3061
3062     switch (profile) {
3063     case VAProfileMPEG2Simple:
3064     case VAProfileMPEG2Main:
3065         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3066         break;
3067         
3068     case VAProfileH264ConstrainedBaseline:
3069     case VAProfileH264Main:
3070     case VAProfileH264High:
3071         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3072         break;
3073
3074     case VAProfileVC1Simple:
3075     case VAProfileVC1Main:
3076     case VAProfileVC1Advanced:
3077         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3078         break;
3079
3080     case VAProfileJPEGBaseline:
3081         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3082         break;
3083
3084     case VAProfileVP8Version0_3:
3085         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
3086         break;
3087
3088     default:
3089         assert(0);
3090         break;
3091     }
3092
3093     vaStatus = VA_STATUS_SUCCESS;
3094
3095 out:
3096     return vaStatus;
3097 }
3098
3099 static void
3100 gen8_mfd_context_destroy(void *hw_context)
3101 {
3102     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3103
3104     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3105     gen7_mfd_context->post_deblocking_output.bo = NULL;
3106
3107     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3108     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3109
3110     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3111     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3112
3113     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3114     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3115
3116     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3117     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3118
3119     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3120     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3121
3122     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3123     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3124
3125     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3126
3127     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3128     free(gen7_mfd_context);
3129 }
3130
3131 static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
3132                                     struct gen7_mfd_context *gen7_mfd_context)
3133 {
3134     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3135     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3136     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3137     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3138 }
3139
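/*
 * Create the decoder hw_context: allocate the (gen7-shared) MFD
 * context, create its batchbuffer, reset the reference surface slots
 * and do any codec-specific init for MPEG-2 and AVC.
 */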
3140 struct hw_context *
3141 gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3142 {
3143     struct intel_driver_data *intel = intel_driver_data(ctx);
3144     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3145     int i;
3146
3147     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
3148     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
3149     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3150
3151     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3152         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3153         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3154     }
3155
3156     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3157
3158     switch (obj_config->profile) {
3159     case VAProfileMPEG2Simple:
3160     case VAProfileMPEG2Main:
3161         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3162         break;
3163
3164     case VAProfileH264ConstrainedBaseline:
3165     case VAProfileH264Main:
3166     case VAProfileH264High:
3167         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
3168         break;
3169     default:
3170         break;
3171     }
3172     return (struct hw_context *)gen7_mfd_context;
3173 }