VPP: Fix Coverity alert on uninitialized vpp_kernels
[platform/upstream/libva-intel-driver.git] / src / gen7_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include "sysdeps.h"
30
31 #include <va/va_dec_jpeg.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_decoder_utils.h"
38
39 #include "gen7_mfd.h"
40 #include "intel_media.h"
41
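/*
 * Standard zig-zag scan order. gen7_mfd_mpeg2_qm_state() below uses this
 * table to re-order the MPEG-2 quantiser matrices from the zig-zag order in
 * which they arrive in the VA IQ matrix buffer into the natural (raster)
 * order programmed through MFX_QM_STATE.
 */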
42 static const uint32_t zigzag_direct[64] = {
43     0,   1,  8, 16,  9,  2,  3, 10,
44     17, 24, 32, 25, 18, 11,  4,  5,
45     12, 19, 26, 33, 40, 48, 41, 34,
46     27, 20, 13,  6,  7, 14, 21, 28,
47     35, 42, 49, 56, 57, 50, 43, 36,
48     29, 22, 15, 23, 30, 37, 44, 51,
49     58, 59, 52, 45, 38, 31, 39, 46,
50     53, 60, 61, 54, 47, 55, 62, 63
51 };
52
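/*
 * Attach AVC-specific private data to the decode surface: a GenAvcSurface
 * holding the direct-MV read/write buffers used for temporal/direct
 * prediction. A separate bottom-field DMV buffer is only needed for field
 * pictures coded without direct_8x8_inference (see dmv_bottom_flag below).
 */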
53 static void
54 gen7_mfd_init_avc_surface(VADriverContextP ctx, 
55                           VAPictureParameterBufferH264 *pic_param,
56                           struct object_surface *obj_surface)
57 {
58     struct i965_driver_data *i965 = i965_driver_data(ctx);
59     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
60     int width_in_mbs, height_in_mbs;
61
62     obj_surface->free_private_data = gen_free_avc_surface;
63     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
64     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
65
66     if (!gen7_avc_surface) {
67         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
           assert(gen7_avc_surface); /* guard the dereferences below against allocation failure */
68         gen7_avc_surface->frame_store_id = -1;
69         assert((obj_surface->size & 0x3f) == 0);
70         obj_surface->private_data = gen7_avc_surface;
71     }
72
73     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
74                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
75
76     if (gen7_avc_surface->dmv_top == NULL) {
77         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
78                                                  "direct mv w/r buffer",
79                                                  width_in_mbs * (height_in_mbs + 1) * 64,
80                                                  0x1000);
81         assert(gen7_avc_surface->dmv_top);
82     }
83
84     if (gen7_avc_surface->dmv_bottom_flag &&
85         gen7_avc_surface->dmv_bottom == NULL) {
86         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
87                                                     "direct mv w/r buffer",
88                                                     width_in_mbs * (height_in_mbs + 1) * 64,
89                                                     0x1000);
90         assert(gen7_avc_surface->dmv_bottom);
91     }
92 }
93
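/*
 * Emit MFX_PIPE_MODE_SELECT: choose the codec (standard_select), long-format
 * VLD decode, and enable the pre-/post-deblocking outputs according to the
 * flags set up in gen7_mfd_context.
 */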
94 static void
95 gen7_mfd_pipe_mode_select(VADriverContextP ctx,
96                           struct decode_state *decode_state,
97                           int standard_select,
98                           struct gen7_mfd_context *gen7_mfd_context)
99 {
100     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
101
102     assert(standard_select == MFX_FORMAT_MPEG2 ||
103            standard_select == MFX_FORMAT_AVC ||
104            standard_select == MFX_FORMAT_VC1 ||
105            standard_select == MFX_FORMAT_JPEG);
106
107     BEGIN_BCS_BATCH(batch, 5);
108     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
109     OUT_BCS_BATCH(batch,
110                   (MFX_LONG_MODE << 17) | /* Currently only long format is supported */
111                   (MFD_MODE_VLD << 15) | /* VLD mode */
112                   (0 << 10) | /* disable Stream-Out */
113                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
114                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
115                   (0 << 5)  | /* not in stitch mode */
116                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
117                   (standard_select << 0));
118     OUT_BCS_BATCH(batch,
119                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
120                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
121                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
122                   (0 << 1)  |
123                   (0 << 0));
124     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
125     OUT_BCS_BATCH(batch, 0); /* reserved */
126     ADVANCE_BCS_BATCH(batch);
127 }
128
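/*
 * Emit MFX_SURFACE_STATE for the destination surface: planar 4:2:0
 * (monochrome for Y800), Y-major tiled, with the Cb/Cr plane Y offsets
 * taken from the object surface layout.
 */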
129 static void
130 gen7_mfd_surface_state(VADriverContextP ctx,
131                        struct decode_state *decode_state,
132                        int standard_select,
133                        struct gen7_mfd_context *gen7_mfd_context)
134 {
135     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
136     struct object_surface *obj_surface = decode_state->render_object;
137     unsigned int y_cb_offset;
138     unsigned int y_cr_offset;
139     unsigned int surface_format;
140
141     assert(obj_surface);
142
143     y_cb_offset = obj_surface->y_cb_offset;
144     y_cr_offset = obj_surface->y_cr_offset;
145
146     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
147         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
148
149     BEGIN_BCS_BATCH(batch, 6);
150     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
151     OUT_BCS_BATCH(batch, 0);
152     OUT_BCS_BATCH(batch,
153                   ((obj_surface->orig_height - 1) << 18) |
154                   ((obj_surface->orig_width - 1) << 4));
155     OUT_BCS_BATCH(batch,
156                   (surface_format << 28) | /* 420 planar YUV surface */
157                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
158                   (0 << 22) | /* surface object control state, ignored */
159                   ((obj_surface->width - 1) << 3) | /* pitch */
160                   (0 << 2)  | /* must be 0 */
161                   (1 << 1)  | /* must be tiled */
162                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
163     OUT_BCS_BATCH(batch,
164                   (0 << 16) | /* X offset for U(Cb), must be 0 */
165                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
166     OUT_BCS_BATCH(batch,
167                   (0 << 16) | /* X offset for V(Cr), must be 0 */
168                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
169     ADVANCE_BCS_BATCH(batch);
170 }
171
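/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 dwords): pre-/post-deblocking outputs,
 * the intra and deblocking-filter row-store scratch buffers, and the
 * addresses of up to 16 reference surfaces in DW 7..22.
 */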
172 static void
173 gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
174                              struct decode_state *decode_state,
175                              int standard_select,
176                              struct gen7_mfd_context *gen7_mfd_context)
177 {
178     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
179     int i;
180
181     BEGIN_BCS_BATCH(batch, 24);
182     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
183     if (gen7_mfd_context->pre_deblocking_output.valid)
184         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
185                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
186                       0);
187     else
188         OUT_BCS_BATCH(batch, 0);
189
190     if (gen7_mfd_context->post_deblocking_output.valid)
191         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
192                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
193                       0);
194     else
195         OUT_BCS_BATCH(batch, 0);
196
197     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
198     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
199
200     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
201         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
202                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
203                       0);
204     else
205         OUT_BCS_BATCH(batch, 0);
206
207     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
208         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
209                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
210                       0);
211     else
212         OUT_BCS_BATCH(batch, 0);
213
214     /* DW 7..22 */
215     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
216         struct object_surface *obj_surface;
217
218         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
219             gen7_mfd_context->reference_surface[i].obj_surface &&
220             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
221             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
222
223             OUT_BCS_RELOC(batch, obj_surface->bo,
224                           I915_GEM_DOMAIN_INSTRUCTION, 0,
225                           0);
226         } else {
227             OUT_BCS_BATCH(batch, 0);
228         }
229     }
230
231     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
232     ADVANCE_BCS_BATCH(batch);
233 }
234
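/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE: point the indirect bitstream object
 * base at the slice data buffer; the other base addresses are not used in
 * VLD decode mode.
 */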
235 static void
236 gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
237                                  dri_bo *slice_data_bo,
238                                  int standard_select,
239                                  struct gen7_mfd_context *gen7_mfd_context)
240 {
241     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
242
243     BEGIN_BCS_BATCH(batch, 11);
244     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
245     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
246     OUT_BCS_BATCH(batch, 0x80000000); /* must be set, up to 2G */
247     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
248     OUT_BCS_BATCH(batch, 0);
249     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
250     OUT_BCS_BATCH(batch, 0);
251     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
252     OUT_BCS_BATCH(batch, 0);
253     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
254     OUT_BCS_BATCH(batch, 0);
255     ADVANCE_BCS_BATCH(batch);
256 }
257
258 static void
259 gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
260                                  struct decode_state *decode_state,
261                                  int standard_select,
262                                  struct gen7_mfd_context *gen7_mfd_context)
263 {
264     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
265
266     BEGIN_BCS_BATCH(batch, 4);
267     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
268
269     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
270         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
271                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
272                       0);
273     else
274         OUT_BCS_BATCH(batch, 0);
275
276     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
277         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
278                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
279                       0);
280     else
281         OUT_BCS_BATCH(batch, 0);
282
283     if (gen7_mfd_context->bitplane_read_buffer.valid)
284         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
285                       I915_GEM_DOMAIN_INSTRUCTION, 0,
286                       0);
287     else
288         OUT_BCS_BATCH(batch, 0);
289
290     ADVANCE_BCS_BATCH(batch);
291 }
292
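/*
 * Load one quantiser matrix (qm_length bytes, at most 64) into the MFX unit
 * via MFX_QM_STATE. The matrix is staged in a 16-dword buffer because the
 * command always carries a full 64-byte payload.
 */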
293 static void
294 gen7_mfd_qm_state(VADriverContextP ctx,
295                   int qm_type,
296                   unsigned char *qm,
297                   int qm_length,
298                   struct gen7_mfd_context *gen7_mfd_context)
299 {
300     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
301     unsigned int qm_buffer[16];
302
303     assert(qm_length <= 16 * 4);
        memset(qm_buffer, 0, sizeof(qm_buffer)); /* zero the tail: qm_length may be less than 64 bytes (e.g. the 48-byte AVC 4x4 set) */
304     memcpy(qm_buffer, qm, qm_length);
305
306     BEGIN_BCS_BATCH(batch, 18);
307     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
308     OUT_BCS_BATCH(batch, qm_type << 0);
309     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
310     ADVANCE_BCS_BATCH(batch);
311 }
312
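/*
 * Emit MFX_AVC_IMG_STATE: frame dimensions in macroblocks, chroma QP index
 * offsets, and the picture-level coding flags (field/MBAFF, CABAC,
 * transform_8x8, ...) derived from the VA picture parameters.
 */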
313 static void
314 gen7_mfd_avc_img_state(VADriverContextP ctx,
315                        struct decode_state *decode_state,
316                        struct gen7_mfd_context *gen7_mfd_context)
317 {
318     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
319     int img_struct;
320     int mbaff_frame_flag;
321     unsigned int width_in_mbs, height_in_mbs;
322     VAPictureParameterBufferH264 *pic_param;
323
324     assert(decode_state->pic_param && decode_state->pic_param->buffer);
325     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
326
327     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
328         img_struct = 1;
329     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
330         img_struct = 3;
331     else
332         img_struct = 0;
333
334     if ((img_struct & 0x1) == 0x1) {
335         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
336     } else {
337         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
338     }
339
340     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
341         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
342         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
343     } else {
344         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
345     }
346
347     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
348                         !pic_param->pic_fields.bits.field_pic_flag);
349
350     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
351     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
352
353     /* The MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
354     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
355            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
356     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
357
358     BEGIN_BCS_BATCH(batch, 16);
359     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
360     OUT_BCS_BATCH(batch, 
361                   (width_in_mbs * height_in_mbs - 1));
362     OUT_BCS_BATCH(batch, 
363                   ((height_in_mbs - 1) << 16) | 
364                   ((width_in_mbs - 1) << 0));
365     OUT_BCS_BATCH(batch, 
366                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
367                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
368                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
369                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
370                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
371                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
372                   (img_struct << 8));
373     OUT_BCS_BATCH(batch,
374                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
375                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
376                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
377                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
378                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
379                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
380                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
381                   (mbaff_frame_flag << 1) |
382                   (pic_param->pic_fields.bits.field_pic_flag << 0));
383     OUT_BCS_BATCH(batch, 0);
384     OUT_BCS_BATCH(batch, 0);
385     OUT_BCS_BATCH(batch, 0);
386     OUT_BCS_BATCH(batch, 0);
387     OUT_BCS_BATCH(batch, 0);
388     OUT_BCS_BATCH(batch, 0);
389     OUT_BCS_BATCH(batch, 0);
390     OUT_BCS_BATCH(batch, 0);
391     OUT_BCS_BATCH(batch, 0);
392     OUT_BCS_BATCH(batch, 0);
393     OUT_BCS_BATCH(batch, 0);
394     ADVANCE_BCS_BATCH(batch);
395 }
396
397 static void
398 gen7_mfd_avc_qm_state(VADriverContextP ctx,
399                       struct decode_state *decode_state,
400                       struct gen7_mfd_context *gen7_mfd_context)
401 {
402     VAIQMatrixBufferH264 *iq_matrix;
403     VAPictureParameterBufferH264 *pic_param;
404
405     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
406         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
407     else
408         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
409
410     assert(decode_state->pic_param && decode_state->pic_param->buffer);
411     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
412
413     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
414     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
415
416     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
417         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
418         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
419     }
420 }
421
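/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 dwords): the direct-MV buffer addresses
 * for the 16 reference frame slots and for the current picture, followed by
 * the top/bottom field order counts (POC list).
 */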
422 static void
423 gen7_mfd_avc_directmode_state(VADriverContextP ctx,
424                               struct decode_state *decode_state,
425                               VAPictureParameterBufferH264 *pic_param,
426                               VASliceParameterBufferH264 *slice_param,
427                               struct gen7_mfd_context *gen7_mfd_context)
428 {
429     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
430     struct object_surface *obj_surface;
431     GenAvcSurface *gen7_avc_surface;
432     VAPictureH264 *va_pic;
433     int i;
434
435     BEGIN_BCS_BATCH(batch, 69);
436     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
437
438     /* reference surfaces 0..15 */
439     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
440         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
441             gen7_mfd_context->reference_surface[i].obj_surface &&
442             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
443
444             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
445             gen7_avc_surface = obj_surface->private_data;
446             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
447                           I915_GEM_DOMAIN_INSTRUCTION, 0,
448                           0);
449
450             if (gen7_avc_surface->dmv_bottom_flag == 1)
451                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
452                               I915_GEM_DOMAIN_INSTRUCTION, 0,
453                               0);
454             else
455                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
456                               I915_GEM_DOMAIN_INSTRUCTION, 0,
457                               0);
458         } else {
459             OUT_BCS_BATCH(batch, 0);
460             OUT_BCS_BATCH(batch, 0);
461         }
462     }
463
464     /* the current decoding frame/field */
465     va_pic = &pic_param->CurrPic;
466     obj_surface = decode_state->render_object;
467     assert(obj_surface->bo && obj_surface->private_data);
468     gen7_avc_surface = obj_surface->private_data;
469
470     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
471                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
472                   0);
473
474     if (gen7_avc_surface->dmv_bottom_flag == 1)
475         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
476                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
477                       0);
478     else
479         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
480                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
481                       0);
482
483     /* POC List */
484     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
485         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
486
487         if (obj_surface) {
488             const VAPictureH264 * const va_pic = avc_find_picture(
489                 obj_surface->base.id, pic_param->ReferenceFrames,
490                 ARRAY_ELEMS(pic_param->ReferenceFrames));
491
492             assert(va_pic != NULL);
493             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
494             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
495         } else {
496             OUT_BCS_BATCH(batch, 0);
497             OUT_BCS_BATCH(batch, 0);
498         }
499     }
500
501     va_pic = &pic_param->CurrPic;
502     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
503     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
504
505     ADVANCE_BCS_BATCH(batch);
506 }
507
508 static void
509 gen7_mfd_avc_phantom_slice_first(VADriverContextP ctx,
510                                  VAPictureParameterBufferH264 *pic_param,
511                                  VASliceParameterBufferH264 *next_slice_param,
512                                  struct gen7_mfd_context *gen7_mfd_context)
513 {
514     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
515 }
516
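/*
 * Emit MFX_AVC_SLICE_STATE: normalized slice type, active reference list
 * sizes, QP and deblocking parameters, plus the first-MB position of this
 * slice and of the next one (or the end of the picture for the last slice).
 */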
517 static void
518 gen7_mfd_avc_slice_state(VADriverContextP ctx,
519                          VAPictureParameterBufferH264 *pic_param,
520                          VASliceParameterBufferH264 *slice_param,
521                          VASliceParameterBufferH264 *next_slice_param,
522                          struct gen7_mfd_context *gen7_mfd_context)
523 {
524     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
525     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
526     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
527     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
528     int num_ref_idx_l0, num_ref_idx_l1;
529     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
530                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
531     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
532     int slice_type;
533
534     if (slice_param->slice_type == SLICE_TYPE_I ||
535         slice_param->slice_type == SLICE_TYPE_SI) {
536         slice_type = SLICE_TYPE_I;
537     } else if (slice_param->slice_type == SLICE_TYPE_P ||
538                slice_param->slice_type == SLICE_TYPE_SP) {
539         slice_type = SLICE_TYPE_P;
540     } else { 
541         assert(slice_param->slice_type == SLICE_TYPE_B);
542         slice_type = SLICE_TYPE_B;
543     }
544
545     if (slice_type == SLICE_TYPE_I) {
546         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
547         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
548         num_ref_idx_l0 = 0;
549         num_ref_idx_l1 = 0;
550     } else if (slice_type == SLICE_TYPE_P) {
551         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
552         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
553         num_ref_idx_l1 = 0;
554     } else {
555         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
556         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
557     }
558
559     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
560     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
561     slice_ver_pos = first_mb_in_slice / width_in_mbs;
562
563     if (next_slice_param) {
564         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
565         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
566         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
567     } else {
568         next_slice_hor_pos = 0;
569         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
570     }
571
572     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
573     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
574     OUT_BCS_BATCH(batch, slice_type);
575     OUT_BCS_BATCH(batch, 
576                   (num_ref_idx_l1 << 24) |
577                   (num_ref_idx_l0 << 16) |
578                   (slice_param->chroma_log2_weight_denom << 8) |
579                   (slice_param->luma_log2_weight_denom << 0));
580     OUT_BCS_BATCH(batch, 
581                   (slice_param->direct_spatial_mv_pred_flag << 29) |
582                   (slice_param->disable_deblocking_filter_idc << 27) |
583                   (slice_param->cabac_init_idc << 24) |
584                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
585                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
586                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
587     OUT_BCS_BATCH(batch, 
588                   (slice_ver_pos << 24) |
589                   (slice_hor_pos << 16) | 
590                   (first_mb_in_slice << 0));
591     OUT_BCS_BATCH(batch,
592                   (next_slice_ver_pos << 16) |
593                   (next_slice_hor_pos << 0));
594     OUT_BCS_BATCH(batch, 
595                   (next_slice_param == NULL) << 19); /* last slice flag */
596     OUT_BCS_BATCH(batch, 0);
597     OUT_BCS_BATCH(batch, 0);
598     OUT_BCS_BATCH(batch, 0);
599     OUT_BCS_BATCH(batch, 0);
600     ADVANCE_BCS_BATCH(batch);
601 }
602
603 static inline void
604 gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
605                            VAPictureParameterBufferH264 *pic_param,
606                            VASliceParameterBufferH264 *slice_param,
607                            struct gen7_mfd_context *gen7_mfd_context)
608 {
609     gen6_send_avc_ref_idx_state(
610         gen7_mfd_context->base.batch,
611         slice_param,
612         gen7_mfd_context->reference_surface
613     );
614 }
615
616 static void
617 gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
618                                 VAPictureParameterBufferH264 *pic_param,
619                                 VASliceParameterBufferH264 *slice_param,
620                                 struct gen7_mfd_context *gen7_mfd_context)
621 {
622     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
623     int i, j, num_weight_offset_table = 0;
624     short weightoffsets[32 * 6];
625
626     if ((slice_param->slice_type == SLICE_TYPE_P ||
627          slice_param->slice_type == SLICE_TYPE_SP) &&
628         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
629         num_weight_offset_table = 1;
630     }
631     
632     if ((slice_param->slice_type == SLICE_TYPE_B) &&
633         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
634         num_weight_offset_table = 2;
635     }
636
637     for (i = 0; i < num_weight_offset_table; i++) {
638         BEGIN_BCS_BATCH(batch, 98);
639         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
640         OUT_BCS_BATCH(batch, i);
641
642         if (i == 0) {
643             for (j = 0; j < 32; j++) {
644                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
645                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
646                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
647                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
648                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
649                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
650             }
651         } else {
652             for (j = 0; j < 32; j++) {
653                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
654                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
655                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
656                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
657                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
658                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
659             }
660         }
661
662         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
663         ADVANCE_BCS_BATCH(batch);
664     }
665 }
666
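/*
 * Emit MFD_AVC_BSD_OBJECT: the byte range of this slice within the indirect
 * bitstream buffer and the bit offset of its first macroblock, as returned
 * by avc_get_first_mb_bit_offset().
 */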
667 static void
668 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
669                         VAPictureParameterBufferH264 *pic_param,
670                         VASliceParameterBufferH264 *slice_param,
671                         dri_bo *slice_data_bo,
672                         VASliceParameterBufferH264 *next_slice_param,
673                         struct gen7_mfd_context *gen7_mfd_context)
674 {
675     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
676     unsigned int slice_data_bit_offset;
677
678     slice_data_bit_offset = avc_get_first_mb_bit_offset(
679         slice_data_bo,
680         slice_param,
681         pic_param->pic_fields.bits.entropy_coding_mode_flag
682     );
683
684     /* the input bitstream format on GEN7 differs from GEN6 */
685     BEGIN_BCS_BATCH(batch, 6);
686     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
687     OUT_BCS_BATCH(batch, 
688                   (slice_param->slice_data_size - slice_param->slice_data_offset));
689     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
690     OUT_BCS_BATCH(batch,
691                   (0 << 31) |
692                   (0 << 14) |
693                   (0 << 12) |
694                   (0 << 10) |
695                   (0 << 8));
696     OUT_BCS_BATCH(batch,
697                   ((slice_data_bit_offset >> 3) << 16) |
698                   (1 << 7)  |
699                   (0 << 5)  |
700                   (0 << 4)  |
701                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
702                   (slice_data_bit_offset & 0x7));
703     OUT_BCS_BATCH(batch, 0);
704     ADVANCE_BCS_BATCH(batch);
705 }
706
707 static inline void
708 gen7_mfd_avc_context_init(
709     VADriverContextP         ctx,
710     struct gen7_mfd_context *gen7_mfd_context
711 )
712 {
713     /* Initialize flat scaling lists */
714     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
715 }
716
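/*
 * Per-picture AVC setup: scan the slice headers to see whether in-loop
 * deblocking is active (which selects the pre- vs post-deblocking output),
 * refresh the reference frame store, make sure the render surface and its
 * AVC private data exist, and reallocate the row-store scratch buffers
 * sized from the frame width.
 */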
717 static void
718 gen7_mfd_avc_decode_init(VADriverContextP ctx,
719                          struct decode_state *decode_state,
720                          struct gen7_mfd_context *gen7_mfd_context)
721 {
722     VAPictureParameterBufferH264 *pic_param;
723     VASliceParameterBufferH264 *slice_param;
724     struct i965_driver_data *i965 = i965_driver_data(ctx);
725     struct object_surface *obj_surface;
726     dri_bo *bo;
727     int i, j, enable_avc_ildb = 0;
728     unsigned int width_in_mbs, height_in_mbs;
729
730     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
731         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
732         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
733
734         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
735             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
736             assert((slice_param->slice_type == SLICE_TYPE_I) ||
737                    (slice_param->slice_type == SLICE_TYPE_SI) ||
738                    (slice_param->slice_type == SLICE_TYPE_P) ||
739                    (slice_param->slice_type == SLICE_TYPE_SP) ||
740                    (slice_param->slice_type == SLICE_TYPE_B));
741
742             if (slice_param->disable_deblocking_filter_idc != 1) {
743                 enable_avc_ildb = 1;
744                 break;
745             }
746
747             slice_param++;
748         }
749     }
750
751     assert(decode_state->pic_param && decode_state->pic_param->buffer);
752     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
753     intel_update_avc_frame_store_index(ctx, decode_state, pic_param,
754         gen7_mfd_context->reference_surface, &gen7_mfd_context->fs_ctx);
755     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
756     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
757     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
758     assert(height_in_mbs > 0 && height_in_mbs <= 256);
759
760     /* Current decoded picture */
761     obj_surface = decode_state->render_object;
762     if (pic_param->pic_fields.bits.reference_pic_flag)
763         obj_surface->flags |= SURFACE_REFERENCED;
764     else
765         obj_surface->flags &= ~SURFACE_REFERENCED;
766
767     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
768     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
769
770     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
771     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
772     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
773     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
774
775     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
776     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
777     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
778     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
779
780     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
781     bo = dri_bo_alloc(i965->intel.bufmgr,
782                       "intra row store",
783                       width_in_mbs * 64,
784                       0x1000);
785     assert(bo);
786     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
787     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
788
789     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
790     bo = dri_bo_alloc(i965->intel.bufmgr,
791                       "deblocking filter row store",
792                       width_in_mbs * 64 * 4,
793                       0x1000);
794     assert(bo);
795     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
796     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
797
798     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
799     bo = dri_bo_alloc(i965->intel.bufmgr,
800                       "bsd mpc row store",
801                       width_in_mbs * 64 * 2,
802                       0x1000);
803     assert(bo);
804     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
805     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
806
807     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
808     bo = dri_bo_alloc(i965->intel.bufmgr,
809                       "mpr row store",
810                       width_in_mbs * 64 * 2,
811                       0x1000);
812     assert(bo);
813     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
814     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
815
816     gen7_mfd_context->bitplane_read_buffer.valid = 0;
817 }
818
819 static void
820 gen7_mfd_avc_decode_picture(VADriverContextP ctx,
821                             struct decode_state *decode_state,
822                             struct gen7_mfd_context *gen7_mfd_context)
823 {
824     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
825     VAPictureParameterBufferH264 *pic_param;
826     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
827     dri_bo *slice_data_bo;
828     int i, j;
829
830     assert(decode_state->pic_param && decode_state->pic_param->buffer);
831     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
832     gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
833
834     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
835     intel_batchbuffer_emit_mi_flush(batch);
836     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
837     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
838     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
839     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
840     gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
841     gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
842
843     for (j = 0; j < decode_state->num_slice_params; j++) {
844         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
845         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
846         slice_data_bo = decode_state->slice_datas[j]->bo;
847         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
848
849         if (j == decode_state->num_slice_params - 1)
850             next_slice_group_param = NULL;
851         else
852             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
853
854         if (j == 0 && slice_param->first_mb_in_slice)
855             gen7_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context); 
856
857         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
858             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
859             assert((slice_param->slice_type == SLICE_TYPE_I) ||
860                    (slice_param->slice_type == SLICE_TYPE_SI) ||
861                    (slice_param->slice_type == SLICE_TYPE_P) ||
862                    (slice_param->slice_type == SLICE_TYPE_SP) ||
863                    (slice_param->slice_type == SLICE_TYPE_B));
864
865             if (i < decode_state->slice_params[j]->num_elements - 1)
866                 next_slice_param = slice_param + 1;
867             else
868                 next_slice_param = next_slice_group_param;
869
870             gen7_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
871             gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
872             gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
873             gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
874             gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
875             slice_param++;
876         }
877     }
878
879     intel_batchbuffer_end_atomic(batch);
880     intel_batchbuffer_flush(batch);
881 }
882
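/*
 * Per-picture MPEG-2 setup: bind the reference surfaces, allocate the NV12
 * render target if needed, and size the BSD/MPC row-store scratch buffer
 * from the frame width. MPEG-2 decoding always uses the pre-deblocking
 * output path here.
 */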
883 static void
884 gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
885                            struct decode_state *decode_state,
886                            struct gen7_mfd_context *gen7_mfd_context)
887 {
888     VAPictureParameterBufferMPEG2 *pic_param;
889     struct i965_driver_data *i965 = i965_driver_data(ctx);
890     struct object_surface *obj_surface;
891     dri_bo *bo;
892     unsigned int width_in_mbs;
893
894     assert(decode_state->pic_param && decode_state->pic_param->buffer);
895     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
896     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
897
898     mpeg2_set_reference_surfaces(
899         ctx,
900         gen7_mfd_context->reference_surface,
901         decode_state,
902         pic_param
903     );
904
905     /* Current decoded picture */
906     obj_surface = decode_state->render_object;
907     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
908
909     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
910     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
911     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
912     gen7_mfd_context->pre_deblocking_output.valid = 1;
913
914     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
915     bo = dri_bo_alloc(i965->intel.bufmgr,
916                       "bsd mpc row store",
917                       width_in_mbs * 96,
918                       0x1000);
919     assert(bo);
920     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
921     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
922
923     gen7_mfd_context->post_deblocking_output.valid = 0;
924     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
925     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
926     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
927     gen7_mfd_context->bitplane_read_buffer.valid = 0;
928 }
929
930 static void
931 gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
932                          struct decode_state *decode_state,
933                          struct gen7_mfd_context *gen7_mfd_context)
934 {
935     struct i965_driver_data * const i965 = i965_driver_data(ctx);
936     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
937     VAPictureParameterBufferMPEG2 *pic_param;
938     unsigned int slice_concealment_disable_bit = 0;
939
940     assert(decode_state->pic_param && decode_state->pic_param->buffer);
941     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
942
943     if (IS_HASWELL(i965->intel.device_info)) {
944         /* XXX: disable concealment for now */
945         slice_concealment_disable_bit = 1;
946     }
947
948     BEGIN_BCS_BATCH(batch, 13);
949     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
950     OUT_BCS_BATCH(batch,
951                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
952                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
953                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
954                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
955                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
956                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
957                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
958                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
959                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
960                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
961                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
962                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
963     OUT_BCS_BATCH(batch,
964                   pic_param->picture_coding_type << 9);
965     OUT_BCS_BATCH(batch,
966                   (slice_concealment_disable_bit << 31) |
967                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
968                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
969     OUT_BCS_BATCH(batch, 0);
970     OUT_BCS_BATCH(batch, 0);
971     OUT_BCS_BATCH(batch, 0);
972     OUT_BCS_BATCH(batch, 0);
973     OUT_BCS_BATCH(batch, 0);
974     OUT_BCS_BATCH(batch, 0);
975     OUT_BCS_BATCH(batch, 0);
976     OUT_BCS_BATCH(batch, 0);
977     OUT_BCS_BATCH(batch, 0);
978     ADVANCE_BCS_BATCH(batch);
979 }
980
981 static void
982 gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
983                         struct decode_state *decode_state,
984                         struct gen7_mfd_context *gen7_mfd_context)
985 {
986     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
987     int i, j;
988
989     /* Update internal QM state */
990     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
991         VAIQMatrixBufferMPEG2 * const iq_matrix =
992             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
993
994         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
995             iq_matrix->load_intra_quantiser_matrix) {
996             gen_iq_matrix->load_intra_quantiser_matrix =
997                 iq_matrix->load_intra_quantiser_matrix;
998             if (iq_matrix->load_intra_quantiser_matrix) {
999                 for (j = 0; j < 64; j++)
1000                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1001                         iq_matrix->intra_quantiser_matrix[j];
1002             }
1003         }
1004
1005         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1006             iq_matrix->load_non_intra_quantiser_matrix) {
1007             gen_iq_matrix->load_non_intra_quantiser_matrix =
1008                 iq_matrix->load_non_intra_quantiser_matrix;
1009             if (iq_matrix->load_non_intra_quantiser_matrix) {
1010                 for (j = 0; j < 64; j++)
1011                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1012                         iq_matrix->non_intra_quantiser_matrix[j];
1013             }
1014         }
1015     }
1016
1017     /* Commit QM state to HW */
1018     for (i = 0; i < 2; i++) {
1019         unsigned char *qm = NULL;
1020         int qm_type;
1021
1022         if (i == 0) {
1023             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1024                 qm = gen_iq_matrix->intra_quantiser_matrix;
1025                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1026             }
1027         } else {
1028             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1029                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1030                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1031             }
1032         }
1033
1034         if (!qm)
1035             continue;
1036
1037         gen7_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1038     }
1039 }
1040
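/*
 * Compute the usable length of one MPEG-2 slice in the slice data buffer:
 * scan from the macroblock data onwards and stop at the first run of three
 * zero bytes following a non-zero byte (zero padding / next start code
 * prefix), so the BSD object is not fed bytes belonging to the next slice.
 */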
1041 uint32_t mpeg2_get_slice_data_length(dri_bo *slice_data_bo, VASliceParameterBufferMPEG2 *slice_param)
1042 {
1043     uint8_t *buf;
1044     uint32_t buf_offset = slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3);
1045     uint32_t buf_size = slice_param->slice_data_size - (slice_param->macroblock_offset >> 3);
1046     uint32_t i;
1047
1048     dri_bo_map(slice_data_bo, 0);
1049     buf = (uint8_t *)slice_data_bo->virtual + buf_offset;
1050
1051     for (i = 3; i < buf_size; i++) {
1052         if (buf[i - 3] &&
1053             !buf[i - 2] &&
1054             !buf[i - 1] &&
1055             !buf[i]) {
1056             dri_bo_unmap(slice_data_bo);
1057             return i - 3 + 1;
1058         }
1059     }
1060
1061     dri_bo_unmap(slice_data_bo);
1062     return buf_size;
1063 }
1064
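/*
 * Emit MFD_MPEG2_BSD_OBJECT: slice data length and offset plus the slice's
 * start position and macroblock count, derived from this slice's and the
 * next slice's positions (with the field-picture vertical-position
 * workaround applied when required).
 */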
1065 static void
1066 gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1067                           VAPictureParameterBufferMPEG2 *pic_param,
1068                           VASliceParameterBufferMPEG2 *slice_param,
1069                           dri_bo *slice_data_bo,
1070                           VASliceParameterBufferMPEG2 *next_slice_param,
1071                           struct gen7_mfd_context *gen7_mfd_context)
1072 {
1073     struct i965_driver_data * const i965 = i965_driver_data(ctx);
1074     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1075     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1076     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1077
1078     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1079         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1080         is_field_pic = 1;
1081     is_field_pic_wa = is_field_pic &&
1082         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1083
1084     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1085     hpos0 = slice_param->slice_horizontal_position;
1086
1087     if (next_slice_param == NULL) {
1088         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1089         hpos1 = 0;
1090     } else {
1091         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1092         hpos1 = next_slice_param->slice_horizontal_position;
1093     }
1094
1095     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1096
1097     BEGIN_BCS_BATCH(batch, 5);
1098     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1099     OUT_BCS_BATCH(batch, 
1100                   mpeg2_get_slice_data_length(slice_data_bo, slice_param));
1101     OUT_BCS_BATCH(batch, 
1102                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1103     OUT_BCS_BATCH(batch,
1104                   hpos0 << 24 |
1105                   vpos0 << 16 |
1106                   mb_count << 8 |
1107                   (next_slice_param == NULL) << 5 |
1108                   (next_slice_param == NULL) << 3 |
1109                   (slice_param->macroblock_offset & 0x7));
1110     OUT_BCS_BATCH(batch,
1111                   (slice_param->quantiser_scale_code << 24) |
1112                   (IS_HASWELL(i965->intel.device_info) ? (vpos1 << 8 | hpos1) : 0));
1113     ADVANCE_BCS_BATCH(batch);
1114 }
1115
1116 static void
1117 gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1118                               struct decode_state *decode_state,
1119                               struct gen7_mfd_context *gen7_mfd_context)
1120 {
1121     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1122     VAPictureParameterBufferMPEG2 *pic_param;
1123     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1124     dri_bo *slice_data_bo;
1125     int i, j;
1126
1127     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1128     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1129
1130     gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1131     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1132     intel_batchbuffer_emit_mi_flush(batch);
1133     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1134     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1135     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1136     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1137     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1138     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1139
1140     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1141         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1142             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1143
1144     for (j = 0; j < decode_state->num_slice_params; j++) {
1145         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1146         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1147         slice_data_bo = decode_state->slice_datas[j]->bo;
1148         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1149
1150         if (j == decode_state->num_slice_params - 1)
1151             next_slice_group_param = NULL;
1152         else
1153             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1154
1155         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1156             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1157
1158             if (i < decode_state->slice_params[j]->num_elements - 1)
1159                 next_slice_param = slice_param + 1;
1160             else
1161                 next_slice_param = next_slice_group_param;
1162
1163             gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1164             slice_param++;
1165         }
1166     }
1167
1168     intel_batchbuffer_end_atomic(batch);
1169     intel_batchbuffer_flush(batch);
1170 }
1171
1172 static const int va_to_gen7_vc1_pic_type[5] = {
1173     GEN7_VC1_I_PICTURE,
1174     GEN7_VC1_P_PICTURE,
1175     GEN7_VC1_B_PICTURE,
1176     GEN7_VC1_BI_PICTURE,
1177     GEN7_VC1_P_PICTURE,
1178 };
1179
1180 static const int va_to_gen7_vc1_mv[4] = {
1181     1, /* 1-MV */
1182     2, /* 1-MV half-pel */
1183     3, /* 1-MV half-pel bilinear */
1184     0, /* Mixed MV */
1185 };
1186
1187 static const int b_picture_scale_factor[21] = {
1188     128, 85,  170, 64,  192,
1189     51,  102, 153, 204, 43,
1190     215, 37,  74,  111, 148,
1191     185, 222, 32,  96,  160, 
1192     224,
1193 };
1194
1195 static const int va_to_gen7_vc1_condover[3] = {
1196     0,
1197     2,
1198     3
1199 };
1200
1201 static const int va_to_gen7_vc1_profile[4] = {
1202     GEN7_VC1_SIMPLE_PROFILE,
1203     GEN7_VC1_MAIN_PROFILE,
1204     GEN7_VC1_RESERVED_PROFILE,
1205     GEN7_VC1_ADVANCED_PROFILE
1206 };
1207
1208 static void 
1209 gen7_mfd_free_vc1_surface(void **data)
1210 {
1211     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1212
1213     if (!gen7_vc1_surface)
1214         return;
1215
1216     dri_bo_unreference(gen7_vc1_surface->dmv);
1217     free(gen7_vc1_surface);
1218     *data = NULL;
1219 }
1220
1221 static void
1222 gen7_mfd_init_vc1_surface(VADriverContextP ctx, 
1223                           VAPictureParameterBufferVC1 *pic_param,
1224                           struct object_surface *obj_surface)
1225 {
1226     struct i965_driver_data *i965 = i965_driver_data(ctx);
1227     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1228     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1229     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1230
1231     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
1232
1233     if (!gen7_vc1_surface) {
1234         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
             assert(gen7_vc1_surface); /* guard the dereferences below against allocation failure */
1235         assert((obj_surface->size & 0x3f) == 0);
1236         obj_surface->private_data = gen7_vc1_surface;
1237     }
1238
1239     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1240
1241     if (gen7_vc1_surface->dmv == NULL) {
1242         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1243                                              "direct mv w/r buffer",
1244                                              width_in_mbs * height_in_mbs * 64,
1245                                              0x1000);
1246     }
1247 }
1248
1249 static void
1250 gen7_mfd_vc1_decode_init(VADriverContextP ctx,
1251                          struct decode_state *decode_state,
1252                          struct gen7_mfd_context *gen7_mfd_context)
1253 {
1254     VAPictureParameterBufferVC1 *pic_param;
1255     struct i965_driver_data *i965 = i965_driver_data(ctx);
1256     struct object_surface *obj_surface;
1257     dri_bo *bo;
1258     int width_in_mbs;
1259     int picture_type;
1260  
1261     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1262     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1263     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1264     picture_type = pic_param->picture_fields.bits.picture_type;
1265  
1266     intel_update_vc1_frame_store_index(ctx,
1267                                        decode_state,
1268                                        pic_param,
1269                                        gen7_mfd_context->reference_surface);
1270
1271     /* Current decoded picture */
1272     obj_surface = decode_state->render_object;
1273     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1274     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1275
1276     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1277     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1278     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1279     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1280
1281     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1282     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1283     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1284     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1285
1286     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1287     bo = dri_bo_alloc(i965->intel.bufmgr,
1288                       "intra row store",
1289                       width_in_mbs * 64,
1290                       0x1000);
1291     assert(bo);
1292     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1293     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1294
1295     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1296     bo = dri_bo_alloc(i965->intel.bufmgr,
1297                       "deblocking filter row store",
1298                       width_in_mbs * 7 * 64,
1299                       0x1000);
1300     assert(bo);
1301     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1302     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1303
1304     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1305     bo = dri_bo_alloc(i965->intel.bufmgr,
1306                       "bsd mpc row store",
1307                       width_in_mbs * 96,
1308                       0x1000);
1309     assert(bo);
1310     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1311     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1312
1313     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1314
1315     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1316     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1317     
1318     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1319         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1320         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1321         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1322         int src_w, src_h;
1323         uint8_t *src = NULL, *dst = NULL;
1324
1325         assert(decode_state->bit_plane->buffer);
1326         src = decode_state->bit_plane->buffer;
1327
1328         bo = dri_bo_alloc(i965->intel.bufmgr,
1329                           "VC-1 Bitplane",
1330                           bitplane_width * height_in_mbs,
1331                           0x1000);
1332         assert(bo);
1333         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1334
1335         dri_bo_map(bo, True);
1336         assert(bo->virtual);
1337         dst = bo->virtual;
1338
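        /*
         * Note: the loop below appears to repack the VA-API bitplane
         * (4 bits per macroblock, two macroblocks per byte in raster
         * order over the whole picture) into the layout expected by the
         * hardware, where every macroblock row starts on a fresh byte
         * boundary (bitplane_width bytes per row).  For skipped pictures
         * the 0x2 bit is forced for every macroblock.
         */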
1339         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1340             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1341                 int src_index, dst_index;
1342                 int src_shift;
1343                 uint8_t src_value;
1344
1345                 src_index = (src_h * width_in_mbs + src_w) / 2;
1346                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1347                 src_value = ((src[src_index] >> src_shift) & 0xf);
1348
1349                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE) {
1350                     src_value |= 0x2;
1351                 }
1352
1353                 dst_index = src_w / 2;
1354                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1355             }
1356
1357             if (src_w & 1)
1358                 dst[src_w / 2] >>= 4;
1359
1360             dst += bitplane_width;
1361         }
1362
1363         dri_bo_unmap(bo);
1364     } else
1365         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1366 }
1367
1368 static void
1369 gen7_mfd_vc1_pic_state(VADriverContextP ctx,
1370                        struct decode_state *decode_state,
1371                        struct gen7_mfd_context *gen7_mfd_context)
1372 {
1373     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1374     VAPictureParameterBufferVC1 *pic_param;
1375     struct object_surface *obj_surface;
1376     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1377     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1378     int unified_mv_mode;
1379     int ref_field_pic_polarity = 0;
1380     int scale_factor = 0;
1381     int trans_ac_y = 0;
1382     int dmv_surface_valid = 0;
1383     int brfd = 0;
1384     int fcm = 0;
1385     int picture_type;
1386     int profile;
1387     int overlap;
1388     int interpolation_mode = 0;
1389
1390     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1391     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1392
1393     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1394     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1395     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1396     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1397     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1398     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1399     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1400     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1401
1402     if (dquant == 0) {
1403         alt_pquant_config = 0;
1404         alt_pquant_edge_mask = 0;
1405     } else if (dquant == 2) {
1406         alt_pquant_config = 1;
1407         alt_pquant_edge_mask = 0xf;
1408     } else {
1409         assert(dquant == 1);
1410         if (dquantfrm == 0) {
1411             alt_pquant_config = 0;
1412             alt_pquant_edge_mask = 0;
1413             alt_pq = 0;
1414         } else {
1415             assert(dquantfrm == 1);
1416             alt_pquant_config = 1;
1417
1418             switch (dqprofile) {
1419             case 3:
1420                 if (dqbilevel == 0) {
1421                     alt_pquant_config = 2;
1422                     alt_pquant_edge_mask = 0;
1423                 } else {
1424                     assert(dqbilevel == 1);
1425                     alt_pquant_config = 3;
1426                     alt_pquant_edge_mask = 0;
1427                 }
1428                 break;
1429                 
1430             case 0:
1431                 alt_pquant_edge_mask = 0xf;
1432                 break;
1433
1434             case 1:
1435                 if (dqdbedge == 3)
1436                     alt_pquant_edge_mask = 0x9;
1437                 else
1438                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1439
1440                 break;
1441
1442             case 2:
1443                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1444                 break;
1445
1446             default:
1447                 assert(0);
1448             }
1449         }
1450     }
1451
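    /*
     * Note: when intensity compensation is signalled, the actual motion
     * vector mode is carried in mv_mode2, so the lookup is done on that
     * field instead of mv_mode.
     */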
1452     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1453         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1454         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1455     } else {
1456         assert(pic_param->mv_fields.bits.mv_mode < 4);
1457         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1458     }
1459
1460     if (pic_param->sequence_fields.bits.interlace == 1 &&
1461         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1462         /* FIXME: calculate reference field picture polarity */
1463         assert(0);
1464         ref_field_pic_polarity = 0;
1465     }
1466
1467     if (pic_param->b_picture_fraction < 21)
1468         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1469
1470     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1471     
1472     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1473         picture_type == GEN7_VC1_I_PICTURE)
1474         picture_type = GEN7_VC1_BI_PICTURE;
1475
1476     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1477         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1478     else {
1479         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1480         /*
1481          * 8.3.6.2.1 Transform Type Selection
1482          * If variable-sized transform coding is not enabled,
1483          * then the 8x8 transform shall be used for all blocks.
1484          * This is also an MFX_VC1_PIC_STATE requirement.
1485          */
1486         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1487             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1488             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1489         }
1490     }
1491
1492
1493     if (picture_type == GEN7_VC1_B_PICTURE) {
1494         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1495
1496         obj_surface = decode_state->reference_objects[1];
1497
1498         if (obj_surface)
1499             gen7_vc1_surface = obj_surface->private_data;
1500
1501         if (!gen7_vc1_surface || 
1502             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1503              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1504             dmv_surface_valid = 0;
1505         else
1506             dmv_surface_valid = 1;
1507     }
1508
1509     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1510
1511     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1512         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1513     else {
1514         if (pic_param->picture_fields.bits.top_field_first)
1515             fcm = 2;
1516         else
1517             fcm = 3;
1518     }
1519
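    /*
     * Note: for B pictures the backward reference frame distance (BRFD)
     * appears to be derived from the forward reference distance scaled
     * by the B-fraction factor above and then clamped to zero.
     */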
1520     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1521         brfd = pic_param->reference_fields.bits.reference_distance;
1522         brfd = (scale_factor * brfd) >> 8;
1523         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1524
1525         if (brfd < 0)
1526             brfd = 0;
1527     }
1528
1529     overlap = 0;
1530     if (profile != GEN7_VC1_ADVANCED_PROFILE) {
1531         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1532             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1533             overlap = 1;
1534         }
1535     } else {
1536         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1537             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1538             overlap = 1;
1539         }
1540         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1541             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE) {
1542             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9) {
1543                 overlap = 1;
1544             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1545                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1546                 overlap = 1;
1547             }
1548         }
1549     }
1550
1551     assert(pic_param->conditional_overlap_flag < 3);
1552     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1553
1554     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1555         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1556          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1557         interpolation_mode = 9; /* Half-pel bilinear */
1558     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1559              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1560               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1561         interpolation_mode = 1; /* Half-pel bicubic */
1562     else
1563         interpolation_mode = 0; /* Quarter-pel bicubic */
1564
1565     BEGIN_BCS_BATCH(batch, 6);
1566     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1567     OUT_BCS_BATCH(batch,
1568                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1569                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1570     OUT_BCS_BATCH(batch,
1571                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1572                   dmv_surface_valid << 15 |
1573                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1574                   pic_param->rounding_control << 13 |
1575                   pic_param->sequence_fields.bits.syncmarker << 12 |
1576                   interpolation_mode << 8 |
1577                   0 << 7 | /* FIXME: scale up or down ??? */
1578                   pic_param->range_reduction_frame << 6 |
1579                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1580                   overlap << 4 |
1581                   !pic_param->picture_fields.bits.is_first_field << 3 |
1582                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1583     OUT_BCS_BATCH(batch,
1584                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1585                   picture_type << 26 |
1586                   fcm << 24 |
1587                   alt_pq << 16 |
1588                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1589                   scale_factor << 0);
1590     OUT_BCS_BATCH(batch,
1591                   unified_mv_mode << 28 |
1592                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1593                   pic_param->fast_uvmc_flag << 26 |
1594                   ref_field_pic_polarity << 25 |
1595                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1596                   pic_param->reference_fields.bits.reference_distance << 20 |
1597                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1598                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1599                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1600                   alt_pquant_edge_mask << 4 |
1601                   alt_pquant_config << 2 |
1602                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1603                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1604     OUT_BCS_BATCH(batch,
1605                   !!pic_param->bitplane_present.value << 31 |
1606                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1607                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1608                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1609                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1610                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1611                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1612                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1613                   pic_param->mv_fields.bits.mv_table << 20 |
1614                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1615                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1616                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1617                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1618                   pic_param->mb_mode_table << 8 |
1619                   trans_ac_y << 6 |
1620                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1621                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1622                   pic_param->cbp_table << 0);
1623     ADVANCE_BCS_BATCH(batch);
1624 }
1625
1626 static void
1627 gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1628                              struct decode_state *decode_state,
1629                              struct gen7_mfd_context *gen7_mfd_context)
1630 {
1631     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1632     VAPictureParameterBufferVC1 *pic_param;
1633     int intensitycomp_single;
1634
1635     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1636     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1637
1640     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1641
1642     BEGIN_BCS_BATCH(batch, 6);
1643     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1644     OUT_BCS_BATCH(batch,
1645                   0 << 14 | /* FIXME: double ??? */
1646                   0 << 12 |
1647                   intensitycomp_single << 10 |
1648                   intensitycomp_single << 8 |
1649                   0 << 4 | /* FIXME: interlace mode */
1650                   0);
1651     OUT_BCS_BATCH(batch,
1652                   pic_param->luma_shift << 16 |
1653                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1654     OUT_BCS_BATCH(batch, 0);
1655     OUT_BCS_BATCH(batch, 0);
1656     OUT_BCS_BATCH(batch, 0);
1657     ADVANCE_BCS_BATCH(batch);
1658 }
1659
1660
1661 static void
1662 gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
1663                               struct decode_state *decode_state,
1664                               struct gen7_mfd_context *gen7_mfd_context)
1665 {
1666     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1667     struct object_surface *obj_surface;
1668     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1669
1670     obj_surface = decode_state->render_object;
1671
1672     if (obj_surface && obj_surface->private_data) {
1673         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1674     }
1675
1676     obj_surface = decode_state->reference_objects[1];
1677
1678     if (obj_surface && obj_surface->private_data) {
1679         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1680     }
1681
1682     BEGIN_BCS_BATCH(batch, 3);
1683     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1684
1685     if (dmv_write_buffer)
1686         OUT_BCS_RELOC(batch, dmv_write_buffer,
1687                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1688                       0);
1689     else
1690         OUT_BCS_BATCH(batch, 0);
1691
1692     if (dmv_read_buffer)
1693         OUT_BCS_RELOC(batch, dmv_read_buffer,
1694                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1695                       0);
1696     else
1697         OUT_BCS_BATCH(batch, 0);
1698                   
1699     ADVANCE_BCS_BATCH(batch);
1700 }
1701
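/*
 * Note: for the advanced profile (profile == 3) the slice header may
 * contain start-code emulation prevention bytes (00 00 03).  The helper
 * below appears to account for them so that the macroblock data bit
 * offset handed to the hardware points into the escaped bitstream as it
 * is stored in the slice data buffer.
 */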
1702 static int
1703 gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1704 {
1705     int out_slice_data_bit_offset;
1706     int slice_header_size = in_slice_data_bit_offset / 8;
1707     int i, j;
1708
1709     if (profile != 3)
1710         out_slice_data_bit_offset = in_slice_data_bit_offset;
1711     else {
1712         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1713             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1714                 i++, j += 2;
1715             }
1716         }
1717
1718         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1719     }
1720
1721     return out_slice_data_bit_offset;
1722 }
1723
1724 static void
1725 gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
1726                         VAPictureParameterBufferVC1 *pic_param,
1727                         VASliceParameterBufferVC1 *slice_param,
1728                         VASliceParameterBufferVC1 *next_slice_param,
1729                         dri_bo *slice_data_bo,
1730                         struct gen7_mfd_context *gen7_mfd_context)
1731 {
1732     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1733     int next_slice_start_vert_pos;
1734     int macroblock_offset;
1735     uint8_t *slice_data = NULL;
1736
1737     dri_bo_map(slice_data_bo, 0);
1738     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1739     macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1740                                                                slice_param->macroblock_offset,
1741                                                                pic_param->sequence_fields.bits.profile);
1742     dri_bo_unmap(slice_data_bo);
1743
1744     if (next_slice_param)
1745         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1746     else
1747         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1748
1749     BEGIN_BCS_BATCH(batch, 5);
1750     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1751     OUT_BCS_BATCH(batch, 
1752                   slice_param->slice_data_size - (macroblock_offset >> 3));
1753     OUT_BCS_BATCH(batch, 
1754                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1755     OUT_BCS_BATCH(batch,
1756                   slice_param->slice_vertical_position << 16 |
1757                   next_slice_start_vert_pos << 0);
1758     OUT_BCS_BATCH(batch,
1759                   (macroblock_offset & 0x7));
1760     ADVANCE_BCS_BATCH(batch);
1761 }
1762
1763 static void
1764 gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
1765                             struct decode_state *decode_state,
1766                             struct gen7_mfd_context *gen7_mfd_context)
1767 {
1768     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1769     VAPictureParameterBufferVC1 *pic_param;
1770     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1771     dri_bo *slice_data_bo;
1772     int i, j;
1773
1774     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1775     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1776
1777     gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1778     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1779     intel_batchbuffer_emit_mi_flush(batch);
1780     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1781     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1782     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1783     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1784     gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1785     gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1786     gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1787
1788     for (j = 0; j < decode_state->num_slice_params; j++) {
1789         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1790         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1791         slice_data_bo = decode_state->slice_datas[j]->bo;
1792         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1793
1794         if (j == decode_state->num_slice_params - 1)
1795             next_slice_group_param = NULL;
1796         else
1797             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1798
1799         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1800             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1801
1802             if (i < decode_state->slice_params[j]->num_elements - 1)
1803                 next_slice_param = slice_param + 1;
1804             else
1805                 next_slice_param = next_slice_group_param;
1806
1807             gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1808             slice_param++;
1809         }
1810     }
1811
1812     intel_batchbuffer_end_atomic(batch);
1813     intel_batchbuffer_flush(batch);
1814 }
1815
1816 static void
1817 gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
1818                           struct decode_state *decode_state,
1819                           struct gen7_mfd_context *gen7_mfd_context)
1820 {
1821     struct object_surface *obj_surface;
1822     VAPictureParameterBufferJPEGBaseline *pic_param;
1823     int subsampling = SUBSAMPLE_YUV420;
1824     int fourcc = VA_FOURCC_IMC3;
1825
1826     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1827
1828     if (pic_param->num_components == 1) {
1829         subsampling = SUBSAMPLE_YUV400;
1830         fourcc = VA_FOURCC_Y800;
1831     } else if (pic_param->num_components == 3) {
1832         int h1 = pic_param->components[0].h_sampling_factor;
1833         int h2 = pic_param->components[1].h_sampling_factor;
1834         int h3 = pic_param->components[2].h_sampling_factor;
1835         int v1 = pic_param->components[0].v_sampling_factor;
1836         int v2 = pic_param->components[1].v_sampling_factor;
1837         int v3 = pic_param->components[2].v_sampling_factor;
1838
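        /*
         * Note: the chain below maps the per-component sampling factors
         * from the JPEG frame header onto one of the planar surface
         * formats supported by the hardware; unsupported combinations
         * are rejected with assert(0).
         */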
1839         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1840             v1 == 2 && v2 == 1 && v3 == 1) {
1841             subsampling = SUBSAMPLE_YUV420;
1842             fourcc = VA_FOURCC_IMC3;
1843         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1844                    v1 == 1 && v2 == 1 && v3 == 1) {
1845             subsampling = SUBSAMPLE_YUV422H;
1846             fourcc = VA_FOURCC_422H;
1847         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1848                    v1 == 1 && v2 == 1 && v3 == 1) {
1849             subsampling = SUBSAMPLE_YUV444;
1850             fourcc = VA_FOURCC_444P;
1851         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1852                    v1 == 1 && v2 == 1 && v3 == 1) {
1853             subsampling = SUBSAMPLE_YUV411;
1854             fourcc = VA_FOURCC_411P;
1855         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1856                    v1 == 2 && v2 == 1 && v3 == 1) {
1857             subsampling = SUBSAMPLE_YUV422V;
1858             fourcc = VA_FOURCC_422V;
1859         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1860                    v1 == 2 && v2 == 2 && v3 == 2) {
1861             subsampling = SUBSAMPLE_YUV422H;
1862             fourcc = VA_FOURCC_422H;
1863         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1864                    v1 == 2 && v2 == 1 && v3 == 1) {
1865             subsampling = SUBSAMPLE_YUV422V;
1866             fourcc = VA_FOURCC_422V;
1867         } else
1868             assert(0);
1869     } else {
1870         assert(0);
1871     }
1872
1873     /* Current decoded picture */
1874     obj_surface = decode_state->render_object;
1875     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
1876
1877     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1878     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1879     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1880     gen7_mfd_context->pre_deblocking_output.valid = 1;
1881
1882     gen7_mfd_context->post_deblocking_output.bo = NULL;
1883     gen7_mfd_context->post_deblocking_output.valid = 0;
1884
1885     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
1886     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1887
1888     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
1889     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1890
1891     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
1892     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
1893
1894     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
1895     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1896
1897     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1898     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1899 }
1900
1901 static const int va_to_gen7_jpeg_rotation[4] = {
1902     GEN7_JPEG_ROTATION_0,
1903     GEN7_JPEG_ROTATION_90,
1904     GEN7_JPEG_ROTATION_180,
1905     GEN7_JPEG_ROTATION_270
1906 };
1907
1908 static void
1909 gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
1910                         struct decode_state *decode_state,
1911                         struct gen7_mfd_context *gen7_mfd_context)
1912 {
1913     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1914     VAPictureParameterBufferJPEGBaseline *pic_param;
1915     int chroma_type = GEN7_YUV420;
1916     int frame_width_in_blks;
1917     int frame_height_in_blks;
1918
1919     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1920     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
1921
1922     if (pic_param->num_components == 1)
1923         chroma_type = GEN7_YUV400;
1924     else if (pic_param->num_components == 3) {
1925         int h1 = pic_param->components[0].h_sampling_factor;
1926         int h2 = pic_param->components[1].h_sampling_factor;
1927         int h3 = pic_param->components[2].h_sampling_factor;
1928         int v1 = pic_param->components[0].v_sampling_factor;
1929         int v2 = pic_param->components[1].v_sampling_factor;
1930         int v3 = pic_param->components[2].v_sampling_factor;
1931
1932         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1933             v1 == 2 && v2 == 1 && v3 == 1)
1934             chroma_type = GEN7_YUV420;
1935         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1936                  v1 == 1 && v2 == 1 && v3 == 1)
1937             chroma_type = GEN7_YUV422H_2Y;
1938         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1939                  v1 == 1 && v2 == 1 && v3 == 1)
1940             chroma_type = GEN7_YUV444;
1941         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1942                  v1 == 1 && v2 == 1 && v3 == 1)
1943             chroma_type = GEN7_YUV411;
1944         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1945                  v1 == 2 && v2 == 1 && v3 == 1)
1946             chroma_type = GEN7_YUV422V_2Y;
1947         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1948                  v1 == 2 && v2 == 2 && v3 == 2)
1949             chroma_type = GEN7_YUV422H_4Y;
1950         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1951                  v1 == 2 && v2 == 1 && v3 == 1)
1952             chroma_type = GEN7_YUV422V_4Y;
1953         else
1954             assert(0);
1955     }
1956
1957     if (chroma_type == GEN7_YUV400 ||
1958         chroma_type == GEN7_YUV444 ||
1959         chroma_type == GEN7_YUV422V_2Y) {
1960         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
1961         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
1962     } else if (chroma_type == GEN7_YUV411) {
1963         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
1964         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
1965     } else {
1966         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
1967         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
1968     }
1969
1970     BEGIN_BCS_BATCH(batch, 3);
1971     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
1972     OUT_BCS_BATCH(batch,
1973                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
1974                   (chroma_type << 0));
1975     OUT_BCS_BATCH(batch,
1976                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
1977                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
1978     ADVANCE_BCS_BATCH(batch);
1979 }
1980
1981 static const int va_to_gen7_jpeg_hufftable[2] = {
1982     MFX_HUFFTABLE_ID_Y,
1983     MFX_HUFFTABLE_ID_UV
1984 };
1985
1986 static void
1987 gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
1988                                struct decode_state *decode_state,
1989                                struct gen7_mfd_context *gen7_mfd_context,
1990                                int num_tables)
1991 {
1992     VAHuffmanTableBufferJPEGBaseline *huffman_table;
1993     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1994     int index;
1995
1996     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
1997         return;
1998
1999     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2000
2001     for (index = 0; index < num_tables; index++) {
2002         int id = va_to_gen7_jpeg_hufftable[index];
2003         if (!huffman_table->load_huffman_table[index])
2004             continue;
2005         BEGIN_BCS_BATCH(batch, 53);
2006         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2007         OUT_BCS_BATCH(batch, id);
2008         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2009         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2010         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2011         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2012         ADVANCE_BCS_BATCH(batch);
2013     }
2014 }
2015
2016 static const int va_to_gen7_jpeg_qm[5] = {
2017     -1,
2018     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2019     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2020     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2021     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2022 };
2023
2024 static void
2025 gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
2026                        struct decode_state *decode_state,
2027                        struct gen7_mfd_context *gen7_mfd_context)
2028 {
2029     VAPictureParameterBufferJPEGBaseline *pic_param;
2030     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2031     int index;
2032
2033     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2034         return;
2035
2036     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2037     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2038
2039     assert(pic_param->num_components <= 3);
2040
2041     for (index = 0; index < pic_param->num_components; index++) {
2042         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2043         int qm_type;
2044         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2045         unsigned char raster_qm[64];
2046         int j;
2047
2048         if (id > 4 || id < 1)
2049             continue;
2050
2051         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2052             continue;
2053
2054         qm_type = va_to_gen7_jpeg_qm[id];
2055
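        /*
         * Note: VA-API hands the JPEG quantiser tables in zig-zag scan
         * order, while the MFX quantiser-matrix state appears to expect
         * raster order, so the table is remapped through zigzag_direct[]
         * before being programmed.
         */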
2056         for (j = 0; j < 64; j++)
2057             raster_qm[zigzag_direct[j]] = qm[j];
2058
2059         gen7_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2060     }
2061 }
2062
2063 static void
2064 gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
2065                          VAPictureParameterBufferJPEGBaseline *pic_param,
2066                          VASliceParameterBufferJPEGBaseline *slice_param,
2067                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2068                          dri_bo *slice_data_bo,
2069                          struct gen7_mfd_context *gen7_mfd_context)
2070 {
2071     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2072     int scan_component_mask = 0;
2073     int i;
2074
2075     assert(slice_param->num_components > 0);
2076     assert(slice_param->num_components < 4);
2077     assert(slice_param->num_components <= pic_param->num_components);
2078
2079     for (i = 0; i < slice_param->num_components; i++) {
2080         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2081         case 1:
2082             scan_component_mask |= (1 << 0);
2083             break;
2084         case 2:
2085             scan_component_mask |= (1 << 1);
2086             break;
2087         case 3:
2088             scan_component_mask |= (1 << 2);
2089             break;
2090         default:
2091             assert(0);
2092             break;
2093         }
2094     }
2095
2096     BEGIN_BCS_BATCH(batch, 6);
2097     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2098     OUT_BCS_BATCH(batch, 
2099                   slice_param->slice_data_size);
2100     OUT_BCS_BATCH(batch, 
2101                   slice_param->slice_data_offset);
2102     OUT_BCS_BATCH(batch,
2103                   slice_param->slice_horizontal_position << 16 |
2104                   slice_param->slice_vertical_position << 0);
2105     OUT_BCS_BATCH(batch,
2106                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2107                   (scan_component_mask << 27) |                 /* scan components */
2108                   (0 << 26) |   /* disable interrupt allowed */
2109                   (slice_param->num_mcus << 0));                /* MCU count */
2110     OUT_BCS_BATCH(batch,
2111                   (slice_param->restart_interval << 0));    /* RestartInterval */
2112     ADVANCE_BCS_BATCH(batch);
2113 }
2114
2115 /* Workaround for JPEG decoding on Ivybridge */
2116
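/*
 * Note: the structure and helpers below appear to decode a tiny canned
 * 16x16 AVC intra clip through the MFX pipeline ahead of every JPEG
 * picture, which puts the fixed-function hardware into a known state
 * before the real JPEG decode is programmed (see gen7_mfd_jpeg_wa()).
 */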
2117 static struct {
2118     int width;
2119     int height;
2120     unsigned char data[32];
2121     int data_size;
2122     int data_bit_offset;
2123     int qp;
2124 } gen7_jpeg_wa_clip = {
2125     16,
2126     16,
2127     {
2128         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2129         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2130     },
2131     14,
2132     40,
2133     28,
2134 };
2135
2136 static void
2137 gen7_jpeg_wa_init(VADriverContextP ctx,
2138                   struct gen7_mfd_context *gen7_mfd_context)
2139 {
2140     struct i965_driver_data *i965 = i965_driver_data(ctx);
2141     VAStatus status;
2142     struct object_surface *obj_surface;
2143
2144     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2145         i965_DestroySurfaces(ctx,
2146                              &gen7_mfd_context->jpeg_wa_surface_id,
2147                              1);
2148
2149     status = i965_CreateSurfaces(ctx,
2150                                  gen7_jpeg_wa_clip.width,
2151                                  gen7_jpeg_wa_clip.height,
2152                                  VA_RT_FORMAT_YUV420,
2153                                  1,
2154                                  &gen7_mfd_context->jpeg_wa_surface_id);
2155     assert(status == VA_STATUS_SUCCESS);
2156
2157     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2158     assert(obj_surface);
2159     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
2160     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2161
2162     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2163         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2164                                                                "JPEG WA data",
2165                                                                0x1000,
2166                                                                0x1000);
2167         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2168                        0,
2169                        gen7_jpeg_wa_clip.data_size,
2170                        gen7_jpeg_wa_clip.data);
2171     }
2172 }
2173
2174 static void
2175 gen7_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2176                               struct gen7_mfd_context *gen7_mfd_context)
2177 {
2178     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2179
2180     BEGIN_BCS_BATCH(batch, 5);
2181     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2182     OUT_BCS_BATCH(batch,
2183                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2184                   (MFD_MODE_VLD << 15) | /* VLD mode */
2185                   (0 << 10) | /* disable Stream-Out */
2186                   (0 << 9)  | /* Post Deblocking Output */
2187                   (1 << 8)  | /* Pre Deblocking Output */
2188                   (0 << 5)  | /* not in stitch mode */
2189                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2190                   (MFX_FORMAT_AVC << 0));
2191     OUT_BCS_BATCH(batch,
2192                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2193                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2194                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2195                   (0 << 1)  |
2196                   (0 << 0));
2197     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2198     OUT_BCS_BATCH(batch, 0); /* reserved */
2199     ADVANCE_BCS_BATCH(batch);
2200 }
2201
2202 static void
2203 gen7_jpeg_wa_surface_state(VADriverContextP ctx,
2204                            struct gen7_mfd_context *gen7_mfd_context)
2205 {
2206     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2207     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2208
2209     BEGIN_BCS_BATCH(batch, 6);
2210     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2211     OUT_BCS_BATCH(batch, 0);
2212     OUT_BCS_BATCH(batch,
2213                   ((obj_surface->orig_width - 1) << 18) |
2214                   ((obj_surface->orig_height - 1) << 4));
2215     OUT_BCS_BATCH(batch,
2216                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2217                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2218                   (0 << 22) | /* surface object control state, ignored */
2219                   ((obj_surface->width - 1) << 3) | /* pitch */
2220                   (0 << 2)  | /* must be 0 */
2221                   (1 << 1)  | /* must be tiled */
2222                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2223     OUT_BCS_BATCH(batch,
2224                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2225                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2226     OUT_BCS_BATCH(batch,
2227                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2228                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2229     ADVANCE_BCS_BATCH(batch);
2230 }
2231
2232 static void
2233 gen7_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2234                                  struct gen7_mfd_context *gen7_mfd_context)
2235 {
2236     struct i965_driver_data *i965 = i965_driver_data(ctx);
2237     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2238     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2239     dri_bo *intra_bo;
2240     int i;
2241
2242     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2243                             "intra row store",
2244                             128 * 64,
2245                             0x1000);
2246
2247     BEGIN_BCS_BATCH(batch, 24);
2248     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
2249     OUT_BCS_RELOC(batch,
2250                   obj_surface->bo,
2251                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2252                   0);
2253     
2254     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2255
2256     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2257     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2258
2259     OUT_BCS_RELOC(batch,
2260                   intra_bo,
2261                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2262                   0);
2263
2264     OUT_BCS_BATCH(batch, 0);
2265
2266     /* DW 7..22 */
2267     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2268         OUT_BCS_BATCH(batch, 0);
2269     }
2270
2271     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2272     ADVANCE_BCS_BATCH(batch);
2273
2274     dri_bo_unreference(intra_bo);
2275 }
2276
2277 static void
2278 gen7_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2279                                      struct gen7_mfd_context *gen7_mfd_context)
2280 {
2281     struct i965_driver_data *i965 = i965_driver_data(ctx);
2282     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2283     dri_bo *bsd_mpc_bo, *mpr_bo;
2284
2285     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2286                               "bsd mpc row store",
2287                               11520, /* 1.5 * 120 * 64 */
2288                               0x1000);
2289
2290     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2291                           "mpr row store",
2292                           7680, /* 1.0 * 120 * 64 */
2293                           0x1000);
2294
2295     BEGIN_BCS_BATCH(batch, 4);
2296     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2297
2298     OUT_BCS_RELOC(batch,
2299                   bsd_mpc_bo,
2300                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2301                   0);
2302
2303     OUT_BCS_RELOC(batch,
2304                   mpr_bo,
2305                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2306                   0);
2307     OUT_BCS_BATCH(batch, 0);
2308
2309     ADVANCE_BCS_BATCH(batch);
2310
2311     dri_bo_unreference(bsd_mpc_bo);
2312     dri_bo_unreference(mpr_bo);
2313 }
2314
2315 static void
2316 gen7_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2317                           struct gen7_mfd_context *gen7_mfd_context)
2318 {
2319
2320 }
2321
2322 static void
2323 gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
2324                            struct gen7_mfd_context *gen7_mfd_context)
2325 {
2326     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2327     int img_struct = 0;
2328     int mbaff_frame_flag = 0;
2329     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2330
2331     BEGIN_BCS_BATCH(batch, 16);
2332     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2333     OUT_BCS_BATCH(batch, 
2334                   (width_in_mbs * height_in_mbs - 1));
2335     OUT_BCS_BATCH(batch, 
2336                   ((height_in_mbs - 1) << 16) | 
2337                   ((width_in_mbs - 1) << 0));
2338     OUT_BCS_BATCH(batch, 
2339                   (0 << 24) |
2340                   (0 << 16) |
2341                   (0 << 14) |
2342                   (0 << 13) |
2343                   (0 << 12) | /* differ from GEN6 */
2344                   (0 << 10) |
2345                   (img_struct << 8));
2346     OUT_BCS_BATCH(batch,
2347                   (1 << 10) | /* 4:2:0 */
2348                   (1 << 7) |  /* CABAC */
2349                   (0 << 6) |
2350                   (0 << 5) |
2351                   (0 << 4) |
2352                   (0 << 3) |
2353                   (1 << 2) |
2354                   (mbaff_frame_flag << 1) |
2355                   (0 << 0));
2356     OUT_BCS_BATCH(batch, 0);
2357     OUT_BCS_BATCH(batch, 0);
2358     OUT_BCS_BATCH(batch, 0);
2359     OUT_BCS_BATCH(batch, 0);
2360     OUT_BCS_BATCH(batch, 0);
2361     OUT_BCS_BATCH(batch, 0);
2362     OUT_BCS_BATCH(batch, 0);
2363     OUT_BCS_BATCH(batch, 0);
2364     OUT_BCS_BATCH(batch, 0);
2365     OUT_BCS_BATCH(batch, 0);
2366     OUT_BCS_BATCH(batch, 0);
2367     ADVANCE_BCS_BATCH(batch);
2368 }
2369
2370 static void
2371 gen7_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2372                                   struct gen7_mfd_context *gen7_mfd_context)
2373 {
2374     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2375     int i;
2376
2377     BEGIN_BCS_BATCH(batch, 69);
2378     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2379
2380     /* reference surfaces 0..15 */
2381     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2382         OUT_BCS_BATCH(batch, 0); /* top */
2383         OUT_BCS_BATCH(batch, 0); /* bottom */
2384     }
2385
2386     /* the current decoding frame/field */
2387     OUT_BCS_BATCH(batch, 0); /* top */
2388     OUT_BCS_BATCH(batch, 0); /* bottom */
2389
2390     /* POC List */
2391     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2392         OUT_BCS_BATCH(batch, 0);
2393         OUT_BCS_BATCH(batch, 0);
2394     }
2395
2396     OUT_BCS_BATCH(batch, 0);
2397     OUT_BCS_BATCH(batch, 0);
2398
2399     ADVANCE_BCS_BATCH(batch);
2400 }
2401
2402 static void
2403 gen7_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2404                                      struct gen7_mfd_context *gen7_mfd_context)
2405 {
2406     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2407
2408     BEGIN_BCS_BATCH(batch, 11);
2409     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2410     OUT_BCS_RELOC(batch,
2411                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2412                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2413                   0);
2414     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2415     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2416     OUT_BCS_BATCH(batch, 0);
2417     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2418     OUT_BCS_BATCH(batch, 0);
2419     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2420     OUT_BCS_BATCH(batch, 0);
2421     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2422     OUT_BCS_BATCH(batch, 0);
2423     ADVANCE_BCS_BATCH(batch);
2424 }
2425
2426 static void
2427 gen7_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2428                             struct gen7_mfd_context *gen7_mfd_context)
2429 {
2430     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2431
2432     /* the input bitstream format on GEN7 differs from GEN6 */
2433     BEGIN_BCS_BATCH(batch, 6);
2434     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2435     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2436     OUT_BCS_BATCH(batch, 0);
2437     OUT_BCS_BATCH(batch,
2438                   (0 << 31) |
2439                   (0 << 14) |
2440                   (0 << 12) |
2441                   (0 << 10) |
2442                   (0 << 8));
2443     OUT_BCS_BATCH(batch,
2444                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2445                   (0 << 5)  |
2446                   (0 << 4)  |
2447                   (1 << 3) | /* LastSlice Flag */
2448                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2449     OUT_BCS_BATCH(batch, 0);
2450     ADVANCE_BCS_BATCH(batch);
2451 }
2452
2453 static void
2454 gen7_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2455                              struct gen7_mfd_context *gen7_mfd_context)
2456 {
2457     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2458     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2459     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2460     int first_mb_in_slice = 0;
2461     int slice_type = SLICE_TYPE_I;
2462
2463     BEGIN_BCS_BATCH(batch, 11);
2464     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2465     OUT_BCS_BATCH(batch, slice_type);
2466     OUT_BCS_BATCH(batch, 
2467                   (num_ref_idx_l1 << 24) |
2468                   (num_ref_idx_l0 << 16) |
2469                   (0 << 8) |
2470                   (0 << 0));
2471     OUT_BCS_BATCH(batch, 
2472                   (0 << 29) |
2473                   (1 << 27) |   /* disable Deblocking */
2474                   (0 << 24) |
2475                   (gen7_jpeg_wa_clip.qp << 16) |
2476                   (0 << 8) |
2477                   (0 << 0));
2478     OUT_BCS_BATCH(batch, 
2479                   (slice_ver_pos << 24) |
2480                   (slice_hor_pos << 16) | 
2481                   (first_mb_in_slice << 0));
2482     OUT_BCS_BATCH(batch,
2483                   (next_slice_ver_pos << 16) |
2484                   (next_slice_hor_pos << 0));
2485     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2486     OUT_BCS_BATCH(batch, 0);
2487     OUT_BCS_BATCH(batch, 0);
2488     OUT_BCS_BATCH(batch, 0);
2489     OUT_BCS_BATCH(batch, 0);
2490     ADVANCE_BCS_BATCH(batch);
2491 }
2492
2493 static void
2494 gen7_mfd_jpeg_wa(VADriverContextP ctx,
2495                  struct gen7_mfd_context *gen7_mfd_context)
2496 {
2497     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2498     gen7_jpeg_wa_init(ctx, gen7_mfd_context);
2499     intel_batchbuffer_emit_mi_flush(batch);
2500     gen7_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2501     gen7_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2502     gen7_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2503     gen7_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2504     gen7_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2505     gen7_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2506     gen7_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2507
2508     gen7_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2509     gen7_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2510     gen7_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2511 }
2512
2513 void
2514 gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
2515                              struct decode_state *decode_state,
2516                              struct gen7_mfd_context *gen7_mfd_context)
2517 {
2518     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2519     VAPictureParameterBufferJPEGBaseline *pic_param;
2520     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
2521     dri_bo *slice_data_bo;
2522     int i, j, max_selector = 0;
2523
2524     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2525     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2526
2527     /* Currently only support Baseline DCT */
2528     gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2529     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2530     gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
2531     intel_batchbuffer_emit_mi_flush(batch);
2532     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2533     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2534     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2535     gen7_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2536     gen7_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2537
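    /*
     * Note: the slice parameters are walked twice.  The first pass below
     * only determines the highest DC/AC Huffman table selector in use so
     * that gen7_mfd_jpeg_huff_table_state() programs just the required
     * tables; the second pass emits the actual MFD_JPEG_BSD_OBJECT
     * commands.
     */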
2538     for (j = 0; j < decode_state->num_slice_params; j++) {
2539         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2540         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2541         slice_data_bo = decode_state->slice_datas[j]->bo;
2542         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2543
2544         if (j == decode_state->num_slice_params - 1)
2545             next_slice_group_param = NULL;
2546         else
2547             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2548
2549         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2550             int component;
2551
2552             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2553
2554             if (i < decode_state->slice_params[j]->num_elements - 1)
2555                 next_slice_param = slice_param + 1;
2556             else
2557                 next_slice_param = next_slice_group_param;
2558
2559             for (component = 0; component < slice_param->num_components; component++) {
2560                 if (max_selector < slice_param->components[component].dc_table_selector)
2561                     max_selector = slice_param->components[component].dc_table_selector;
2562
2563                 if (max_selector < slice_param->components[component].ac_table_selector)
2564                     max_selector = slice_param->components[component].ac_table_selector;
2565             }
2566
2567             slice_param++;
2568         }
2569     }
2570
2571     assert(max_selector < 2);
2572     gen7_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2573
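    /*
     * Second pass: re-walk the slices and emit the actual decode commands,
     * one gen7_mfd_jpeg_bsd_object() call per slice, with the slice data
     * bound through the indirect object base address state.
     */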
2574     for (j = 0; j < decode_state->num_slice_params; j++) {
2575         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2576         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
2577         slice_data_bo = decode_state->slice_datas[j]->bo;
2578         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2579
2580         if (j == decode_state->num_slice_params - 1)
2581             next_slice_group_param = NULL;
2582         else
2583             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
2584
2585         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2586             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2587
2588             if (i < decode_state->slice_params[j]->num_elements - 1)
2589                 next_slice_param = slice_param + 1;
2590             else
2591                 next_slice_param = next_slice_group_param;
2592
2593             gen7_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2594             slice_param++;
2595         }
2596     }
2597
2598     intel_batchbuffer_end_atomic(batch);
2599     intel_batchbuffer_flush(batch);
2600 }
2601
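/*
 * Common decode entry point, installed as base.run on the hw_context.
 * It sanity-checks the buffers handed in by the application and then
 * dispatches to the per-codec picture decode routine based on the profile.
 */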
2602 static VAStatus
2603 gen7_mfd_decode_picture(VADriverContextP ctx, 
2604                         VAProfile profile, 
2605                         union codec_state *codec_state,
2606                         struct hw_context *hw_context)
2608 {
2609     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2610     struct decode_state *decode_state = &codec_state->decode;
2611     VAStatus vaStatus;
2612
2613     assert(gen7_mfd_context);
2614
2615     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
2616
2617     if (vaStatus != VA_STATUS_SUCCESS)
2618         goto out;
2619
2620     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2621
2622     switch (profile) {
2623     case VAProfileMPEG2Simple:
2624     case VAProfileMPEG2Main:
2625         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
2626         break;
2627         
2628     case VAProfileH264ConstrainedBaseline:
2629     case VAProfileH264Main:
2630     case VAProfileH264High:
2631     case VAProfileH264StereoHigh:
2632         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
2633         break;
2634
2635     case VAProfileVC1Simple:
2636     case VAProfileVC1Main:
2637     case VAProfileVC1Advanced:
2638         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
2639         break;
2640
2641     case VAProfileJPEGBaseline:
2642         gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
2643         break;
2644
2645     default:
2646         assert(0);
2647         break;
2648     }
2649
2650     vaStatus = VA_STATUS_SUCCESS;
2651
2652 out:
2653     return vaStatus;
2654 }
2655
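/*
 * Installed as base.destroy: drop every buffer object owned by the decoder
 * context (scratch row stores, deblocking outputs, bitplane and JPEG
 * workaround slice data), free the batch buffer and finally the context.
 */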
2656 static void
2657 gen7_mfd_context_destroy(void *hw_context)
2658 {
2659     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2660
2661     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2662     gen7_mfd_context->post_deblocking_output.bo = NULL;
2663
2664     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2665     gen7_mfd_context->pre_deblocking_output.bo = NULL;
2666
2667     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2668     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2669
2670     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2671     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2672
2673     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2674     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2675
2676     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2677     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2678
2679     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
2680     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2681
2682     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
2683
2684     intel_batchbuffer_free(gen7_mfd_context->base.batch);
2685     free(gen7_mfd_context);
2686 }
2687
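/* Reset the cached MPEG-2 IQ-matrix load flags for a freshly created context. */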
2688 static void gen7_mfd_mpeg2_context_init(VADriverContextP ctx,
2689                                         struct gen7_mfd_context *gen7_mfd_context)
2690 {
2691     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
2692     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
2693     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
2694     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
2695 }
2696
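/*
 * Create a gen7 decode context for the given config: hook up the destroy/run
 * vtable entries, allocate a batch buffer, invalidate the reference surface
 * slots and do per-codec initialization.
 *
 * A hypothetical caller (the real call sites live elsewhere in the driver and
 * may differ in detail) would use it roughly like this:
 *
 *     struct hw_context *hwc = gen7_dec_hw_context_init(ctx, obj_config);
 *     if (hwc) {
 *         hwc->run(ctx, obj_config->profile, &obj_context->codec_state, hwc);
 *         hwc->destroy(hwc);
 *     }
 */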
2697 struct hw_context *
2698 gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
2699 {
2700     struct intel_driver_data *intel = intel_driver_data(ctx);
2701     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
2702     int i;
2703
    /* calloc() can fail; bail out instead of dereferencing a NULL context below */
    if (!gen7_mfd_context)
        return NULL;

2704     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
2705     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
2706     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
2707
2708     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
2709         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2710         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
2711         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
2712     }
2713
2714     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
2715     gen7_mfd_context->jpeg_wa_surface_object = NULL;
2716
2717     switch (obj_config->profile) {
2718     case VAProfileMPEG2Simple:
2719     case VAProfileMPEG2Main:
2720         gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
2721         break;
2722
2723     case VAProfileH264ConstrainedBaseline:
2724     case VAProfileH264Main:
2725     case VAProfileH264High:
2726     case VAProfileH264StereoHigh:
2727         gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
2728         break;
2729     default:
2730         break;
2731     }
2732     return (struct hw_context *)gen7_mfd_context;
2733 }