[profile/ivi/vaapi-intel-driver.git] / src / gen7_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <assert.h>
33 #include <va/va_dec_jpeg.h>
34
35 #include "intel_batchbuffer.h"
36 #include "intel_driver.h"
37
38 #include "i965_defines.h"
39 #include "i965_drv_video.h"
40 #include "i965_decoder_utils.h"
41
42 #include "gen7_mfd.h"
43
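/*
 * Zig-zag scan order for 8x8 blocks: zigzag_direct[i] gives the raster-order
 * position of the i-th coefficient in scan order. It is used below to
 * reorder the MPEG-2 quantiser matrices before loading them into MFX_QM_STATE.
 */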
44 static const uint32_t zigzag_direct[64] = {
45     0,   1,  8, 16,  9,  2,  3, 10,
46     17, 24, 32, 25, 18, 11,  4,  5,
47     12, 19, 26, 33, 40, 48, 41, 34,
48     27, 20, 13,  6,  7, 14, 21, 28,
49     35, 42, 49, 56, 57, 50, 43, 36,
50     29, 22, 15, 23, 30, 37, 44, 51,
51     58, 59, 52, 45, 38, 31, 39, 46,
52     53, 60, 61, 54, 47, 55, 62, 63
53 };
54
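/*
 * Keep the frame store (reference_surface[]) in sync with the current
 * picture's ReferenceFrames list: release entries that are no longer
 * referenced, place newly referenced surfaces into free slots, and finally
 * sort the array so that frame_store_id matches the array index wherever
 * possible.
 */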
55 static void
56 gen7_mfd_avc_frame_store_index(VADriverContextP ctx,
57                                VAPictureParameterBufferH264 *pic_param,
58                                struct gen7_mfd_context *gen7_mfd_context)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     int i, j;
62
63     assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
64
65     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
66         int found = 0;
67
68         if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
69             continue;
70
71         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
72             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
73             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
74                 continue;
75
76             if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
77                 found = 1;
78                 break;
79             }
80         }
81
82         if (!found) {
83             struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
84             obj_surface->flags &= ~SURFACE_REFERENCED;
85
86             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
87                 dri_bo_unreference(obj_surface->bo);
88                 obj_surface->bo = NULL;
89                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
90             }
91
92             if (obj_surface->free_private_data)
93                 obj_surface->free_private_data(&obj_surface->private_data);
94
95             gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
96             gen7_mfd_context->reference_surface[i].frame_store_id = -1;
97         }
98     }
99
100     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
101         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
102         int found = 0;
103
104         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
105             continue;
106
107         for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
108             if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
109                 continue;
110             
111             if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
112                 found = 1;
113                 break;
114             }
115         }
116
117         if (!found) {
118             int frame_idx;
119             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
120             
121             assert(obj_surface);
122             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
123
124             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
125                 for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
126                     if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
127                         continue;
128
129                     if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
130                         break;
131                 }
132
133                 if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
134                     break;
135             }
136
137             assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
138
139             for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
140                 if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
141                     gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
142                     gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
143                     break;
144                 }
145             }
146         }
147     }
148
149     /* sort */
150     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
151         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
152             gen7_mfd_context->reference_surface[i].frame_store_id == i)
153             continue;
154
155         for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
156             if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
157                 gen7_mfd_context->reference_surface[j].frame_store_id == i) {
158                 VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
159                 int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
160
161                 gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
162                 gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
163                 gen7_mfd_context->reference_surface[j].surface_id = id;
164                 gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
165                 break;
166             }
167         }
168     }
169 }
170
171 static void 
172 gen7_mfd_free_avc_surface(void **data)
173 {
174     struct gen7_avc_surface *gen7_avc_surface = *data;
175
176     if (!gen7_avc_surface)
177         return;
178
179     dri_bo_unreference(gen7_avc_surface->dmv_top);
180     gen7_avc_surface->dmv_top = NULL;
181     dri_bo_unreference(gen7_avc_surface->dmv_bottom);
182     gen7_avc_surface->dmv_bottom = NULL;
183
184     free(gen7_avc_surface);
185     *data = NULL;
186 }
187
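/*
 * The per-surface private data for AVC holds the direct-mode MV buffers,
 * sized at 64 bytes per macroblock. A separate bottom-field buffer is only
 * needed for field pictures coded without direct_8x8_inference_flag.
 */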
188 static void
189 gen7_mfd_init_avc_surface(VADriverContextP ctx, 
190                           VAPictureParameterBufferH264 *pic_param,
191                           struct object_surface *obj_surface)
192 {
193     struct i965_driver_data *i965 = i965_driver_data(ctx);
194     struct gen7_avc_surface *gen7_avc_surface = obj_surface->private_data;
195     int width_in_mbs, height_in_mbs;
196
197     obj_surface->free_private_data = gen7_mfd_free_avc_surface;
198     width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
199     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
200
201     if (!gen7_avc_surface) {
202         gen7_avc_surface = calloc(1, sizeof(struct gen7_avc_surface));
203         assert((obj_surface->size & 0x3f) == 0);
204         obj_surface->private_data = gen7_avc_surface;
205     }
206
207     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
208                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
209
210     if (gen7_avc_surface->dmv_top == NULL) {
211         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
212                                                  "direct mv w/r buffer",
213                                                  width_in_mbs * height_in_mbs * 64,
214                                                  0x1000);
215     }
216
217     if (gen7_avc_surface->dmv_bottom_flag &&
218         gen7_avc_surface->dmv_bottom == NULL) {
219         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
220                                                     "direct mv w/r buffer",
221                                                     width_in_mbs * height_in_mbs * 64,
222                                                     0x1000);
223     }
224 }
225
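/*
 * MFX_PIPE_MODE_SELECT: long-format VLD decode for the selected codec.
 * Exactly one of the pre-/post-deblocking outputs is marked valid,
 * depending on whether the in-loop deblocking filter is used.
 */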
226 static void
227 gen7_mfd_pipe_mode_select(VADriverContextP ctx,
228                           struct decode_state *decode_state,
229                           int standard_select,
230                           struct gen7_mfd_context *gen7_mfd_context)
231 {
232     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
233
234     assert(standard_select == MFX_FORMAT_MPEG2 ||
235            standard_select == MFX_FORMAT_AVC ||
236            standard_select == MFX_FORMAT_VC1 ||
237            standard_select == MFX_FORMAT_JPEG);
238
239     BEGIN_BCS_BATCH(batch, 5);
240     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
241     OUT_BCS_BATCH(batch,
242                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
243                   (MFD_MODE_VLD << 15) | /* VLD mode */
244                   (0 << 10) | /* disable Stream-Out */
245                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
246                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
247                   (0 << 5)  | /* not in stitch mode */
248                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
249                   (standard_select << 0));
250     OUT_BCS_BATCH(batch,
251                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
252                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
253                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
254                   (0 << 1)  |
255                   (0 << 0));
256     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
257     OUT_BCS_BATCH(batch, 0); /* reserved */
258     ADVANCE_BCS_BATCH(batch);
259 }
260
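/*
 * MFX_SURFACE_STATE for the destination surface: a Y-tiled planar 4:2:0
 * surface with interleaved chroma (except for JPEG). The Cb/Cr plane
 * offsets are expressed in rows from the top of the luma plane.
 */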
261 static void
262 gen7_mfd_surface_state(VADriverContextP ctx,
263                        struct decode_state *decode_state,
264                        int standard_select,
265                        struct gen7_mfd_context *gen7_mfd_context)
266 {
267     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
268     struct i965_driver_data *i965 = i965_driver_data(ctx);
269     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
270     unsigned int y_cb_offset;
271     unsigned int y_cr_offset;
272
273     assert(obj_surface);
274
275     y_cb_offset = obj_surface->y_cb_offset;
276     y_cr_offset = obj_surface->y_cr_offset;
277
278     BEGIN_BCS_BATCH(batch, 6);
279     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
280     OUT_BCS_BATCH(batch, 0);
281     OUT_BCS_BATCH(batch,
282                   ((obj_surface->orig_height - 1) << 18) |
283                   ((obj_surface->orig_width - 1) << 4));
284     OUT_BCS_BATCH(batch,
285                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
286                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
287                   (0 << 22) | /* surface object control state, ignored */
288                   ((obj_surface->width - 1) << 3) | /* pitch */
289                   (0 << 2)  | /* must be 0 */
290                   (1 << 1)  | /* must be tiled */
291                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
292     OUT_BCS_BATCH(batch,
293                   (0 << 16) | /* X offset for U(Cb), must be 0 */
294                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
295     OUT_BCS_BATCH(batch,
296                   (0 << 16) | /* X offset for V(Cr), must be 0 */
297                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
298     ADVANCE_BCS_BATCH(batch);
299 }
300
301 static void
302 gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
303                              struct decode_state *decode_state,
304                              int standard_select,
305                              struct gen7_mfd_context *gen7_mfd_context)
306 {
307     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
308     struct i965_driver_data *i965 = i965_driver_data(ctx);
309     int i;
310
311     BEGIN_BCS_BATCH(batch, 24);
312     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
313     if (gen7_mfd_context->pre_deblocking_output.valid)
314         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
315                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
316                       0);
317     else
318         OUT_BCS_BATCH(batch, 0);
319
320     if (gen7_mfd_context->post_deblocking_output.valid)
321         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
322                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
323                       0);
324     else
325         OUT_BCS_BATCH(batch, 0);
326
327     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
328     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
329
330     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
331         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
332                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
333                       0);
334     else
335         OUT_BCS_BATCH(batch, 0);
336
337     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
338         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
339                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
340                       0);
341     else
342         OUT_BCS_BATCH(batch, 0);
343
344     /* DW 7..22 */
345     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
346         struct object_surface *obj_surface;
347
348         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
349             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
350             assert(obj_surface && obj_surface->bo);
351
352             OUT_BCS_RELOC(batch, obj_surface->bo,
353                           I915_GEM_DOMAIN_INSTRUCTION, 0,
354                           0);
355         } else {
356             OUT_BCS_BATCH(batch, 0);
357         }
358     }
359
360     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
361     ADVANCE_BCS_BATCH(batch);
362 }
363
364 static void
365 gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
366                                  dri_bo *slice_data_bo,
367                                  int standard_select,
368                                  struct gen7_mfd_context *gen7_mfd_context)
369 {
370     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
371
372     BEGIN_BCS_BATCH(batch, 11);
373     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
374     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
375     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
376     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
377     OUT_BCS_BATCH(batch, 0);
378     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
379     OUT_BCS_BATCH(batch, 0);
380     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
381     OUT_BCS_BATCH(batch, 0);
382     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
383     OUT_BCS_BATCH(batch, 0);
384     ADVANCE_BCS_BATCH(batch);
385 }
386
387 static void
388 gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
389                                  struct decode_state *decode_state,
390                                  int standard_select,
391                                  struct gen7_mfd_context *gen7_mfd_context)
392 {
393     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
394
395     BEGIN_BCS_BATCH(batch, 4);
396     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
397
398     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
399         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
400                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
401                       0);
402     else
403         OUT_BCS_BATCH(batch, 0);
404
405     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
406         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
407                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
408                       0);
409     else
410         OUT_BCS_BATCH(batch, 0);
411
412     if (gen7_mfd_context->bitplane_read_buffer.valid)
413         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
414                       I915_GEM_DOMAIN_INSTRUCTION, 0,
415                       0);
416     else
417         OUT_BCS_BATCH(batch, 0);
418
419     ADVANCE_BCS_BATCH(batch);
420 }
421
422 #if 0
423 static void
424 gen7_mfd_aes_state(VADriverContextP ctx,
425                    struct decode_state *decode_state,
426                    int standard_select)
427 {
428     /* FIXME */
429 }
430 #endif
431
432 static void
433 gen7_mfd_qm_state(VADriverContextP ctx,
434                   int qm_type,
435                   unsigned char *qm,
436                   int qm_length,
437                   struct gen7_mfd_context *gen7_mfd_context)
438 {
439     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
440     unsigned int qm_buffer[16];
441
442     assert(qm_length <= 16 * 4);
        memset(qm_buffer, 0, sizeof(qm_buffer)); /* avoid emitting uninitialized bytes when qm_length < 64 */
443     memcpy(qm_buffer, qm, qm_length);
444
445     BEGIN_BCS_BATCH(batch, 18);
446     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
447     OUT_BCS_BATCH(batch, qm_type << 0);
448     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
449     ADVANCE_BCS_BATCH(batch);
450 }
451
452 #if 0
453 static void
454 gen7_mfd_wait(VADriverContextP ctx,
455               struct decode_state *decode_state,
456               int standard_select,
457               struct gen7_mfd_context *gen7_mfd_context)
458 {
459     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
460
461     BEGIN_BCS_BATCH(batch, 1);
462     OUT_BCS_BATCH(batch, MFX_WAIT | (1 << 8));
463     ADVANCE_BCS_BATCH(batch);
464 }
465 #endif
466
467 static void
468 gen7_mfd_avc_img_state(VADriverContextP ctx,
469                        struct decode_state *decode_state,
470                        struct gen7_mfd_context *gen7_mfd_context)
471 {
472     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
473     int img_struct;
474     int mbaff_frame_flag;
475     unsigned int width_in_mbs, height_in_mbs;
476     VAPictureParameterBufferH264 *pic_param;
477
478     assert(decode_state->pic_param && decode_state->pic_param->buffer);
479     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
480     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
481
482     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
483         img_struct = 1;
484     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
485         img_struct = 3;
486     else
487         img_struct = 0;
488
489     if ((img_struct & 0x1) == 0x1) {
490         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
491     } else {
492         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
493     }
494
495     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
496         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
497         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
498     } else {
499         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
500     }
501
502     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
503                         !pic_param->pic_fields.bits.field_pic_flag);
504
505     width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
506     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
507
508     /* MFX unit doesn't support 4:2:2 and 4:4:4 pictures */
509     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
510            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
511     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
512
513     BEGIN_BCS_BATCH(batch, 16);
514     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
515     OUT_BCS_BATCH(batch, 
516                   width_in_mbs * height_in_mbs);
517     OUT_BCS_BATCH(batch, 
518                   ((height_in_mbs - 1) << 16) | 
519                   ((width_in_mbs - 1) << 0));
520     OUT_BCS_BATCH(batch, 
521                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
522                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
523                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
524                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
525                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
526                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
527                   (img_struct << 8));
528     OUT_BCS_BATCH(batch,
529                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
530                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
531                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
532                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
533                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
534                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
535                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
536                   (mbaff_frame_flag << 1) |
537                   (pic_param->pic_fields.bits.field_pic_flag << 0));
538     OUT_BCS_BATCH(batch, 0);
539     OUT_BCS_BATCH(batch, 0);
540     OUT_BCS_BATCH(batch, 0);
541     OUT_BCS_BATCH(batch, 0);
542     OUT_BCS_BATCH(batch, 0);
543     OUT_BCS_BATCH(batch, 0);
544     OUT_BCS_BATCH(batch, 0);
545     OUT_BCS_BATCH(batch, 0);
546     OUT_BCS_BATCH(batch, 0);
547     OUT_BCS_BATCH(batch, 0);
548     OUT_BCS_BATCH(batch, 0);
549     ADVANCE_BCS_BATCH(batch);
550 }
551
552 static void
553 gen7_mfd_avc_qm_state(VADriverContextP ctx,
554                       struct decode_state *decode_state,
555                       struct gen7_mfd_context *gen7_mfd_context)
556 {
557     VAIQMatrixBufferH264 *iq_matrix;
558     VAPictureParameterBufferH264 *pic_param;
559
560     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
561         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
562     else
563         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
564
565     assert(decode_state->pic_param && decode_state->pic_param->buffer);
566     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
567
568     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
569     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
570
571     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
572         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
573         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
574     }
575 }
576
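/*
 * MFX_AVC_DIRECTMODE_STATE (69 dwords): direct-MV buffer addresses for the
 * 16 frame store entries and the current picture, followed by the
 * top/bottom picture order counts for the same 17 entries.
 */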
577 static void
578 gen7_mfd_avc_directmode_state(VADriverContextP ctx,
579                               VAPictureParameterBufferH264 *pic_param,
580                               VASliceParameterBufferH264 *slice_param,
581                               struct gen7_mfd_context *gen7_mfd_context)
582 {
583     struct i965_driver_data *i965 = i965_driver_data(ctx);
584     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
585     struct object_surface *obj_surface;
586     struct gen7_avc_surface *gen7_avc_surface;
587     VAPictureH264 *va_pic;
588     int i, j;
589
590     BEGIN_BCS_BATCH(batch, 69);
591     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
592
593     /* reference surfaces 0..15 */
594     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
595         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
596             obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
597             assert(obj_surface);
598             gen7_avc_surface = obj_surface->private_data;
599
600             if (gen7_avc_surface == NULL) {
601                 OUT_BCS_BATCH(batch, 0);
602                 OUT_BCS_BATCH(batch, 0);
603             } else {
604                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
605                               I915_GEM_DOMAIN_INSTRUCTION, 0,
606                               0);
607
608                 if (gen7_avc_surface->dmv_bottom_flag == 1)
609                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
610                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
611                                   0);
612                 else
613                     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
614                                   I915_GEM_DOMAIN_INSTRUCTION, 0,
615                                   0);
616             }
617         } else {
618             OUT_BCS_BATCH(batch, 0);
619             OUT_BCS_BATCH(batch, 0);
620         }
621     }
622
623     /* the current decoding frame/field */
624     va_pic = &pic_param->CurrPic;
625     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
626     obj_surface = SURFACE(va_pic->picture_id);
627     assert(obj_surface && obj_surface->bo && obj_surface->private_data);
628     gen7_avc_surface = obj_surface->private_data;
629
630     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
631                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
632                   0);
633
634     if (gen7_avc_surface->dmv_bottom_flag == 1)
635         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
636                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
637                       0);
638     else
639         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
640                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
641                       0);
642
643     /* POC List */
644     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
645         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
646             int found = 0;
647             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
648                 va_pic = &pic_param->ReferenceFrames[j];
649                 
650                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
651                     continue;
652
653                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
654                     found = 1;
655                     break;
656                 }
657             }
658
659             assert(found == 1);
660             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
661             
662             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
663             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
664         } else {
665             OUT_BCS_BATCH(batch, 0);
666             OUT_BCS_BATCH(batch, 0);
667         }
668     }
669
670     va_pic = &pic_param->CurrPic;
671     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
672     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
673
674     ADVANCE_BCS_BATCH(batch);
675 }
676
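/*
 * MFX_AVC_SLICE_STATE: derive the macroblock coordinates of this slice and
 * of the next one (or the bottom of the picture for the last slice, which
 * also sets the "last slice" bit). first_mb_in_slice is doubled for MBAFF
 * frames because it counts macroblock pairs there.
 */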
677 static void
678 gen7_mfd_avc_slice_state(VADriverContextP ctx,
679                          VAPictureParameterBufferH264 *pic_param,
680                          VASliceParameterBufferH264 *slice_param,
681                          VASliceParameterBufferH264 *next_slice_param,
682                          struct gen7_mfd_context *gen7_mfd_context)
683 {
684     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
685     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
686     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
687     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
688     int num_ref_idx_l0, num_ref_idx_l1;
689     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
690                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
691     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
692     int slice_type;
693
694     if (slice_param->slice_type == SLICE_TYPE_I ||
695         slice_param->slice_type == SLICE_TYPE_SI) {
696         slice_type = SLICE_TYPE_I;
697     } else if (slice_param->slice_type == SLICE_TYPE_P ||
698                slice_param->slice_type == SLICE_TYPE_SP) {
699         slice_type = SLICE_TYPE_P;
700     } else { 
701         assert(slice_param->slice_type == SLICE_TYPE_B);
702         slice_type = SLICE_TYPE_B;
703     }
704
705     if (slice_type == SLICE_TYPE_I) {
706         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
707         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
708         num_ref_idx_l0 = 0;
709         num_ref_idx_l1 = 0;
710     } else if (slice_type == SLICE_TYPE_P) {
711         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
712         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
713         num_ref_idx_l1 = 0;
714     } else {
715         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
716         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
717     }
718
719     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
720     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
721     slice_ver_pos = first_mb_in_slice / width_in_mbs;
722
723     if (next_slice_param) {
724         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
725         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
726         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
727     } else {
728         next_slice_hor_pos = 0;
729         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
730     }
731
732     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
733     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
734     OUT_BCS_BATCH(batch, slice_type);
735     OUT_BCS_BATCH(batch, 
736                   (num_ref_idx_l1 << 24) |
737                   (num_ref_idx_l0 << 16) |
738                   (slice_param->chroma_log2_weight_denom << 8) |
739                   (slice_param->luma_log2_weight_denom << 0));
740     OUT_BCS_BATCH(batch, 
741                   (slice_param->direct_spatial_mv_pred_flag << 29) |
742                   (slice_param->disable_deblocking_filter_idc << 27) |
743                   (slice_param->cabac_init_idc << 24) |
744                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
745                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
746                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
747     OUT_BCS_BATCH(batch, 
748                   (slice_ver_pos << 24) |
749                   (slice_hor_pos << 16) | 
750                   (first_mb_in_slice << 0));
751     OUT_BCS_BATCH(batch,
752                   (next_slice_ver_pos << 16) |
753                   (next_slice_hor_pos << 0));
754     OUT_BCS_BATCH(batch, 
755                   (next_slice_param == NULL) << 19); /* last slice flag */
756     OUT_BCS_BATCH(batch, 0);
757     OUT_BCS_BATCH(batch, 0);
758     OUT_BCS_BATCH(batch, 0);
759     OUT_BCS_BATCH(batch, 0);
760     ADVANCE_BCS_BATCH(batch);
761 }
762
763 static inline void
764 gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
765                            VAPictureParameterBufferH264 *pic_param,
766                            VASliceParameterBufferH264 *slice_param,
767                            struct gen7_mfd_context *gen7_mfd_context)
768 {
769     gen6_send_avc_ref_idx_state(
770         gen7_mfd_context->base.batch,
771         slice_param,
772         gen7_mfd_context->reference_surface
773     );
774 }
775
776 static void
777 gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
778                                 VAPictureParameterBufferH264 *pic_param,
779                                 VASliceParameterBufferH264 *slice_param,
780                                 struct gen7_mfd_context *gen7_mfd_context)
781 {
782     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
783     int i, j, num_weight_offset_table = 0;
784     short weightoffsets[32 * 6];
785
786     if ((slice_param->slice_type == SLICE_TYPE_P ||
787          slice_param->slice_type == SLICE_TYPE_SP) &&
788         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
789         num_weight_offset_table = 1;
790     }
791     
792     if ((slice_param->slice_type == SLICE_TYPE_B) &&
793         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
794         num_weight_offset_table = 2;
795     }
796
797     for (i = 0; i < num_weight_offset_table; i++) {
798         BEGIN_BCS_BATCH(batch, 98);
799         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
800         OUT_BCS_BATCH(batch, i);
801
802         if (i == 0) {
803             for (j = 0; j < 32; j++) {
804                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
805                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
806                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
807                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
808                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
809                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
810             }
811         } else {
812             for (j = 0; j < 32; j++) {
813                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
814                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
815                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
816                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
817                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
818                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
819             }
820         }
821
822         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
823         ADVANCE_BCS_BATCH(batch);
824     }
825 }
826
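/*
 * The slice_data_bit_offset passed in by the application is counted on the
 * RBSP, i.e. after emulation prevention bytes have been removed. Count the
 * 00 00 03 sequences inside the slice header to convert it into an offset
 * into the raw bitstream as seen by the hardware; for CABAC slices the
 * slice data additionally starts on a byte boundary.
 */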
827 static int
828 gen7_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
829 {
830     int out_slice_data_bit_offset;
831     int slice_header_size = in_slice_data_bit_offset / 8;
832     int i, j;
833
834     for (i = 0, j = 0; i < slice_header_size; i++, j++) {
835         if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
836             i++, j += 2;
837         }
838     }
839
840     out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
841
842     if (mode_flag == ENTROPY_CABAC)
843         out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
844
845     return out_slice_data_bit_offset;
846 }
847
848 static void
849 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
850                         VAPictureParameterBufferH264 *pic_param,
851                         VASliceParameterBufferH264 *slice_param,
852                         dri_bo *slice_data_bo,
853                         VASliceParameterBufferH264 *next_slice_param,
854                         struct gen7_mfd_context *gen7_mfd_context)
855 {
856     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
857     int slice_data_bit_offset;
858     uint8_t *slice_data = NULL;
859
860     dri_bo_map(slice_data_bo, 0);
861     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
862     slice_data_bit_offset = gen7_mfd_avc_get_slice_bit_offset(slice_data,
863                                                               pic_param->pic_fields.bits.entropy_coding_mode_flag,
864                                                               slice_param->slice_data_bit_offset);
865     dri_bo_unmap(slice_data_bo);
866
867     /* the input bitstream format on GEN7 differs from GEN6 */
868     BEGIN_BCS_BATCH(batch, 6);
869     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
870     OUT_BCS_BATCH(batch, 
871                   (slice_param->slice_data_size));
872     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
873     OUT_BCS_BATCH(batch,
874                   (0 << 31) |
875                   (0 << 14) |
876                   (0 << 12) |
877                   (0 << 10) |
878                   (0 << 8));
879     OUT_BCS_BATCH(batch,
880                   ((slice_data_bit_offset >> 3) << 16) |
881                   (0 << 5)  |
882                   (0 << 4)  |
883                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
884                   (slice_data_bit_offset & 0x7));
885     OUT_BCS_BATCH(batch, 0);
886     ADVANCE_BCS_BATCH(batch);
887 }
888
889 static inline void
890 gen7_mfd_avc_context_init(
891     VADriverContextP         ctx,
892     struct gen7_mfd_context *gen7_mfd_context
893 )
894 {
895     /* Initialize flat scaling lists */
896     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
897 }
898
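/*
 * Per-picture setup for AVC: scan the slice parameters to see whether
 * in-loop deblocking is enabled anywhere (which selects the pre- or
 * post-deblocking output), refresh the frame store, (re)allocate the
 * render target and its direct-MV buffers, and allocate the row-store
 * scratch buffers whose sizes scale with the picture width in macroblocks.
 */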
899 static void
900 gen7_mfd_avc_decode_init(VADriverContextP ctx,
901                          struct decode_state *decode_state,
902                          struct gen7_mfd_context *gen7_mfd_context)
903 {
904     VAPictureParameterBufferH264 *pic_param;
905     VASliceParameterBufferH264 *slice_param;
906     VAPictureH264 *va_pic;
907     struct i965_driver_data *i965 = i965_driver_data(ctx);
908     struct object_surface *obj_surface;
909     dri_bo *bo;
910     int i, j, enable_avc_ildb = 0;
911     int width_in_mbs;
912
913     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
914         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
915         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
916
917         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
918             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
919             assert((slice_param->slice_type == SLICE_TYPE_I) ||
920                    (slice_param->slice_type == SLICE_TYPE_SI) ||
921                    (slice_param->slice_type == SLICE_TYPE_P) ||
922                    (slice_param->slice_type == SLICE_TYPE_SP) ||
923                    (slice_param->slice_type == SLICE_TYPE_B));
924
925             if (slice_param->disable_deblocking_filter_idc != 1) {
926                 enable_avc_ildb = 1;
927                 break;
928             }
929
930             slice_param++;
931         }
932     }
933
934     assert(decode_state->pic_param && decode_state->pic_param->buffer);
935     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
936     gen7_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
937     width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
938
939     /* Current decoded picture */
940     va_pic = &pic_param->CurrPic;
941     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
942     obj_surface = SURFACE(va_pic->picture_id);
943     assert(obj_surface);
944     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
945     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
946     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
947     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
948
949     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
950     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
951     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
952     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
953
954     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
955     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
956     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
957     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
958
959     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
960     bo = dri_bo_alloc(i965->intel.bufmgr,
961                       "intra row store",
962                       width_in_mbs * 64,
963                       0x1000);
964     assert(bo);
965     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
966     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
967
968     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
969     bo = dri_bo_alloc(i965->intel.bufmgr,
970                       "deblocking filter row store",
971                       width_in_mbs * 64 * 4,
972                       0x1000);
973     assert(bo);
974     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
975     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
976
977     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
978     bo = dri_bo_alloc(i965->intel.bufmgr,
979                       "bsd mpc row store",
980                       width_in_mbs * 64 * 2,
981                       0x1000);
982     assert(bo);
983     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
984     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
985
986     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
987     bo = dri_bo_alloc(i965->intel.bufmgr,
988                       "mpr row store",
989                       width_in_mbs * 64 * 2,
990                       0x1000);
991     assert(bo);
992     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
993     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
994
995     gen7_mfd_context->bitplane_read_buffer.valid = 0;
996 }
997
998 static void
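/*
 * Top-level AVC decode: emit the per-picture state once, then walk every
 * slice group and slice, programming the per-slice states followed by the
 * MFD_AVC_BSD_OBJECT that kicks off decoding of that slice.
 */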
999 gen7_mfd_avc_decode_picture(VADriverContextP ctx,
1000                             struct decode_state *decode_state,
1001                             struct gen7_mfd_context *gen7_mfd_context)
1002 {
1003     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1004     VAPictureParameterBufferH264 *pic_param;
1005     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
1006     dri_bo *slice_data_bo;
1007     int i, j;
1008
1009     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1010     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1011     gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
1012
1013     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1014     intel_batchbuffer_emit_mi_flush(batch);
1015     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1016     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1017     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1018     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1019     gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
1020     gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
1021
1022     for (j = 0; j < decode_state->num_slice_params; j++) {
1023         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1024         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1025         slice_data_bo = decode_state->slice_datas[j]->bo;
1026         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
1027
1028         if (j == decode_state->num_slice_params - 1)
1029             next_slice_group_param = NULL;
1030         else
1031             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1032
1033         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1034             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1035             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1036                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1037                    (slice_param->slice_type == SLICE_TYPE_P) ||
1038                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1039                    (slice_param->slice_type == SLICE_TYPE_B));
1040
1041             if (i < decode_state->slice_params[j]->num_elements - 1)
1042                 next_slice_param = slice_param + 1;
1043             else
1044                 next_slice_param = next_slice_group_param;
1045
1046             gen7_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
1047             gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1048             gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1049             gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1050             gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1051             slice_param++;
1052         }
1053     }
1054
1055     intel_batchbuffer_end_atomic(batch);
1056     intel_batchbuffer_flush(batch);
1057 }
1058
1059 static void
1060 gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
1061                            struct decode_state *decode_state,
1062                            struct gen7_mfd_context *gen7_mfd_context)
1063 {
1064     VAPictureParameterBufferMPEG2 *pic_param;
1065     struct i965_driver_data *i965 = i965_driver_data(ctx);
1066     struct object_surface *obj_surface;
1067     dri_bo *bo;
1068     unsigned int width_in_mbs;
1069
1070     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1071     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1072     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1073
1074     mpeg2_set_reference_surfaces(
1075         ctx,
1076         gen7_mfd_context->reference_surface,
1077         decode_state,
1078         pic_param
1079     );
1080
1081     /* Current decoded picture */
1082     obj_surface = SURFACE(decode_state->current_render_target);
1083     assert(obj_surface);
1084     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1085
1086     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1087     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1088     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1089     gen7_mfd_context->pre_deblocking_output.valid = 1;
1090
1091     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1092     bo = dri_bo_alloc(i965->intel.bufmgr,
1093                       "bsd mpc row store",
1094                       width_in_mbs * 96,
1095                       0x1000);
1096     assert(bo);
1097     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1098     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1099
1100     gen7_mfd_context->post_deblocking_output.valid = 0;
1101     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1102     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1103     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1104     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1105 }
1106
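/*
 * MFX_MPEG2_PIC_STATE. The 16-bit VA f_code field packs f_code[0][0] in the
 * most significant nibble down to f_code[1][1] in the least significant
 * one, so the four nibbles are extracted individually below.
 */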
1107 static void
1108 gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
1109                          struct decode_state *decode_state,
1110                          struct gen7_mfd_context *gen7_mfd_context)
1111 {
1112     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1113     VAPictureParameterBufferMPEG2 *pic_param;
1114
1115     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1116     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1117
1118     BEGIN_BCS_BATCH(batch, 13);
1119     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1120     OUT_BCS_BATCH(batch,
1121                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1122                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1123                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1124                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1125                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1126                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1127                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1128                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1129                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1130                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1131                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1132                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1133     OUT_BCS_BATCH(batch,
1134                   pic_param->picture_coding_type << 9);
1135     OUT_BCS_BATCH(batch,
1136                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1137                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1138     OUT_BCS_BATCH(batch, 0);
1139     OUT_BCS_BATCH(batch, 0);
1140     OUT_BCS_BATCH(batch, 0);
1141     OUT_BCS_BATCH(batch, 0);
1142     OUT_BCS_BATCH(batch, 0);
1143     OUT_BCS_BATCH(batch, 0);
1144     OUT_BCS_BATCH(batch, 0);
1145     OUT_BCS_BATCH(batch, 0);
1146     OUT_BCS_BATCH(batch, 0);
1147     ADVANCE_BCS_BATCH(batch);
1148 }
1149
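/*
 * Cache the MPEG-2 quantiser matrices in the context, reordering them
 * through zigzag_direct (scan order to raster order); a load flag of -1
 * marks a matrix that has never been cached. Whichever matrices are
 * flagged as loaded are then committed via MFX_QM_STATE.
 */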
1150 static void
1151 gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
1152                         struct decode_state *decode_state,
1153                         struct gen7_mfd_context *gen7_mfd_context)
1154 {
1155     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1156     int i, j;
1157
1158     /* Update internal QM state */
1159     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1160         VAIQMatrixBufferMPEG2 * const iq_matrix =
1161             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1162
1163         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1164             iq_matrix->load_intra_quantiser_matrix) {
1165             gen_iq_matrix->load_intra_quantiser_matrix =
1166                 iq_matrix->load_intra_quantiser_matrix;
1167             if (iq_matrix->load_intra_quantiser_matrix) {
1168                 for (j = 0; j < 64; j++)
1169                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1170                         iq_matrix->intra_quantiser_matrix[j];
1171             }
1172         }
1173
1174         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1175             iq_matrix->load_non_intra_quantiser_matrix) {
1176             gen_iq_matrix->load_non_intra_quantiser_matrix =
1177                 iq_matrix->load_non_intra_quantiser_matrix;
1178             if (iq_matrix->load_non_intra_quantiser_matrix) {
1179                 for (j = 0; j < 64; j++)
1180                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1181                         iq_matrix->non_intra_quantiser_matrix[j];
1182             }
1183         }
1184     }
1185
1186     /* Commit QM state to HW */
1187     for (i = 0; i < 2; i++) {
1188         unsigned char *qm = NULL;
1189         int qm_type;
1190
1191         if (i == 0) {
1192             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1193                 qm = gen_iq_matrix->intra_quantiser_matrix;
1194                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1195             }
1196         } else {
1197             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1198                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1199                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1200             }
1201         }
1202
1203         if (!qm)
1204             continue;
1205
1206         gen7_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1207     }
1208 }
1209
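/*
 * MFD_MPEG2_BSD_OBJECT: the macroblock count of a slice is derived from the
 * start positions of this slice and the next one (or the bottom of the
 * picture for the last slice). When the wa_mpeg2_slice_vertical_position
 * workaround is active, the reported vertical position of field-picture
 * slices is halved, for applications that pass it in frame coordinates.
 */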
1210 static void
1211 gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1212                           VAPictureParameterBufferMPEG2 *pic_param,
1213                           VASliceParameterBufferMPEG2 *slice_param,
1214                           VASliceParameterBufferMPEG2 *next_slice_param,
1215                           struct gen7_mfd_context *gen7_mfd_context)
1216 {
1217     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1218     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1219     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1220
1221     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1222         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1223         is_field_pic = 1;
1224     is_field_pic_wa = is_field_pic &&
1225         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1226
1227     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1228     hpos0 = slice_param->slice_horizontal_position;
1229
1230     if (next_slice_param == NULL) {
1231         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1232         hpos1 = 0;
1233     } else {
1234         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1235         hpos1 = next_slice_param->slice_horizontal_position;
1236     }
1237
1238     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1239
1240     BEGIN_BCS_BATCH(batch, 5);
1241     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1242     OUT_BCS_BATCH(batch, 
1243                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1244     OUT_BCS_BATCH(batch, 
1245                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1246     OUT_BCS_BATCH(batch,
1247                   hpos0 << 24 |
1248                   vpos0 << 16 |
1249                   mb_count << 8 |
1250                   (next_slice_param == NULL) << 5 |
1251                   (next_slice_param == NULL) << 3 |
1252                   (slice_param->macroblock_offset & 0x7));
1253     OUT_BCS_BATCH(batch,
1254                   slice_param->quantiser_scale_code << 24);
1255     ADVANCE_BCS_BATCH(batch);
1256 }
1257
1258 static void
1259 gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1260                               struct decode_state *decode_state,
1261                               struct gen7_mfd_context *gen7_mfd_context)
1262 {
1263     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1264     VAPictureParameterBufferMPEG2 *pic_param;
1265     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1266     dri_bo *slice_data_bo;
1267     int i, j;
1268
1269     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1270     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1271
1272     gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1273     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1274     intel_batchbuffer_emit_mi_flush(batch);
1275     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1276     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1277     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1278     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1279     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1280     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1281
1282     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1283         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1284             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1285
1286     for (j = 0; j < decode_state->num_slice_params; j++) {
1287         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1288         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1289         slice_data_bo = decode_state->slice_datas[j]->bo;
1290         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1291
1292         if (j == decode_state->num_slice_params - 1)
1293             next_slice_group_param = NULL;
1294         else
1295             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1296
1297         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1298             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1299
1300             if (i < decode_state->slice_params[j]->num_elements - 1)
1301                 next_slice_param = slice_param + 1;
1302             else
1303                 next_slice_param = next_slice_group_param;
1304
1305             gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1306             slice_param++;
1307         }
1308     }
1309
1310     intel_batchbuffer_end_atomic(batch);
1311     intel_batchbuffer_flush(batch);
1312 }
1313
1314 static const int va_to_gen7_vc1_pic_type[5] = {
1315     GEN7_VC1_I_PICTURE,
1316     GEN7_VC1_P_PICTURE,
1317     GEN7_VC1_B_PICTURE,
1318     GEN7_VC1_BI_PICTURE,
1319     GEN7_VC1_P_PICTURE,
1320 };
1321
1322 static const int va_to_gen7_vc1_mv[4] = {
1323     1, /* 1-MV */
1324     2, /* 1-MV half-pel */
1325     3, /* 1-MV half-pel bilinear */
1326     0, /* Mixed MV */
1327 };
1328
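/* VC-1 B-picture scale factors: the B-frame fraction (BFRACTION) multiplied
 * by 256, indexed by pic_param->b_picture_fraction. */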
1329 static const int b_picture_scale_factor[21] = {
1330     128, 85,  170, 64,  192,
1331     51,  102, 153, 204, 43,
1332     215, 37,  74,  111, 148,
1333     185, 222, 32,  96,  160, 
1334     224,
1335 };
1336
1337 static const int va_to_gen7_vc1_condover[3] = {
1338     0,
1339     2,
1340     3
1341 };
1342
1343 static const int va_to_gen7_vc1_profile[4] = {
1344     GEN7_VC1_SIMPLE_PROFILE,
1345     GEN7_VC1_MAIN_PROFILE,
1346     GEN7_VC1_RESERVED_PROFILE,
1347     GEN7_VC1_ADVANCED_PROFILE
1348 };
1349
1350 static void 
1351 gen7_mfd_free_vc1_surface(void **data)
1352 {
1353     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1354
1355     if (!gen7_vc1_surface)
1356         return;
1357
1358     dri_bo_unreference(gen7_vc1_surface->dmv);
1359     free(gen7_vc1_surface);
1360     *data = NULL;
1361 }
1362
1363 static void
1364 gen7_mfd_init_vc1_surface(VADriverContextP ctx, 
1365                           VAPictureParameterBufferVC1 *pic_param,
1366                           struct object_surface *obj_surface)
1367 {
1368     struct i965_driver_data *i965 = i965_driver_data(ctx);
1369     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1370     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1371     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1372
1373     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
1374
1375     if (!gen7_vc1_surface) {
1376         gen7_vc1_surface = calloc(1, sizeof(struct gen7_vc1_surface));
1377         assert((obj_surface->size & 0x3f) == 0);
1378         obj_surface->private_data = gen7_vc1_surface;
1379     }
1380
1381     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1382
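    /* Per-surface direct-MV buffer (64 bytes per macroblock): written while
     * this surface is decoded and read back when it serves as the backward
     * reference of a B picture, see gen7_mfd_vc1_directmode_state(). */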
1383     if (gen7_vc1_surface->dmv == NULL) {
1384         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1385                                              "direct mv w/r buffer",
1386                                              width_in_mbs * height_in_mbs * 64,
1387                                              0x1000);
1388     }
1389 }
1390
1391 static void
1392 gen7_mfd_vc1_decode_init(VADriverContextP ctx,
1393                          struct decode_state *decode_state,
1394                          struct gen7_mfd_context *gen7_mfd_context)
1395 {
1396     VAPictureParameterBufferVC1 *pic_param;
1397     struct i965_driver_data *i965 = i965_driver_data(ctx);
1398     struct object_surface *obj_surface;
1399     int i;
1400     dri_bo *bo;
1401     int width_in_mbs;
1402
1403     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1404     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1405     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1406
1407     /* reference picture */
1408     obj_surface = SURFACE(pic_param->forward_reference_picture);
1409
1410     if (obj_surface && obj_surface->bo)
1411         gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
1412     else
1413         gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
1414
1415     obj_surface = SURFACE(pic_param->backward_reference_picture);
1416
1417     if (obj_surface && obj_surface->bo)
1418         gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
1419     else
1420         gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
1421
1422     /* Fill the remaining reference slots with the two entries above so that no stale surface IDs are left from a previous frame. */
1423     for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
1424         gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
1425
1426     /* Current decoded picture */
1427     obj_surface = SURFACE(decode_state->current_render_target);
1428     assert(obj_surface);
1429     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1430     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1431
1432     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1433     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1434     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1435     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1436
1437     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1438     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1439     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1440     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1441
1442     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1443     bo = dri_bo_alloc(i965->intel.bufmgr,
1444                       "intra row store",
1445                       width_in_mbs * 64,
1446                       0x1000);
1447     assert(bo);
1448     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1449     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1450
1451     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1452     bo = dri_bo_alloc(i965->intel.bufmgr,
1453                       "deblocking filter row store",
1454                       width_in_mbs * 6 * 64,
1455                       0x1000);
1456     assert(bo);
1457     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1458     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1459
1460     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1461     bo = dri_bo_alloc(i965->intel.bufmgr,
1462                       "bsd mpc row store",
1463                       width_in_mbs * 96,
1464                       0x1000);
1465     assert(bo);
1466     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1467     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1468
1469     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1470
1471     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1472     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1473     
1474     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1475         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1476         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1477         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1478         int src_w, src_h;
1479         uint8_t *src = NULL, *dst = NULL;
1480
1481         assert(decode_state->bit_plane->buffer);
1482         src = decode_state->bit_plane->buffer;
1483
1484         bo = dri_bo_alloc(i965->intel.bufmgr,
1485                           "VC-1 Bitplane",
1486                           bitplane_width * height_in_mbs,
1487                           0x1000);
1488         assert(bo);
1489         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1490
1491         dri_bo_map(bo, True);
1492         assert(bo->virtual);
1493         dst = bo->virtual;
1494
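        /*
         * Repack the VA bitplane (raster order, two macroblocks per byte with
         * the left macroblock in the high nibble) into the layout read by the
         * hardware: rows padded to bitplane_width bytes and the left
         * macroblock of each pair ending up in the low nibble.
         */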
1495         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1496             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1497                 int src_index, dst_index;
1498                 int src_shift;
1499                 uint8_t src_value;
1500
1501                 src_index = (src_h * width_in_mbs + src_w) / 2;
1502                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1503                 src_value = ((src[src_index] >> src_shift) & 0xf);
1504
1505                 dst_index = src_w / 2;
1506                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1507             }
1508
1509             if (src_w & 1)
1510                 dst[src_w / 2] >>= 4;
1511
1512             dst += bitplane_width;
1513         }
1514
1515         dri_bo_unmap(bo);
1516     } else
1517         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1518 }
1519
1520 static void
1521 gen7_mfd_vc1_pic_state(VADriverContextP ctx,
1522                        struct decode_state *decode_state,
1523                        struct gen7_mfd_context *gen7_mfd_context)
1524 {
1525     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1526     VAPictureParameterBufferVC1 *pic_param;
1527     struct i965_driver_data *i965 = i965_driver_data(ctx);
1528     struct object_surface *obj_surface;
1529     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1530     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1531     int unified_mv_mode;
1532     int ref_field_pic_polarity = 0;
1533     int scale_factor = 0;
1534     int trans_ac_y = 0;
1535     int dmv_surface_valid = 0;
1536     int brfd = 0;
1537     int fcm = 0;
1538     int picture_type;
1539     int profile;
1540     int overlap;
1541     int interpolation_mode = 0;
1542
1543     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1544     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1545
1546     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1547     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1548     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1549     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1550     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1551     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1552     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1553     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1554
1555     if (dquant == 0) {
1556         alt_pquant_config = 0;
1557         alt_pquant_edge_mask = 0;
1558     } else if (dquant == 2) {
1559         alt_pquant_config = 1;
1560         alt_pquant_edge_mask = 0xf;
1561     } else {
1562         assert(dquant == 1);
1563         if (dquantfrm == 0) {
1564             alt_pquant_config = 0;
1565             alt_pquant_edge_mask = 0;
1566             alt_pq = 0;
1567         } else {
1568             assert(dquantfrm == 1);
1569             alt_pquant_config = 1;
1570
1571             switch (dqprofile) {
1572             case 3:
1573                 if (dqbilevel == 0) {
1574                     alt_pquant_config = 2;
1575                     alt_pquant_edge_mask = 0;
1576                 } else {
1577                     assert(dqbilevel == 1);
1578                     alt_pquant_config = 3;
1579                     alt_pquant_edge_mask = 0;
1580                 }
1581                 break;
1582                 
1583             case 0:
1584                 alt_pquant_edge_mask = 0xf;
1585                 break;
1586
1587             case 1:
1588                 if (dqdbedge == 3)
1589                     alt_pquant_edge_mask = 0x9;
1590                 else
1591                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1592
1593                 break;
1594
1595             case 2:
1596                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1597                 break;
1598
1599             default:
1600                 assert(0);
1601             }
1602         }
1603     }
1604
1605     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1606         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1607         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1608     } else {
1609         assert(pic_param->mv_fields.bits.mv_mode < 4);
1610         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1611     }
1612
1613     if (pic_param->sequence_fields.bits.interlace == 1 &&
1614         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1615         /* FIXME: calculate reference field picture polarity */
1616         assert(0);
1617         ref_field_pic_polarity = 0;
1618     }
1619
1620     if (pic_param->b_picture_fraction < 21)
1621         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1622
1623     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1624     
1625     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1626         picture_type == GEN7_VC1_I_PICTURE)
1627         picture_type = GEN7_VC1_BI_PICTURE;
1628
1629     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1630         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1631     else
1632         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1633
1635     if (picture_type == GEN7_VC1_B_PICTURE) {
1636         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1637
1638         obj_surface = SURFACE(pic_param->backward_reference_picture);
1639         assert(obj_surface);
1640         gen7_vc1_surface = obj_surface->private_data;
1641
1642         if (!gen7_vc1_surface || 
1643             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1644              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1645             dmv_surface_valid = 0;
1646         else
1647             dmv_surface_valid = 1;
1648     }
1649
1650     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1651
1652     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1653         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1654     else {
1655         if (pic_param->picture_fields.bits.top_field_first)
1656             fcm = 2;
1657         else
1658             fcm = 3;
1659     }
1660
1661     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1662         brfd = pic_param->reference_fields.bits.reference_distance;
1663         brfd = (scale_factor * brfd) >> 8;
1664         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1665
1666         if (brfd < 0)
1667             brfd = 0;
1668     }
1669
1670     overlap = pic_param->sequence_fields.bits.overlap;
1671     if (profile != GEN7_VC1_ADVANCED_PROFILE && pic_param->pic_quantizer_fields.bits.pic_quantizer_scale < 9)
1672         overlap = 0;
1673
1674     assert(pic_param->conditional_overlap_flag < 3);
1675     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1676
1677     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1678         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1679          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1680         interpolation_mode = 9; /* Half-pel bilinear */
1681     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1682              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1683               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1684         interpolation_mode = 1; /* Half-pel bicubic */
1685     else
1686         interpolation_mode = 0; /* Quarter-pel bicubic */
1687
1688     BEGIN_BCS_BATCH(batch, 6);
1689     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1690     OUT_BCS_BATCH(batch,
1691                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1692                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1693     OUT_BCS_BATCH(batch,
1694                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1695                   dmv_surface_valid << 15 |
1696                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1697                   pic_param->rounding_control << 13 |
1698                   pic_param->sequence_fields.bits.syncmarker << 12 |
1699                   interpolation_mode << 8 |
1700                   0 << 7 | /* FIXME: scale up or down ??? */
1701                   pic_param->range_reduction_frame << 6 |
1702                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1703                   overlap << 4 |
1704                   !pic_param->picture_fields.bits.is_first_field << 3 |
1705                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1706     OUT_BCS_BATCH(batch,
1707                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1708                   picture_type << 26 |
1709                   fcm << 24 |
1710                   alt_pq << 16 |
1711                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1712                   scale_factor << 0);
1713     OUT_BCS_BATCH(batch,
1714                   unified_mv_mode << 28 |
1715                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1716                   pic_param->fast_uvmc_flag << 26 |
1717                   ref_field_pic_polarity << 25 |
1718                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1719                   pic_param->reference_fields.bits.reference_distance << 20 |
1720                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1721                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1722                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1723                   alt_pquant_edge_mask << 4 |
1724                   alt_pquant_config << 2 |
1725                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1726                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1727     OUT_BCS_BATCH(batch,
1728                   !!pic_param->bitplane_present.value << 31 |
1729                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1730                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1731                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1732                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1733                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1734                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1735                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1736                   pic_param->mv_fields.bits.mv_table << 20 |
1737                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1738                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1739                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1740                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1741                   pic_param->mb_mode_table << 8 |
1742                   trans_ac_y << 6 |
1743                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1744                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1745                   pic_param->cbp_table << 0);
1746     ADVANCE_BCS_BATCH(batch);
1747 }
1748
1749 static void
1750 gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1751                              struct decode_state *decode_state,
1752                              struct gen7_mfd_context *gen7_mfd_context)
1753 {
1754     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1755     VAPictureParameterBufferVC1 *pic_param;
1756     int intensitycomp_single;
1757
1758     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1759     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1760
1763     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1764
1765     BEGIN_BCS_BATCH(batch, 6);
1766     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1767     OUT_BCS_BATCH(batch,
1768                   0 << 14 | /* FIXME: double ??? */
1769                   0 << 12 |
1770                   intensitycomp_single << 10 |
1771                   intensitycomp_single << 8 |
1772                   0 << 4 | /* FIXME: interlace mode */
1773                   0);
1774     OUT_BCS_BATCH(batch,
1775                   pic_param->luma_shift << 16 |
1776                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1777     OUT_BCS_BATCH(batch, 0);
1778     OUT_BCS_BATCH(batch, 0);
1779     OUT_BCS_BATCH(batch, 0);
1780     ADVANCE_BCS_BATCH(batch);
1781 }
1782
1783
1784 static void
1785 gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
1786                               struct decode_state *decode_state,
1787                               struct gen7_mfd_context *gen7_mfd_context)
1788 {
1789     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1790     VAPictureParameterBufferVC1 *pic_param;
1791     struct i965_driver_data *i965 = i965_driver_data(ctx);
1792     struct object_surface *obj_surface;
1793     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1794
1795     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1796     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1797
1798     obj_surface = SURFACE(decode_state->current_render_target);
1799
1800     if (obj_surface && obj_surface->private_data) {
1801         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1802     }
1803
1804     obj_surface = SURFACE(pic_param->backward_reference_picture);
1805
1806     if (obj_surface && obj_surface->private_data) {
1807         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1808     }
1809
1810     BEGIN_BCS_BATCH(batch, 3);
1811     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
1812
1813     if (dmv_write_buffer)
1814         OUT_BCS_RELOC(batch, dmv_write_buffer,
1815                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1816                       0);
1817     else
1818         OUT_BCS_BATCH(batch, 0);
1819
1820     if (dmv_read_buffer)
1821         OUT_BCS_RELOC(batch, dmv_read_buffer,
1822                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1823                       0);
1824     else
1825         OUT_BCS_BATCH(batch, 0);
1826                   
1827     ADVANCE_BCS_BATCH(batch);
1828 }
1829
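/*
 * For VC-1 advanced profile (profile == 3) the slice data still contains
 * 0x00 0x00 0x03 emulation-prevention bytes, so the macroblock bit offset
 * reported in the slice parameters is widened here to account for every
 * 0x03 byte that precedes it in the raw buffer handed to the hardware.
 */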
1830 static int
1831 gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
1832 {
1833     int out_slice_data_bit_offset;
1834     int slice_header_size = in_slice_data_bit_offset / 8;
1835     int i, j;
1836
1837     if (profile != 3)
1838         out_slice_data_bit_offset = in_slice_data_bit_offset;
1839     else {
1840         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
1841             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
1842                 i++, j += 2;
1843             }
1844         }
1845
1846         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
1847     }
1848
1849     return out_slice_data_bit_offset;
1850 }
1851
1852 static void
1853 gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
1854                         VAPictureParameterBufferVC1 *pic_param,
1855                         VASliceParameterBufferVC1 *slice_param,
1856                         VASliceParameterBufferVC1 *next_slice_param,
1857                         dri_bo *slice_data_bo,
1858                         struct gen7_mfd_context *gen7_mfd_context)
1859 {
1860     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1861     int next_slice_start_vert_pos;
1862     int macroblock_offset;
1863     uint8_t *slice_data = NULL;
1864
1865     dri_bo_map(slice_data_bo, 0);
1866     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
1867     macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data, 
1868                                                                slice_param->macroblock_offset,
1869                                                                pic_param->sequence_fields.bits.profile);
1870     dri_bo_unmap(slice_data_bo);
1871
1872     if (next_slice_param)
1873         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
1874     else
1875         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
1876
1877     BEGIN_BCS_BATCH(batch, 5);
1878     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
1879     OUT_BCS_BATCH(batch, 
1880                   slice_param->slice_data_size - (macroblock_offset >> 3));
1881     OUT_BCS_BATCH(batch, 
1882                   slice_param->slice_data_offset + (macroblock_offset >> 3));
1883     OUT_BCS_BATCH(batch,
1884                   slice_param->slice_vertical_position << 16 |
1885                   next_slice_start_vert_pos << 0);
1886     OUT_BCS_BATCH(batch,
1887                   (macroblock_offset & 0x7));
1888     ADVANCE_BCS_BATCH(batch);
1889 }
1890
1891 static void
1892 gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
1893                             struct decode_state *decode_state,
1894                             struct gen7_mfd_context *gen7_mfd_context)
1895 {
1896     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1897     VAPictureParameterBufferVC1 *pic_param;
1898     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
1899     dri_bo *slice_data_bo;
1900     int i, j;
1901
1902     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1903     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1904
1905     gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
1906     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1907     intel_batchbuffer_emit_mi_flush(batch);
1908     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1909     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1910     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1911     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
1912     gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
1913     gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
1914     gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
1915
1916     for (j = 0; j < decode_state->num_slice_params; j++) {
1917         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1918         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
1919         slice_data_bo = decode_state->slice_datas[j]->bo;
1920         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
1921
1922         if (j == decode_state->num_slice_params - 1)
1923             next_slice_group_param = NULL;
1924         else
1925             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
1926
1927         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1928             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1929
1930             if (i < decode_state->slice_params[j]->num_elements - 1)
1931                 next_slice_param = slice_param + 1;
1932             else
1933                 next_slice_param = next_slice_group_param;
1934
1935             gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
1936             slice_param++;
1937         }
1938     }
1939
1940     intel_batchbuffer_end_atomic(batch);
1941     intel_batchbuffer_flush(batch);
1942 }
1943
1944 static void
1945 gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
1946                           struct decode_state *decode_state,
1947                           struct gen7_mfd_context *gen7_mfd_context)
1948 {
1949     struct i965_driver_data *i965 = i965_driver_data(ctx);
1950     struct object_surface *obj_surface;
1951     VAPictureParameterBufferJPEG *pic_param;
1952     int subsampling = SUBSAMPLE_YUV420;
1953
1954     pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
1955
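    /* Derive the chroma subsampling of the decoded surface from the
     * per-component sampling factors signalled in the frame header. */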
1956     if (pic_param->num_components == 1)
1957         subsampling = SUBSAMPLE_YUV400;
1958     else if (pic_param->num_components == 3) {
1959         int h1 = pic_param->components[0].h_sampling_factor;
1960         int h2 = pic_param->components[1].h_sampling_factor;
1961         int h3 = pic_param->components[2].h_sampling_factor;
1962         int v1 = pic_param->components[0].v_sampling_factor;
1963         int v2 = pic_param->components[1].v_sampling_factor;
1964         int v3 = pic_param->components[2].v_sampling_factor;
1965
1966         if (h1 == 2 && h2 == 1 && h3 == 1 &&
1967             v1 == 2 && v2 == 1 && v3 == 1)
1968             subsampling = SUBSAMPLE_YUV420;
1969         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1970                  v1 == 1 && v2 == 1 && v3 == 1)
1971             subsampling = SUBSAMPLE_YUV422H;
1972         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1973                  v1 == 1 && v2 == 1 && v3 == 1)
1974             subsampling = SUBSAMPLE_YUV444;
1975         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
1976                  v1 == 1 && v2 == 1 && v3 == 1)
1977             subsampling = SUBSAMPLE_YUV411;
1978         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
1979                  v1 == 2 && v2 == 1 && v3 == 1)
1980             subsampling = SUBSAMPLE_YUV422V;
1981         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
1982                  v1 == 2 && v2 == 2 && v3 == 2)
1983             subsampling = SUBSAMPLE_YUV422H;
1984         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
1985                  v1 == 2 && v2 == 1 && v3 == 1)
1986             subsampling = SUBSAMPLE_YUV422V;
1987         else
1988             assert(0);
1989     } else {
1990         assert(0);
1991     }
1992
1993     /* Current decoded picture */
1994     obj_surface = SURFACE(decode_state->current_render_target);
1995     assert(obj_surface);
1996     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
1997
1998     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1999     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2000     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2001     gen7_mfd_context->pre_deblocking_output.valid = 1;
2002
2003     gen7_mfd_context->post_deblocking_output.bo = NULL;
2004     gen7_mfd_context->post_deblocking_output.valid = 0;
2005
2006     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2007     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2008
2009     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2010     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2011
2012     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2013     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2014
2015     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2016     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2017
2018     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2019     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2020 }
2021
2022 static const int va_to_gen7_jpeg_rotation[4] = {
2023     GEN7_JPEG_ROTATION_0,
2024     GEN7_JPEG_ROTATION_90,
2025     GEN7_JPEG_ROTATION_180,
2026     GEN7_JPEG_ROTATION_270
2027 };
2028
2029 static void
2030 gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
2031                         struct decode_state *decode_state,
2032                         struct gen7_mfd_context *gen7_mfd_context)
2033 {
2034     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2035     VAPictureParameterBufferJPEG *pic_param;
2036     int chroma_type = GEN7_YUV420;
2037     int frame_width_in_blks;
2038     int frame_height_in_blks;
2039
2040     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2041     pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
2042
2043     if (pic_param->num_components == 1)
2044         chroma_type = GEN7_YUV400;
2045     else if (pic_param->num_components == 3) {
2046         int h1 = pic_param->components[0].h_sampling_factor;
2047         int h2 = pic_param->components[1].h_sampling_factor;
2048         int h3 = pic_param->components[2].h_sampling_factor;
2049         int v1 = pic_param->components[0].v_sampling_factor;
2050         int v2 = pic_param->components[1].v_sampling_factor;
2051         int v3 = pic_param->components[2].v_sampling_factor;
2052
2053         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2054             v1 == 2 && v2 == 1 && v3 == 1)
2055             chroma_type = GEN7_YUV420;
2056         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2057                  v1 == 1 && v2 == 1 && v3 == 1)
2058             chroma_type = GEN7_YUV422H_2Y;
2059         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2060                  v1 == 1 && v2 == 1 && v3 == 1)
2061             chroma_type = GEN7_YUV444;
2062         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2063                  v1 == 1 && v2 == 1 && v3 == 1)
2064             chroma_type = GEN7_YUV411;
2065         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2066                  v1 == 2 && v2 == 1 && v3 == 1)
2067             chroma_type = GEN7_YUV422V_2Y;
2068         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2069                  v1 == 2 && v2 == 2 && v3 == 2)
2070             chroma_type = GEN7_YUV422H_4Y;
2071         else if (h1 == 2 && h2 == 2 && h3 == 2 &&
2072                  v1 == 2 && v2 == 1 && v3 == 1)
2073             chroma_type = GEN7_YUV422V_4Y;
2074         else
2075             assert(0);
2076     }
2077
2078     if (chroma_type == GEN7_YUV400 ||
2079         chroma_type == GEN7_YUV444 ||
2080         chroma_type == GEN7_YUV422V_2Y) {
2081         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2082         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2083     } else if (chroma_type == GEN7_YUV411) {
2084         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2085         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2086     } else {
2087         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2088         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2089     }
2090
2091     BEGIN_BCS_BATCH(batch, 3);
2092     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2093     OUT_BCS_BATCH(batch,
2094                   (va_to_gen7_jpeg_rotation[pic_param->rotation] << 4) |    /* rotation */
2095                   (chroma_type << 0));
2096     OUT_BCS_BATCH(batch,
2097                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2098                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2099     ADVANCE_BCS_BATCH(batch);
2100 }
2101
2102 static const int va_to_gen7_jpeg_hufftable[2] = {
2103     MFX_HUFFTABLE_ID_Y,
2104     MFX_HUFFTABLE_ID_UV
2105 };
2106
2107 static void
2108 gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2109                                struct decode_state *decode_state,
2110                                struct gen7_mfd_context *gen7_mfd_context,
2111                                int num_tables)
2112 {
2113     VAHuffmanTableBufferJPEG *huffman_table;
2114     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2115     int index;
2116
2117     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2118         return;
2119
2120     huffman_table = (VAHuffmanTableBufferJPEG *)decode_state->huffman_table->buffer;
2121
2122     for (index = 0; index < num_tables; index++) {
2123         int id = va_to_gen7_jpeg_hufftable[index];
2124         BEGIN_BCS_BATCH(batch, 53);
2125         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2126         OUT_BCS_BATCH(batch, id);
2127         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2128         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2129         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2130         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2131         ADVANCE_BCS_BATCH(batch);
2132     }
2133 }
2134
2135 static const int va_to_gen7_jpeg_qm[5] = {
2136     -1,
2137     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2138     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2139     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2140     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2141 };
2142
2143 static void
2144 gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
2145                        struct decode_state *decode_state,
2146                        struct gen7_mfd_context *gen7_mfd_context)
2147 {
2148     VAPictureParameterBufferJPEG *pic_param;
2149     VAIQMatrixBufferJPEG *iq_matrix;
2150     int index;
2151
2152     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2153         return;
2154
2155     iq_matrix = (VAIQMatrixBufferJPEG *)decode_state->iq_matrix->buffer;
2156     pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
2157
2158     assert(pic_param->num_components <= 3);
2159
2160     for (index = 0; index < pic_param->num_components; index++) {
2161         int qm_type = va_to_gen7_jpeg_qm[pic_param->components[index].component_id - pic_param->components[0].component_id + 1];
2162         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2163         unsigned char precision = pic_param->sample_precision;
2164         unsigned char raster_qm[64];
2165         int j;
2166
2167         assert(precision == 8);
2168
2169         for (j = 0; j < 64; j++)
2170             raster_qm[zigzag_direct[j]] = qm[j];
2171
2172         gen7_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2173     }
2174 }
2175
2176 static void
2177 gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
2178                          VAPictureParameterBufferJPEG *pic_param,
2179                          VASliceParameterBufferJPEG *slice_param,
2180                          VASliceParameterBufferJPEG *next_slice_param,
2181                          dri_bo *slice_data_bo,
2182                          struct gen7_mfd_context *gen7_mfd_context)
2183 {
2184     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2185     int scan_component_mask = 0;
2186     int i;
2187
2188     assert(slice_param->num_components > 0);
2189     assert(slice_param->num_components < 4);
2190     assert(slice_param->num_components <= pic_param->num_components);
2191
2192     for (i = 0; i < slice_param->num_components; i++) {
2193         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2194         case 1:
2195             scan_component_mask |= (1 << 0);
2196             break;
2197         case 2:
2198             scan_component_mask |= (1 << 1);
2199             break;
2200         case 3:
2201             scan_component_mask |= (1 << 2);
2202             break;
2203         default:
2204             assert(0);
2205             break;
2206         }
2207     }
2208
2209     BEGIN_BCS_BATCH(batch, 6);
2210     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2211     OUT_BCS_BATCH(batch, 
2212                   slice_param->slice_data_size);
2213     OUT_BCS_BATCH(batch, 
2214                   slice_param->slice_data_offset);
2215     OUT_BCS_BATCH(batch,
2216                   slice_param->slice_horizontal_position << 16 |
2217                   slice_param->slice_vertical_position << 0);
2218     OUT_BCS_BATCH(batch,
2219                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2220                   (scan_component_mask << 27) |                 /* scan components */
2221                   (0 << 26) |   /* disable interrupt allowed */
2222                   (slice_param->num_mcus << 0));                /* MCU count */
2223     OUT_BCS_BATCH(batch,
2224                   (slice_param->restart_interval << 0));    /* RestartInterval */
2225     ADVANCE_BCS_BATCH(batch);
2226 }
2227
2228 /* Workaround for JPEG decoding on Ivybridge */
2229
2230 VAStatus 
2231 i965_DestroySurfaces(VADriverContextP ctx,
2232                      VASurfaceID *surface_list,
2233                      int num_surfaces);
2234 VAStatus 
2235 i965_CreateSurfaces(VADriverContextP ctx,
2236                     int width,
2237                     int height,
2238                     int format,
2239                     int num_surfaces,
2240                     VASurfaceID *surfaces);
2241
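/*
 * Hard-coded 16x16 AVC intra clip: gen7_mfd_jpeg_wa() below decodes it
 * through the MFX AVC path before the real JPEG picture, as part of the
 * Ivybridge JPEG workaround mentioned above.
 */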
2242 static struct {
2243     int width;
2244     int height;
2245     unsigned char data[32];
2246     int data_size;
2247     int data_bit_offset;
2248     int qp;
2249 } gen7_jpeg_wa_clip = {
2250     16,
2251     16,
2252     {
2253         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
2254         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
2255     },
2256     14,
2257     40,
2258     28,
2259 };
2260
2261 static void
2262 gen7_jpeg_wa_init(VADriverContextP ctx,
2263                   struct gen7_mfd_context *gen7_mfd_context)
2264 {
2265     struct i965_driver_data *i965 = i965_driver_data(ctx);
2266     VAStatus status;
2267     struct object_surface *obj_surface;
2268
2269     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2270         i965_DestroySurfaces(ctx,
2271                              &gen7_mfd_context->jpeg_wa_surface_id,
2272                              1);
2273
2274     status = i965_CreateSurfaces(ctx,
2275                                  gen7_jpeg_wa_clip.width,
2276                                  gen7_jpeg_wa_clip.height,
2277                                  VA_RT_FORMAT_YUV420,
2278                                  1,
2279                                  &gen7_mfd_context->jpeg_wa_surface_id);
2280     assert(status == VA_STATUS_SUCCESS);
2281
2282     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2283     assert(obj_surface);
2284     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2285
2286     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2287         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2288                                                                "JPEG WA data",
2289                                                                0x1000,
2290                                                                0x1000);
2291         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2292                        0,
2293                        gen7_jpeg_wa_clip.data_size,
2294                        gen7_jpeg_wa_clip.data);
2295     }
2296 }
2297
2298 static void
2299 gen7_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2300                               struct gen7_mfd_context *gen7_mfd_context)
2301 {
2302     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2303
2304     BEGIN_BCS_BATCH(batch, 5);
2305     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2306     OUT_BCS_BATCH(batch,
2307                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2308                   (MFD_MODE_VLD << 15) | /* VLD mode */
2309                   (0 << 10) | /* disable Stream-Out */
2310                   (0 << 9)  | /* Post Deblocking Output */
2311                   (1 << 8)  | /* Pre Deblocking Output */
2312                   (0 << 5)  | /* not in stitch mode */
2313                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2314                   (MFX_FORMAT_AVC << 0));
2315     OUT_BCS_BATCH(batch,
2316                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2317                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2318                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2319                   (0 << 1)  |
2320                   (0 << 0));
2321     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2322     OUT_BCS_BATCH(batch, 0); /* reserved */
2323     ADVANCE_BCS_BATCH(batch);
2324 }
2325
2326 static void
2327 gen7_jpeg_wa_surface_state(VADriverContextP ctx,
2328                            struct gen7_mfd_context *gen7_mfd_context)
2329 {
2330     struct i965_driver_data *i965 = i965_driver_data(ctx);
2331     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2332     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2333
2334     BEGIN_BCS_BATCH(batch, 6);
2335     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2336     OUT_BCS_BATCH(batch, 0);
2337     OUT_BCS_BATCH(batch,
2338                   ((obj_surface->orig_width - 1) << 18) |
2339                   ((obj_surface->orig_height - 1) << 4));
2340     OUT_BCS_BATCH(batch,
2341                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2342                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2343                   (0 << 22) | /* surface object control state, ignored */
2344                   ((obj_surface->width - 1) << 3) | /* pitch */
2345                   (0 << 2)  | /* must be 0 */
2346                   (1 << 1)  | /* must be tiled */
2347                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2348     OUT_BCS_BATCH(batch,
2349                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2350                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2351     OUT_BCS_BATCH(batch,
2352                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2353                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
2354     ADVANCE_BCS_BATCH(batch);
2355 }
2356
2357 static void
2358 gen7_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2359                                  struct gen7_mfd_context *gen7_mfd_context)
2360 {
2361     struct i965_driver_data *i965 = i965_driver_data(ctx);
2362     struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2363     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2364     dri_bo *intra_bo;
2365     int i;
2366
2367     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2368                             "intra row store",
2369                             128 * 64,
2370                             0x1000);
2371
2372     BEGIN_BCS_BATCH(batch, 24);
2373     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
2374     OUT_BCS_RELOC(batch,
2375                   obj_surface->bo,
2376                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2377                   0);
2378     
2379     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2380
2381     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2382     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2383
2384     OUT_BCS_RELOC(batch,
2385                   intra_bo,
2386                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2387                   0);
2388
2389     OUT_BCS_BATCH(batch, 0);
2390
2391     /* DW 7..22 */
2392     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2393         OUT_BCS_BATCH(batch, 0);
2394     }
2395
2396     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2397     ADVANCE_BCS_BATCH(batch);
2398
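    /* The relocation emitted above keeps its own reference on intra_bo, so
     * the local reference can be dropped right away. */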
2399     dri_bo_unreference(intra_bo);
2400 }
2401
2402 static void
2403 gen7_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2404                                      struct gen7_mfd_context *gen7_mfd_context)
2405 {
2406     struct i965_driver_data *i965 = i965_driver_data(ctx);
2407     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2408     dri_bo *bsd_mpc_bo, *mpr_bo;
2409
2410     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2411                               "bsd mpc row store",
2412                               11520, /* 1.5 * 120 * 64 */
2413                               0x1000);
2414
2415     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2416                           "mpr row store",
2417                           7680, /* 1.0 * 120 * 64 */
2418                           0x1000);
2419
2420     BEGIN_BCS_BATCH(batch, 4);
2421     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2422
2423     OUT_BCS_RELOC(batch,
2424                   bsd_mpc_bo,
2425                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2426                   0);
2427
2428     OUT_BCS_RELOC(batch,
2429                   mpr_bo,
2430                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2431                   0);
2432     OUT_BCS_BATCH(batch, 0);
2433
2434     ADVANCE_BCS_BATCH(batch);
2435
2436     dri_bo_unreference(bsd_mpc_bo);
2437     dri_bo_unreference(mpr_bo);
2438 }
2439
2440 static void
2441 gen7_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2442                           struct gen7_mfd_context *gen7_mfd_context)
2443 {
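    /* Intentionally empty: the workaround clip does not program any AVC
     * scaling matrices. */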
2444
2445 }
2446
2447 static void
2448 gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
2449                            struct gen7_mfd_context *gen7_mfd_context)
2450 {
2451     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2452     int img_struct = 0;
2453     int mbaff_frame_flag = 0;
2454     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2455
2456     BEGIN_BCS_BATCH(batch, 16);
2457     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2458     OUT_BCS_BATCH(batch, 
2459                   width_in_mbs * height_in_mbs);
2460     OUT_BCS_BATCH(batch, 
2461                   ((height_in_mbs - 1) << 16) | 
2462                   ((width_in_mbs - 1) << 0));
2463     OUT_BCS_BATCH(batch, 
2464                   (0 << 24) |
2465                   (0 << 16) |
2466                   (0 << 14) |
2467                   (0 << 13) |
2468                   (0 << 12) | /* differ from GEN6 */
2469                   (0 << 10) |
2470                   (img_struct << 8));
2471     OUT_BCS_BATCH(batch,
2472                   (1 << 10) | /* 4:2:0 */
2473                   (1 << 7) |  /* CABAC */
2474                   (0 << 6) |
2475                   (0 << 5) |
2476                   (0 << 4) |
2477                   (0 << 3) |
2478                   (1 << 2) |
2479                   (mbaff_frame_flag << 1) |
2480                   (0 << 0));
2481     OUT_BCS_BATCH(batch, 0);
2482     OUT_BCS_BATCH(batch, 0);
2483     OUT_BCS_BATCH(batch, 0);
2484     OUT_BCS_BATCH(batch, 0);
2485     OUT_BCS_BATCH(batch, 0);
2486     OUT_BCS_BATCH(batch, 0);
2487     OUT_BCS_BATCH(batch, 0);
2488     OUT_BCS_BATCH(batch, 0);
2489     OUT_BCS_BATCH(batch, 0);
2490     OUT_BCS_BATCH(batch, 0);
2491     OUT_BCS_BATCH(batch, 0);
2492     ADVANCE_BCS_BATCH(batch);
2493 }
2494
2495 static void
2496 gen7_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2497                                   struct gen7_mfd_context *gen7_mfd_context)
2498 {
2499     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2500     int i;
2501
2502     BEGIN_BCS_BATCH(batch, 69);
2503     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2504
2505     /* reference surfaces 0..15 */
2506     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2507         OUT_BCS_BATCH(batch, 0); /* top */
2508         OUT_BCS_BATCH(batch, 0); /* bottom */
2509     }
2510
2511     /* the current decoding frame/field */
2512     OUT_BCS_BATCH(batch, 0); /* top */
2513     OUT_BCS_BATCH(batch, 0); /* bottom */
2514
2515     /* POC List */
2516     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2517         OUT_BCS_BATCH(batch, 0);
2518         OUT_BCS_BATCH(batch, 0);
2519     }
2520
2521     OUT_BCS_BATCH(batch, 0);
2522     OUT_BCS_BATCH(batch, 0);
2523
2524     ADVANCE_BCS_BATCH(batch);
2525 }
2526
2527 static void
2528 gen7_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2529                                      struct gen7_mfd_context *gen7_mfd_context)
2530 {
2531     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2532
2533     BEGIN_BCS_BATCH(batch, 11);
2534     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2535     OUT_BCS_RELOC(batch,
2536                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2537                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2538                   0);
2539     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2540     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2541     OUT_BCS_BATCH(batch, 0);
2542     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2543     OUT_BCS_BATCH(batch, 0);
2544     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2545     OUT_BCS_BATCH(batch, 0);
2546     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2547     OUT_BCS_BATCH(batch, 0);
2548     ADVANCE_BCS_BATCH(batch);
2549 }
2550
2551 static void
2552 gen7_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2553                             struct gen7_mfd_context *gen7_mfd_context)
2554 {
2555     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2556
2557     /* the input bitstream format on GEN7 differs from GEN6 */
2558     BEGIN_BCS_BATCH(batch, 6);
2559     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
2560     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
2561     OUT_BCS_BATCH(batch, 0);
2562     OUT_BCS_BATCH(batch,
2563                   (0 << 31) |
2564                   (0 << 14) |
2565                   (0 << 12) |
2566                   (0 << 10) |
2567                   (0 << 8));
2568     OUT_BCS_BATCH(batch,
2569                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
2570                   (0 << 5)  |
2571                   (0 << 4)  |
2572                   (1 << 3) | /* LastSlice Flag */
2573                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
2574     OUT_BCS_BATCH(batch, 0);
2575     ADVANCE_BCS_BATCH(batch);
2576 }
2577
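/*
 * Slice state for the workaround clip: a single intra slice with no L0/L1
 * references, deblocking disabled, the QP taken from the embedded clip, and
 * the last-slice flag set.  The slice presumably spans the clip's only
 * macroblock row (next slice position 0,1).
 */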
2578 static void
2579 gen7_jpeg_wa_avc_slice_state(VADriverContextP ctx,
2580                              struct gen7_mfd_context *gen7_mfd_context)
2581 {
2582     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2583     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
2584     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
2585     int first_mb_in_slice = 0;
2586     int slice_type = SLICE_TYPE_I;
2587
2588     BEGIN_BCS_BATCH(batch, 11);
2589     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
2590     OUT_BCS_BATCH(batch, slice_type);
2591     OUT_BCS_BATCH(batch, 
2592                   (num_ref_idx_l1 << 24) |
2593                   (num_ref_idx_l0 << 16) |
2594                   (0 << 8) |
2595                   (0 << 0));
2596     OUT_BCS_BATCH(batch, 
2597                   (0 << 29) |
2598                   (1 << 27) |   /* disable Deblocking */
2599                   (0 << 24) |
2600                   (gen7_jpeg_wa_clip.qp << 16) |
2601                   (0 << 8) |
2602                   (0 << 0));
2603     OUT_BCS_BATCH(batch, 
2604                   (slice_ver_pos << 24) |
2605                   (slice_hor_pos << 16) | 
2606                   (first_mb_in_slice << 0));
2607     OUT_BCS_BATCH(batch,
2608                   (next_slice_ver_pos << 16) |
2609                   (next_slice_hor_pos << 0));
2610     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
2611     OUT_BCS_BATCH(batch, 0);
2612     OUT_BCS_BATCH(batch, 0);
2613     OUT_BCS_BATCH(batch, 0);
2614     OUT_BCS_BATCH(batch, 0);
2615     ADVANCE_BCS_BATCH(batch);
2616 }
2617
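/*
 * JPEG decode workaround for GEN7: before the real JPEG decode is programmed,
 * run the embedded dummy AVC clip through the full MFX pipeline (pipe mode,
 * surface, buffer addresses, QM, image, direct-mode, slice and BSD state).
 * This presumably leaves the fixed-function hardware in a known state that
 * the subsequent JPEG decode relies on.
 */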
2618 static void
2619 gen7_mfd_jpeg_wa(VADriverContextP ctx,
2620                  struct gen7_mfd_context *gen7_mfd_context)
2621 {
2622     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2623     gen7_jpeg_wa_init(ctx, gen7_mfd_context);
2624     intel_batchbuffer_emit_mi_flush(batch);
2625     gen7_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
2626     gen7_jpeg_wa_surface_state(ctx, gen7_mfd_context);
2627     gen7_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
2628     gen7_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
2629     gen7_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
2630     gen7_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
2631     gen7_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
2632
2633     gen7_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
2634     gen7_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
2635     gen7_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
2636 }
2637
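/*
 * Top-level JPEG (baseline DCT) decode.  After the AVC workaround above, the
 * common MFX state is programmed, the scan headers are walked once to find
 * the highest Huffman table selector in use, the Huffman tables are loaded,
 * and finally one JPEG BSD object is emitted per scan via
 * gen7_mfd_jpeg_bsd_object().
 */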
2638 void
2639 gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
2640                              struct decode_state *decode_state,
2641                              struct gen7_mfd_context *gen7_mfd_context)
2642 {
2643     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2644     VAPictureParameterBufferJPEG *pic_param;
2645     VASliceParameterBufferJPEG *slice_param, *next_slice_param, *next_slice_group_param;
2646     dri_bo *slice_data_bo;
2647     int i, j, max_selector = 0;
2648
2649     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2650     pic_param = (VAPictureParameterBufferJPEG *)decode_state->pic_param->buffer;
2651
2652     /* Currently only baseline DCT (8-bit sample precision) is supported */
2653     assert(pic_param->sample_precision == 8);
2654     gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
2655     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2656     gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
2657     intel_batchbuffer_emit_mi_flush(batch);
2658     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2659     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2660     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
2661     gen7_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
2662     gen7_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
2663
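    /*
     * First pass over the scan (slice) parameters to find the largest DC/AC
     * Huffman table selector, so that gen7_mfd_jpeg_huff_table_state() below
     * loads the right number of tables.
     */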
2664     for (j = 0; j < decode_state->num_slice_params; j++) {
2665         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2666         slice_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j]->buffer;
2667         slice_data_bo = decode_state->slice_datas[j]->bo;
2668         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2669
2670         if (j == decode_state->num_slice_params - 1)
2671             next_slice_group_param = NULL;
2672         else
2673             next_slice_group_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j + 1]->buffer;
2674
2675         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2676             int component;
2677
2678             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2679
2680             if (i < decode_state->slice_params[j]->num_elements - 1)
2681                 next_slice_param = slice_param + 1;
2682             else
2683                 next_slice_param = next_slice_group_param;
2684
2685             for (component = 0; component < slice_param->num_components; component++) {
2686                 if (max_selector < slice_param->components[component].dc_table_selector)
2687                     max_selector = slice_param->components[component].dc_table_selector;
2688
2689                 if (max_selector < slice_param->components[component].ac_table_selector)
2690                     max_selector = slice_param->components[component].ac_table_selector;
2691             }
2692
2693             slice_param++;
2694         }
2695     }
2696
2697     assert(max_selector < 2);
2698     gen7_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
2699
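    /*
     * Second pass: reprogram the indirect object base for each slice data
     * buffer and emit one JPEG BSD object per scan.
     */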
2700     for (j = 0; j < decode_state->num_slice_params; j++) {
2701         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2702         slice_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j]->buffer;
2703         slice_data_bo = decode_state->slice_datas[j]->bo;
2704         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
2705
2706         if (j == decode_state->num_slice_params - 1)
2707             next_slice_group_param = NULL;
2708         else
2709             next_slice_group_param = (VASliceParameterBufferJPEG *)decode_state->slice_params[j + 1]->buffer;
2710
2711         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2712             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2713
2714             if (i < decode_state->slice_params[j]->num_elements - 1)
2715                 next_slice_param = slice_param + 1;
2716             else
2717                 next_slice_param = next_slice_group_param;
2718
2719             gen7_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2720             slice_param++;
2721         }
2722     }
2723
2724     intel_batchbuffer_end_atomic(batch);
2725     intel_batchbuffer_flush(batch);
2726 }
2727
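/*
 * hw_context::run hook: reset the per-frame MPEG-2 workaround state and
 * dispatch to the codec-specific decode routine for the given VA profile.
 */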
2728 static void 
2729 gen7_mfd_decode_picture(VADriverContextP ctx, 
2730                         VAProfile profile, 
2731                         union codec_state *codec_state,
2732                         struct hw_context *hw_context)
2733
2734 {
2735     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2736     struct decode_state *decode_state = &codec_state->decode;
2737
2738     assert(gen7_mfd_context);
2739
2740     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
2741
2742     switch (profile) {
2743     case VAProfileMPEG2Simple:
2744     case VAProfileMPEG2Main:
2745         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
2746         break;
2747         
2748     case VAProfileH264Baseline:
2749     case VAProfileH264Main:
2750     case VAProfileH264High:
2751         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
2752         break;
2753
2754     case VAProfileVC1Simple:
2755     case VAProfileVC1Main:
2756     case VAProfileVC1Advanced:
2757         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
2758         break;
2759
2760     case VAProfileJPEGBaseline:
2761         gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
2762         break;
2763
2764     default:
2765         assert(0);
2766         break;
2767     }
2768 }
2769
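/*
 * hw_context::destroy hook: drop the deblocking outputs, every row-store
 * scratch buffer, the bitplane buffer, the JPEG workaround slice data, the
 * batchbuffer, and finally the context itself.
 */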
2770 static void
2771 gen7_mfd_context_destroy(void *hw_context)
2772 {
2773     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
2774
2775     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
2776     gen7_mfd_context->post_deblocking_output.bo = NULL;
2777
2778     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2779     gen7_mfd_context->pre_deblocking_output.bo = NULL;
2780
2781     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
2782     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2783
2784     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
2785     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2786
2787     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
2788     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2789
2790     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
2791     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2792
2793     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
2794     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2795
2796     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
2797
2798     intel_batchbuffer_free(gen7_mfd_context->base.batch);
2799     free(gen7_mfd_context);
2800 }
2801
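/*
 * Mark all four MPEG-2 quantiser matrices as not yet loaded (-1), presumably
 * so the first IQ matrix buffer seen in the stream always triggers a reload.
 */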
2802 static void gen7_mfd_mpeg2_context_init(VADriverContextP ctx,
2803                                     struct gen7_mfd_context *gen7_mfd_context)
2804 {
2805     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
2806     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
2807     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
2808     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
2809 }
2810
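/*
 * Constructor for the GEN7 decoder context: install the run/destroy hooks,
 * create the batchbuffer, invalidate all reference frame-store slots and the
 * JPEG workaround surface, then do the codec-specific initialization for the
 * configured profile.
 */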
2811 struct hw_context *
2812 gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
2813 {
2814     struct intel_driver_data *intel = intel_driver_data(ctx);
2815     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
    assert(gen7_mfd_context); /* every field below is dereferenced unconditionally */
2816     int i;
2817
2818     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
2819     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
2820     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER);
2821
2822     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
2823         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
2824         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
2825     }
2826
2827     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
2828
2829     switch (obj_config->profile) {
2830     case VAProfileMPEG2Simple:
2831     case VAProfileMPEG2Main:
2832         gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
2833         break;
2834
2835     case VAProfileH264Baseline:
2836     case VAProfileH264Main:
2837     case VAProfileH264High:
2838         gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
2839         break;
2840     default:
2841         break;
2842     }
2843     return (struct hw_context *)gen7_mfd_context;
2844 }