Use the right parameters to initialize bit rate context
[platform/upstream/libva-intel-driver.git] / src / gen75_mfd.c
1 /*
2  * Copyright © 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *    Zhao Yakui  <yakui.zhao@intel.com>
27  *
28  */
29 #include "sysdeps.h"
30
31 #include <va/va_dec_jpeg.h>
32
33 #include "intel_batchbuffer.h"
34 #include "intel_driver.h"
35 #include "i965_defines.h"
36 #include "i965_drv_video.h"
37 #include "i965_decoder_utils.h"
38 #include "gen7_mfd.h"
39 #include "intel_media.h"
40
/* Revision number from which the "B+" command layouts in this file are used. */
#define B0_STEP_REV             2
/*
 * Non-zero when the device revision is at least B0_STEP_REV.  The argument
 * is parenthesized so that any pointer expression (e.g. "p + 1" or "&dev")
 * expands correctly.
 */
#define IS_STEPPING_BPLUS(i965) (((i965)->intel.revision) >= B0_STEP_REV)
43
/*
 * Zigzag scan order of an 8x8 block: entry k is the row-major index of the
 * k-th position visited by the scan.  Each source row below is one
 * anti-diagonal of the block.
 */
static const uint32_t zigzag_direct[64] = {
    0,
    1,  8,
    16, 9,  2,
    3,  10, 17, 24,
    32, 25, 18, 11, 4,
    5,  12, 19, 26, 33, 40,
    48, 41, 34, 27, 20, 13, 6,
    7,  14, 21, 28, 35, 42, 49, 56,
    57, 50, 43, 36, 29, 22, 15,
    23, 30, 37, 44, 51, 58,
    59, 52, 45, 38, 31,
    39, 46, 53, 60,
    61, 54, 47,
    55, 62,
    63
};
54
55 static void
56 gen75_mfd_init_avc_surface(VADriverContextP ctx, 
57                           VAPictureParameterBufferH264 *pic_param,
58                           struct object_surface *obj_surface)
59 {
60     struct i965_driver_data *i965 = i965_driver_data(ctx);
61     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
62     int width_in_mbs, height_in_mbs;
63
64     obj_surface->free_private_data = gen_free_avc_surface;
65     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
66     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
67
68     if (!gen7_avc_surface) {
69         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
70         assert((obj_surface->size & 0x3f) == 0);
71         obj_surface->private_data = gen7_avc_surface;
72     }
73
74     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
75                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
76
77     if (gen7_avc_surface->dmv_top == NULL) {
78         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
79                                                  "direct mv w/r buffer",
80                                                  width_in_mbs * height_in_mbs * 128,
81                                                  0x1000);
82         assert(gen7_avc_surface->dmv_top);
83     }
84
85     if (gen7_avc_surface->dmv_bottom_flag &&
86         gen7_avc_surface->dmv_bottom == NULL) {
87         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
88                                                     "direct mv w/r buffer",
89                                                     width_in_mbs * height_in_mbs * 128,                                                    
90                                                     0x1000);
91         assert(gen7_avc_surface->dmv_bottom);
92     }
93 }
94
95 static void
96 gen75_mfd_pipe_mode_select(VADriverContextP ctx,
97                           struct decode_state *decode_state,
98                           int standard_select,
99                           struct gen7_mfd_context *gen7_mfd_context)
100 {
101     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
102
103     assert(standard_select == MFX_FORMAT_MPEG2 ||
104            standard_select == MFX_FORMAT_AVC ||
105            standard_select == MFX_FORMAT_VC1 ||
106            standard_select == MFX_FORMAT_JPEG);
107
108     BEGIN_BCS_BATCH(batch, 5);
109     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
110     OUT_BCS_BATCH(batch,
111                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
112                   (MFD_MODE_VLD << 15) | /* VLD mode */
113                   (0 << 10) | /* disable Stream-Out */
114                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
115                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
116                   (0 << 5)  | /* not in stitch mode */
117                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
118                   (standard_select << 0));
119     OUT_BCS_BATCH(batch,
120                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
121                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
122                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
123                   (0 << 1)  |
124                   (0 << 0));
125     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
126     OUT_BCS_BATCH(batch, 0); /* reserved */
127     ADVANCE_BCS_BATCH(batch);
128 }
129
130 static void
131 gen75_mfd_surface_state(VADriverContextP ctx,
132                        struct decode_state *decode_state,
133                        int standard_select,
134                        struct gen7_mfd_context *gen7_mfd_context)
135 {
136     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
137     struct object_surface *obj_surface = decode_state->render_object;
138     unsigned int y_cb_offset;
139     unsigned int y_cr_offset;
140
141     assert(obj_surface);
142
143     y_cb_offset = obj_surface->y_cb_offset;
144     y_cr_offset = obj_surface->y_cr_offset;
145
146     BEGIN_BCS_BATCH(batch, 6);
147     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
148     OUT_BCS_BATCH(batch, 0);
149     OUT_BCS_BATCH(batch,
150                   ((obj_surface->orig_height - 1) << 18) |
151                   ((obj_surface->orig_width - 1) << 4));
152     OUT_BCS_BATCH(batch,
153                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
154                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
155                   (0 << 22) | /* surface object control state, ignored */
156                   ((obj_surface->width - 1) << 3) | /* pitch */
157                   (0 << 2)  | /* must be 0 */
158                   (1 << 1)  | /* must be tiled */
159                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
160     OUT_BCS_BATCH(batch,
161                   (0 << 16) | /* X offset for U(Cb), must be 0 */
162                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
163     OUT_BCS_BATCH(batch,
164                   (0 << 16) | /* X offset for V(Cr), must be 0 */
165                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
166     ADVANCE_BCS_BATCH(batch);
167 }
168
169 static void
170 gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
171                              struct decode_state *decode_state,
172                              int standard_select,
173                              struct gen7_mfd_context *gen7_mfd_context)
174 {
175     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
176     int i;
177
178     BEGIN_BCS_BATCH(batch, 61);
179     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
180         /* Pre-deblock 1-3 */
181     if (gen7_mfd_context->pre_deblocking_output.valid)
182         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
183                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
184                       0);
185     else
186         OUT_BCS_BATCH(batch, 0);
187
188         OUT_BCS_BATCH(batch, 0);
189         OUT_BCS_BATCH(batch, 0);
190         /* Post-debloing 4-6 */
191     if (gen7_mfd_context->post_deblocking_output.valid)
192         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
193                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
194                       0);
195     else
196         OUT_BCS_BATCH(batch, 0);
197
198         OUT_BCS_BATCH(batch, 0);
199         OUT_BCS_BATCH(batch, 0);
200
201         /* uncompressed-video & stream out 7-12 */
202     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
203     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
204         OUT_BCS_BATCH(batch, 0);
205         OUT_BCS_BATCH(batch, 0);
206         OUT_BCS_BATCH(batch, 0);
207         OUT_BCS_BATCH(batch, 0);
208
209         /* intra row-store scratch 13-15 */
210     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
211         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
212                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
213                       0);
214     else
215         OUT_BCS_BATCH(batch, 0);
216
217         OUT_BCS_BATCH(batch, 0);
218         OUT_BCS_BATCH(batch, 0);
219         /* deblocking-filter-row-store 16-18 */
220     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
221         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
222                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
223                       0);
224     else
225         OUT_BCS_BATCH(batch, 0);
226         OUT_BCS_BATCH(batch, 0);
227         OUT_BCS_BATCH(batch, 0);
228
229     /* DW 19..50 */
230     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
231         struct object_surface *obj_surface;
232
233         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
234             gen7_mfd_context->reference_surface[i].obj_surface &&
235             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
236             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
237
238             OUT_BCS_RELOC(batch, obj_surface->bo,
239                           I915_GEM_DOMAIN_INSTRUCTION, 0,
240                           0);
241         } else {
242             OUT_BCS_BATCH(batch, 0);
243         }
244             OUT_BCS_BATCH(batch, 0);
245     }
246         /* reference property 51 */
247     OUT_BCS_BATCH(batch, 0);  
248         
249         /* Macroblock status & ILDB 52-57 */
250         OUT_BCS_BATCH(batch, 0);
251         OUT_BCS_BATCH(batch, 0);
252         OUT_BCS_BATCH(batch, 0);
253         OUT_BCS_BATCH(batch, 0);
254         OUT_BCS_BATCH(batch, 0);
255         OUT_BCS_BATCH(batch, 0);
256
257         /* the second Macroblock status 58-60 */        
258         OUT_BCS_BATCH(batch, 0);
259         OUT_BCS_BATCH(batch, 0);
260         OUT_BCS_BATCH(batch, 0);
261     ADVANCE_BCS_BATCH(batch);
262 }
263
264 static void
265 gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
266                              struct decode_state *decode_state,
267                              int standard_select,
268                              struct gen7_mfd_context *gen7_mfd_context)
269 {
270     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
271     struct i965_driver_data *i965 = i965_driver_data(ctx);
272     int i;
273
274     if (IS_STEPPING_BPLUS(i965)) {
275         gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
276                                             standard_select, gen7_mfd_context);
277         return;
278     }
279
280     BEGIN_BCS_BATCH(batch, 25);
281     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
282     if (gen7_mfd_context->pre_deblocking_output.valid)
283         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
284                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
285                       0);
286     else
287         OUT_BCS_BATCH(batch, 0);
288
289     if (gen7_mfd_context->post_deblocking_output.valid)
290         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
291                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
292                       0);
293     else
294         OUT_BCS_BATCH(batch, 0);
295
296     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
297     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
298
299     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
300         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
301                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
302                       0);
303     else
304         OUT_BCS_BATCH(batch, 0);
305
306     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
307         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
308                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
309                       0);
310     else
311         OUT_BCS_BATCH(batch, 0);
312
313     /* DW 7..22 */
314     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
315         struct object_surface *obj_surface;
316
317         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
318             gen7_mfd_context->reference_surface[i].obj_surface &&
319             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
320             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
321
322             OUT_BCS_RELOC(batch, obj_surface->bo,
323                           I915_GEM_DOMAIN_INSTRUCTION, 0,
324                           0);
325         } else {
326             OUT_BCS_BATCH(batch, 0);
327         }
328     }
329
330     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
331     OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
332     ADVANCE_BCS_BATCH(batch);
333 }
334
335 static void
336 gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
337                                  dri_bo *slice_data_bo,
338                                  int standard_select,
339                                  struct gen7_mfd_context *gen7_mfd_context)
340 {
341     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
342
343     BEGIN_BCS_BATCH(batch, 26);
344     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
345         /* MFX In BS 1-5 */
346     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
347     OUT_BCS_BATCH(batch, 0);
348     OUT_BCS_BATCH(batch, 0);
349         /* Upper bound 4-5 */   
350     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
351     OUT_BCS_BATCH(batch, 0);
352
353         /* MFX indirect MV 6-10 */
354     OUT_BCS_BATCH(batch, 0);
355     OUT_BCS_BATCH(batch, 0);
356     OUT_BCS_BATCH(batch, 0);
357     OUT_BCS_BATCH(batch, 0);
358     OUT_BCS_BATCH(batch, 0);
359         
360         /* MFX IT_COFF 11-15 */
361     OUT_BCS_BATCH(batch, 0);
362     OUT_BCS_BATCH(batch, 0);
363     OUT_BCS_BATCH(batch, 0);
364     OUT_BCS_BATCH(batch, 0);
365     OUT_BCS_BATCH(batch, 0);
366
367         /* MFX IT_DBLK 16-20 */
368     OUT_BCS_BATCH(batch, 0);
369     OUT_BCS_BATCH(batch, 0);
370     OUT_BCS_BATCH(batch, 0);
371     OUT_BCS_BATCH(batch, 0);
372     OUT_BCS_BATCH(batch, 0);
373
374         /* MFX PAK_BSE object for encoder 21-25 */
375     OUT_BCS_BATCH(batch, 0);
376     OUT_BCS_BATCH(batch, 0);
377     OUT_BCS_BATCH(batch, 0);
378     OUT_BCS_BATCH(batch, 0);
379     OUT_BCS_BATCH(batch, 0);
380
381     ADVANCE_BCS_BATCH(batch);
382 }
383
384 static void
385 gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
386                                  dri_bo *slice_data_bo,
387                                  int standard_select,
388                                  struct gen7_mfd_context *gen7_mfd_context)
389 {
390     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
391     struct i965_driver_data *i965 = i965_driver_data(ctx);
392
393     if (IS_STEPPING_BPLUS(i965)) {
394         gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
395                                                 standard_select, gen7_mfd_context);
396         return;
397     }
398
399     BEGIN_BCS_BATCH(batch, 11);
400     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
401     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
402     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
403     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
404     OUT_BCS_BATCH(batch, 0);
405     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
406     OUT_BCS_BATCH(batch, 0);
407     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
408     OUT_BCS_BATCH(batch, 0);
409     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
410     OUT_BCS_BATCH(batch, 0);
411     ADVANCE_BCS_BATCH(batch);
412 }
413
414 static void
415 gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
416                                  struct decode_state *decode_state,
417                                  int standard_select,
418                                  struct gen7_mfd_context *gen7_mfd_context)
419 {
420     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
421
422     BEGIN_BCS_BATCH(batch, 10);
423     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
424
425     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
426         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
427                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
428                       0);
429         else
430                 OUT_BCS_BATCH(batch, 0);
431                 
432     OUT_BCS_BATCH(batch, 0);
433     OUT_BCS_BATCH(batch, 0);
434         /* MPR Row Store Scratch buffer 4-6 */
435     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
436         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
437                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
438                       0);
439     else
440             OUT_BCS_BATCH(batch, 0);
441     OUT_BCS_BATCH(batch, 0);
442     OUT_BCS_BATCH(batch, 0);
443
444         /* Bitplane 7-9 */ 
445     if (gen7_mfd_context->bitplane_read_buffer.valid)
446         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
447                       I915_GEM_DOMAIN_INSTRUCTION, 0,
448                       0);
449     else
450         OUT_BCS_BATCH(batch, 0);
451     OUT_BCS_BATCH(batch, 0);
452     OUT_BCS_BATCH(batch, 0);
453
454     ADVANCE_BCS_BATCH(batch);
455 }
456
457 static void
458 gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
459                                  struct decode_state *decode_state,
460                                  int standard_select,
461                                  struct gen7_mfd_context *gen7_mfd_context)
462 {
463     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
464     struct i965_driver_data *i965 = i965_driver_data(ctx);
465
466     if (IS_STEPPING_BPLUS(i965)) {
467         gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
468                                                 standard_select, gen7_mfd_context);
469         return;
470     }
471
472     BEGIN_BCS_BATCH(batch, 4);
473     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
474
475     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
476         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
477                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
478                       0);
479     else
480         OUT_BCS_BATCH(batch, 0);
481
482     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
483         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
484                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
485                       0);
486     else
487         OUT_BCS_BATCH(batch, 0);
488
489     if (gen7_mfd_context->bitplane_read_buffer.valid)
490         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
491                       I915_GEM_DOMAIN_INSTRUCTION, 0,
492                       0);
493     else
494         OUT_BCS_BATCH(batch, 0);
495
496     ADVANCE_BCS_BATCH(batch);
497 }
498
499 static void
500 gen75_mfd_qm_state(VADriverContextP ctx,
501                   int qm_type,
502                   unsigned char *qm,
503                   int qm_length,
504                   struct gen7_mfd_context *gen7_mfd_context)
505 {
506     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
507     unsigned int qm_buffer[16];
508
509     assert(qm_length <= 16 * 4);
510     memcpy(qm_buffer, qm, qm_length);
511
512     BEGIN_BCS_BATCH(batch, 18);
513     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
514     OUT_BCS_BATCH(batch, qm_type << 0);
515     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
516     ADVANCE_BCS_BATCH(batch);
517 }
518
519 static void
520 gen75_mfd_avc_img_state(VADriverContextP ctx,
521                        struct decode_state *decode_state,
522                        struct gen7_mfd_context *gen7_mfd_context)
523 {
524     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
525     int img_struct;
526     int mbaff_frame_flag;
527     unsigned int width_in_mbs, height_in_mbs;
528     VAPictureParameterBufferH264 *pic_param;
529
530     assert(decode_state->pic_param && decode_state->pic_param->buffer);
531     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
532
533     assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
534
535     if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
536         img_struct = 1;
537     else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
538         img_struct = 3;
539     else
540         img_struct = 0;
541
542     if ((img_struct & 0x1) == 0x1) {
543         assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
544     } else {
545         assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
546     }
547
548     if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
549         assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
550         assert(pic_param->pic_fields.bits.field_pic_flag == 0);
551     } else {
552         assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
553     }
554
555     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
556                         !pic_param->pic_fields.bits.field_pic_flag);
557
558     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
559     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
560
561     /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
562     assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
563            pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
564     assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
565
566     BEGIN_BCS_BATCH(batch, 17);
567     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
568     OUT_BCS_BATCH(batch, 
569                   (width_in_mbs * height_in_mbs - 1));
570     OUT_BCS_BATCH(batch, 
571                   ((height_in_mbs - 1) << 16) | 
572                   ((width_in_mbs - 1) << 0));
573     OUT_BCS_BATCH(batch, 
574                   ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
575                   ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
576                   (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
577                   (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
578                   (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
579                   (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
580                   (img_struct << 8));
581     OUT_BCS_BATCH(batch,
582                   (pic_param->seq_fields.bits.chroma_format_idc << 10) |
583                   (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
584                   ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
585                   (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
586                   (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
587                   (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
588                   (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
589                   (mbaff_frame_flag << 1) |
590                   (pic_param->pic_fields.bits.field_pic_flag << 0));
591     OUT_BCS_BATCH(batch, 0);
592     OUT_BCS_BATCH(batch, 0);
593     OUT_BCS_BATCH(batch, 0);
594     OUT_BCS_BATCH(batch, 0);
595     OUT_BCS_BATCH(batch, 0);
596     OUT_BCS_BATCH(batch, 0);
597     OUT_BCS_BATCH(batch, 0);
598     OUT_BCS_BATCH(batch, 0);
599     OUT_BCS_BATCH(batch, 0);
600     OUT_BCS_BATCH(batch, 0);
601     OUT_BCS_BATCH(batch, 0);
602     OUT_BCS_BATCH(batch, 0);
603     ADVANCE_BCS_BATCH(batch);
604 }
605
606 static void
607 gen75_mfd_avc_qm_state(VADriverContextP ctx,
608                       struct decode_state *decode_state,
609                       struct gen7_mfd_context *gen7_mfd_context)
610 {
611     VAIQMatrixBufferH264 *iq_matrix;
612     VAPictureParameterBufferH264 *pic_param;
613
614     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
615         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
616     else
617         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
618
619     assert(decode_state->pic_param && decode_state->pic_param->buffer);
620     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
621
622     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
623     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
624
625     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
626         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
627         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
628     }
629 }
630
631 static void
632 gen75_mfd_avc_picid_state(VADriverContextP ctx,
633                       struct decode_state *decode_state,
634                       struct gen7_mfd_context *gen7_mfd_context)
635 {
636     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
637
638     BEGIN_BCS_BATCH(batch, 10);
639     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
640     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
641     OUT_BCS_BATCH(batch, 0);
642     OUT_BCS_BATCH(batch, 0);
643     OUT_BCS_BATCH(batch, 0);
644     OUT_BCS_BATCH(batch, 0);
645     OUT_BCS_BATCH(batch, 0);
646     OUT_BCS_BATCH(batch, 0);
647     OUT_BCS_BATCH(batch, 0);
648     OUT_BCS_BATCH(batch, 0);
649     ADVANCE_BCS_BATCH(batch);
650 }
651
652 static void
653 gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
654                               struct decode_state *decode_state,
655                               VAPictureParameterBufferH264 *pic_param,
656                               VASliceParameterBufferH264 *slice_param,
657                               struct gen7_mfd_context *gen7_mfd_context)
658 {
659     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
660     struct object_surface *obj_surface;
661     GenAvcSurface *gen7_avc_surface;
662     VAPictureH264 *va_pic;
663     int i, j;
664
665     BEGIN_BCS_BATCH(batch, 71);
666     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
667
668     /* reference surfaces 0..15 */
669     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
670         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
671             gen7_mfd_context->reference_surface[i].obj_surface &&
672             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
673
674             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
675             gen7_avc_surface = obj_surface->private_data;
676             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
677                           I915_GEM_DOMAIN_INSTRUCTION, 0,
678                           0);
679             OUT_BCS_BATCH(batch, 0);
680         } else {
681             OUT_BCS_BATCH(batch, 0);
682             OUT_BCS_BATCH(batch, 0);
683         }
684     }
685
686     OUT_BCS_BATCH(batch, 0);
687
688     /* the current decoding frame/field */
689     va_pic = &pic_param->CurrPic;
690     obj_surface = decode_state->render_object;
691     assert(obj_surface->bo && obj_surface->private_data);
692     gen7_avc_surface = obj_surface->private_data;
693
694     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
695                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
696                   0);
697
698     OUT_BCS_BATCH(batch, 0);
699     OUT_BCS_BATCH(batch, 0);
700
701     /* POC List */
702     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
703         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
704             int found = 0;
705
706             assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
707
708             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
709                 va_pic = &pic_param->ReferenceFrames[j];
710                 
711                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
712                     continue;
713
714                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
715                     found = 1;
716                     break;
717                 }
718             }
719
720             assert(found == 1);
721             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
722             
723             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
724             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
725         } else {
726             OUT_BCS_BATCH(batch, 0);
727             OUT_BCS_BATCH(batch, 0);
728         }
729     }
730
731     va_pic = &pic_param->CurrPic;
732     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
733     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
734
735     ADVANCE_BCS_BATCH(batch);
736 }
737
738 static void
739 gen75_mfd_avc_directmode_state(VADriverContextP ctx,
740                               struct decode_state *decode_state,
741                               VAPictureParameterBufferH264 *pic_param,
742                               VASliceParameterBufferH264 *slice_param,
743                               struct gen7_mfd_context *gen7_mfd_context)
744 {
745     struct i965_driver_data *i965 = i965_driver_data(ctx);
746     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
747     struct object_surface *obj_surface;
748     GenAvcSurface *gen7_avc_surface;
749     VAPictureH264 *va_pic;
750     int i, j;
751
752     if (IS_STEPPING_BPLUS(i965)) {
753         gen75_mfd_avc_directmode_state_bplus(ctx, decode_state, pic_param, slice_param,
754                                              gen7_mfd_context);
755
756         return;
757     }
758
759     BEGIN_BCS_BATCH(batch, 69);
760     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
761
762     /* reference surfaces 0..15 */
763     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
764         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
765             gen7_mfd_context->reference_surface[i].obj_surface &&
766             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
767
768             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
769             gen7_avc_surface = obj_surface->private_data;
770
771             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
772                           I915_GEM_DOMAIN_INSTRUCTION, 0,
773                           0);
774
775             if (gen7_avc_surface->dmv_bottom_flag == 1)
776                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
777                               I915_GEM_DOMAIN_INSTRUCTION, 0,
778                               0);
779             else
780                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
781                               I915_GEM_DOMAIN_INSTRUCTION, 0,
782                               0);
783         } else {
784             OUT_BCS_BATCH(batch, 0);
785             OUT_BCS_BATCH(batch, 0);
786         }
787     }
788
789     /* the current decoding frame/field */
790     va_pic = &pic_param->CurrPic;
791     obj_surface = decode_state->render_object;
792     assert(obj_surface->bo && obj_surface->private_data);
793     gen7_avc_surface = obj_surface->private_data;
794
795     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
796                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
797                   0);
798
799     if (gen7_avc_surface->dmv_bottom_flag == 1)
800         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
801                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
802                       0);
803     else
804         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
805                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
806                       0);
807
808     /* POC List */
809     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
810         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
811             int found = 0;
812
813             assert(gen7_mfd_context->reference_surface[i].obj_surface != NULL);
814
815             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
816                 va_pic = &pic_param->ReferenceFrames[j];
817                 
818                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
819                     continue;
820
821                 if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
822                     found = 1;
823                     break;
824                 }
825             }
826
827             assert(found == 1);
828             assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
829             
830             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
831             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
832         } else {
833             OUT_BCS_BATCH(batch, 0);
834             OUT_BCS_BATCH(batch, 0);
835         }
836     }
837
838     va_pic = &pic_param->CurrPic;
839     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
840     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
841
842     ADVANCE_BCS_BATCH(batch);
843 }
844
845 static void
846 gen75_mfd_avc_slice_state(VADriverContextP ctx,
847                          VAPictureParameterBufferH264 *pic_param,
848                          VASliceParameterBufferH264 *slice_param,
849                          VASliceParameterBufferH264 *next_slice_param,
850                          struct gen7_mfd_context *gen7_mfd_context)
851 {
852     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
853     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
854     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
855     int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
856     int num_ref_idx_l0, num_ref_idx_l1;
857     int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
858                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
859     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
860     int slice_type;
861
862     if (slice_param->slice_type == SLICE_TYPE_I ||
863         slice_param->slice_type == SLICE_TYPE_SI) {
864         slice_type = SLICE_TYPE_I;
865     } else if (slice_param->slice_type == SLICE_TYPE_P ||
866                slice_param->slice_type == SLICE_TYPE_SP) {
867         slice_type = SLICE_TYPE_P;
868     } else { 
869         assert(slice_param->slice_type == SLICE_TYPE_B);
870         slice_type = SLICE_TYPE_B;
871     }
872
873     if (slice_type == SLICE_TYPE_I) {
874         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
875         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
876         num_ref_idx_l0 = 0;
877         num_ref_idx_l1 = 0;
878     } else if (slice_type == SLICE_TYPE_P) {
879         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
880         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
881         num_ref_idx_l1 = 0;
882     } else {
883         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
884         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
885     }
886
887     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
888     slice_hor_pos = first_mb_in_slice % width_in_mbs; 
889     slice_ver_pos = first_mb_in_slice / width_in_mbs;
890
891     if (next_slice_param) {
892         first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
893         next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
894         next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
895     } else {
896         next_slice_hor_pos = 0;
897         next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
898     }
899
900     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
901     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
902     OUT_BCS_BATCH(batch, slice_type);
903     OUT_BCS_BATCH(batch, 
904                   (num_ref_idx_l1 << 24) |
905                   (num_ref_idx_l0 << 16) |
906                   (slice_param->chroma_log2_weight_denom << 8) |
907                   (slice_param->luma_log2_weight_denom << 0));
908     OUT_BCS_BATCH(batch, 
909                   (slice_param->direct_spatial_mv_pred_flag << 29) |
910                   (slice_param->disable_deblocking_filter_idc << 27) |
911                   (slice_param->cabac_init_idc << 24) |
912                   ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
913                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
914                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
915     OUT_BCS_BATCH(batch, 
916                   (slice_ver_pos << 24) |
917                   (slice_hor_pos << 16) | 
918                   (first_mb_in_slice << 0));
919     OUT_BCS_BATCH(batch,
920                   (next_slice_ver_pos << 16) |
921                   (next_slice_hor_pos << 0));
922     OUT_BCS_BATCH(batch, 
923                   (next_slice_param == NULL) << 19); /* last slice flag */
924     OUT_BCS_BATCH(batch, 0);
925     OUT_BCS_BATCH(batch, 0);
926     OUT_BCS_BATCH(batch, 0);
927     OUT_BCS_BATCH(batch, 0);
928     ADVANCE_BCS_BATCH(batch);
929 }
930
931 static inline void
932 gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
933                            VAPictureParameterBufferH264 *pic_param,
934                            VASliceParameterBufferH264 *slice_param,
935                            struct gen7_mfd_context *gen7_mfd_context)
936 {
937     gen6_send_avc_ref_idx_state(
938         gen7_mfd_context->base.batch,
939         slice_param,
940         gen7_mfd_context->reference_surface
941     );
942 }
943
944 static void
945 gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
946                                 VAPictureParameterBufferH264 *pic_param,
947                                 VASliceParameterBufferH264 *slice_param,
948                                 struct gen7_mfd_context *gen7_mfd_context)
949 {
950     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
951     int i, j, num_weight_offset_table = 0;
952     short weightoffsets[32 * 6];
953
954     if ((slice_param->slice_type == SLICE_TYPE_P ||
955          slice_param->slice_type == SLICE_TYPE_SP) &&
956         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
957         num_weight_offset_table = 1;
958     }
959     
960     if ((slice_param->slice_type == SLICE_TYPE_B) &&
961         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
962         num_weight_offset_table = 2;
963     }
964
965     for (i = 0; i < num_weight_offset_table; i++) {
966         BEGIN_BCS_BATCH(batch, 98);
967         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
968         OUT_BCS_BATCH(batch, i);
969
970         if (i == 0) {
971             for (j = 0; j < 32; j++) {
972                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
973                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
974                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
975                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
976                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
977                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
978             }
979         } else {
980             for (j = 0; j < 32; j++) {
981                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
982                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
983                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
984                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
985                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
986                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
987             }
988         }
989
990         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
991         ADVANCE_BCS_BATCH(batch);
992     }
993 }
994
995 static void
996 gen75_mfd_avc_bsd_object(VADriverContextP ctx,
997                         VAPictureParameterBufferH264 *pic_param,
998                         VASliceParameterBufferH264 *slice_param,
999                         dri_bo *slice_data_bo,
1000                         VASliceParameterBufferH264 *next_slice_param,
1001                         struct gen7_mfd_context *gen7_mfd_context)
1002 {
1003     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1004     int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
1005                                                             slice_param,
1006                                                             pic_param->pic_fields.bits.entropy_coding_mode_flag);
1007
1008     /* the input bitsteam format on GEN7 differs from GEN6 */
1009     BEGIN_BCS_BATCH(batch, 6);
1010     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
1011     OUT_BCS_BATCH(batch, 
1012                   (slice_param->slice_data_size - slice_param->slice_data_offset));
1013     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
1014     OUT_BCS_BATCH(batch,
1015                   (0 << 31) |
1016                   (0 << 14) |
1017                   (0 << 12) |
1018                   (0 << 10) |
1019                   (0 << 8));
1020     OUT_BCS_BATCH(batch,
1021                   ((slice_data_bit_offset >> 3) << 16) |
1022                   (1 << 7)  |
1023                   (0 << 5)  |
1024                   (0 << 4)  |
1025                   ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
1026                   (slice_data_bit_offset & 0x7));
1027     OUT_BCS_BATCH(batch, 0);
1028     ADVANCE_BCS_BATCH(batch);
1029 }
1030
1031 static inline void
1032 gen75_mfd_avc_context_init(
1033     VADriverContextP         ctx,
1034     struct gen7_mfd_context *gen7_mfd_context
1035 )
1036 {
1037     /* Initialize flat scaling lists */
1038     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
1039 }
1040
1041 static void
1042 gen75_mfd_avc_decode_init(VADriverContextP ctx,
1043                          struct decode_state *decode_state,
1044                          struct gen7_mfd_context *gen7_mfd_context)
1045 {
1046     VAPictureParameterBufferH264 *pic_param;
1047     VASliceParameterBufferH264 *slice_param;
1048     struct i965_driver_data *i965 = i965_driver_data(ctx);
1049     struct object_surface *obj_surface;
1050     dri_bo *bo;
1051     int i, j, enable_avc_ildb = 0;
1052     unsigned int width_in_mbs, height_in_mbs;
1053
1054     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
1055         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1056         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1057
1058         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1059             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1060             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1061                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1062                    (slice_param->slice_type == SLICE_TYPE_P) ||
1063                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1064                    (slice_param->slice_type == SLICE_TYPE_B));
1065
1066             if (slice_param->disable_deblocking_filter_idc != 1) {
1067                 enable_avc_ildb = 1;
1068                 break;
1069             }
1070
1071             slice_param++;
1072         }
1073     }
1074
1075     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1076     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1077     intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
1078     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
1079     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
1080     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
1081     assert(height_in_mbs > 0 && height_in_mbs <= 256);
1082
1083     /* Current decoded picture */
1084     obj_surface = decode_state->render_object;
1085     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
1086     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
1087     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1088
1089     /* initial uv component for YUV400 case */
1090     if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
1091          unsigned int uv_offset = obj_surface->width * obj_surface->height; 
1092          unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 
1093
1094          drm_intel_gem_bo_map_gtt(obj_surface->bo);
1095          memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
1096          drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
1097     }
1098
1099     gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
1100
1101     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1102     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1103     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1104     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
1105
1106     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1107     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1108     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1109     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
1110
1111     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1112     bo = dri_bo_alloc(i965->intel.bufmgr,
1113                       "intra row store",
1114                       width_in_mbs * 64,
1115                       0x1000);
1116     assert(bo);
1117     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1118     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1119
1120     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1121     bo = dri_bo_alloc(i965->intel.bufmgr,
1122                       "deblocking filter row store",
1123                       width_in_mbs * 64 * 4,
1124                       0x1000);
1125     assert(bo);
1126     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1127     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1128
1129     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1130     bo = dri_bo_alloc(i965->intel.bufmgr,
1131                       "bsd mpc row store",
1132                       width_in_mbs * 64 * 2,
1133                       0x1000);
1134     assert(bo);
1135     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1136     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1137
1138     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
1139     bo = dri_bo_alloc(i965->intel.bufmgr,
1140                       "mpr row store",
1141                       width_in_mbs * 64 * 2,
1142                       0x1000);
1143     assert(bo);
1144     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
1145     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
1146
1147     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1148 }
1149
1150 static void
1151 gen75_mfd_avc_decode_picture(VADriverContextP ctx,
1152                             struct decode_state *decode_state,
1153                             struct gen7_mfd_context *gen7_mfd_context)
1154 {
1155     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1156     VAPictureParameterBufferH264 *pic_param;
1157     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
1158     dri_bo *slice_data_bo;
1159     int i, j;
1160
1161     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1162     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
1163     gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
1164
1165     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1166     intel_batchbuffer_emit_mi_flush(batch);
1167     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1168     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1169     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1170     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
1171     gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
1172     gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
1173     gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
1174
1175     for (j = 0; j < decode_state->num_slice_params; j++) {
1176         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1177         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
1178         slice_data_bo = decode_state->slice_datas[j]->bo;
1179         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
1180
1181         if (j == decode_state->num_slice_params - 1)
1182             next_slice_group_param = NULL;
1183         else
1184             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
1185
1186         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1187             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1188             assert((slice_param->slice_type == SLICE_TYPE_I) ||
1189                    (slice_param->slice_type == SLICE_TYPE_SI) ||
1190                    (slice_param->slice_type == SLICE_TYPE_P) ||
1191                    (slice_param->slice_type == SLICE_TYPE_SP) ||
1192                    (slice_param->slice_type == SLICE_TYPE_B));
1193
1194             if (i < decode_state->slice_params[j]->num_elements - 1)
1195                 next_slice_param = slice_param + 1;
1196             else
1197                 next_slice_param = next_slice_group_param;
1198
1199             gen75_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
1200             gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
1201             gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
1202             gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1203             gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
1204             slice_param++;
1205         }
1206     }
1207
1208     intel_batchbuffer_end_atomic(batch);
1209     intel_batchbuffer_flush(batch);
1210 }
1211
1212 static void
1213 gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
1214                            struct decode_state *decode_state,
1215                            struct gen7_mfd_context *gen7_mfd_context)
1216 {
1217     VAPictureParameterBufferMPEG2 *pic_param;
1218     struct i965_driver_data *i965 = i965_driver_data(ctx);
1219     struct object_surface *obj_surface;
1220     dri_bo *bo;
1221     unsigned int width_in_mbs;
1222
1223     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1224     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1225     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1226
1227     mpeg2_set_reference_surfaces(
1228         ctx,
1229         gen7_mfd_context->reference_surface,
1230         decode_state,
1231         pic_param
1232     );
1233
1234     /* Current decoded picture */
1235     obj_surface = decode_state->render_object;
1236     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1237
1238     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1239     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1240     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1241     gen7_mfd_context->pre_deblocking_output.valid = 1;
1242
1243     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1244     bo = dri_bo_alloc(i965->intel.bufmgr,
1245                       "bsd mpc row store",
1246                       width_in_mbs * 96,
1247                       0x1000);
1248     assert(bo);
1249     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1250     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1251
1252     gen7_mfd_context->post_deblocking_output.valid = 0;
1253     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
1254     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
1255     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1256     gen7_mfd_context->bitplane_read_buffer.valid = 0;
1257 }
1258
1259 static void
1260 gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
1261                          struct decode_state *decode_state,
1262                          struct gen7_mfd_context *gen7_mfd_context)
1263 {
1264     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1265     VAPictureParameterBufferMPEG2 *pic_param;
1266     unsigned int slice_concealment_disable_bit = 0;
1267
1268     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1269     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1270
1271     slice_concealment_disable_bit = 1;
1272
1273     BEGIN_BCS_BATCH(batch, 13);
1274     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
1275     OUT_BCS_BATCH(batch,
1276                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
1277                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
1278                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
1279                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
1280                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
1281                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
1282                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
1283                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
1284                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
1285                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
1286                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
1287                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
1288     OUT_BCS_BATCH(batch,
1289                   pic_param->picture_coding_type << 9);
1290     OUT_BCS_BATCH(batch,
1291                   (slice_concealment_disable_bit << 31) |
1292                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
1293                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
1294     OUT_BCS_BATCH(batch, 0);
1295     OUT_BCS_BATCH(batch, 0);
1296     OUT_BCS_BATCH(batch, 0);
1297     OUT_BCS_BATCH(batch, 0);
1298     OUT_BCS_BATCH(batch, 0);
1299     OUT_BCS_BATCH(batch, 0);
1300     OUT_BCS_BATCH(batch, 0);
1301     OUT_BCS_BATCH(batch, 0);
1302     OUT_BCS_BATCH(batch, 0);
1303     ADVANCE_BCS_BATCH(batch);
1304 }
1305
1306 static void
1307 gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
1308                         struct decode_state *decode_state,
1309                         struct gen7_mfd_context *gen7_mfd_context)
1310 {
1311     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
1312     int i, j;
1313
1314     /* Update internal QM state */
1315     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
1316         VAIQMatrixBufferMPEG2 * const iq_matrix =
1317             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
1318
1319         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
1320             iq_matrix->load_intra_quantiser_matrix) {
1321             gen_iq_matrix->load_intra_quantiser_matrix =
1322                 iq_matrix->load_intra_quantiser_matrix;
1323             if (iq_matrix->load_intra_quantiser_matrix) {
1324                 for (j = 0; j < 64; j++)
1325                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
1326                         iq_matrix->intra_quantiser_matrix[j];
1327             }
1328         }
1329
1330         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
1331             iq_matrix->load_non_intra_quantiser_matrix) {
1332             gen_iq_matrix->load_non_intra_quantiser_matrix =
1333                 iq_matrix->load_non_intra_quantiser_matrix;
1334             if (iq_matrix->load_non_intra_quantiser_matrix) {
1335                 for (j = 0; j < 64; j++)
1336                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
1337                         iq_matrix->non_intra_quantiser_matrix[j];
1338             }
1339         }
1340     }
1341
1342     /* Commit QM state to HW */
1343     for (i = 0; i < 2; i++) {
1344         unsigned char *qm = NULL;
1345         int qm_type;
1346
1347         if (i == 0) {
1348             if (gen_iq_matrix->load_intra_quantiser_matrix) {
1349                 qm = gen_iq_matrix->intra_quantiser_matrix;
1350                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
1351             }
1352         } else {
1353             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
1354                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
1355                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
1356             }
1357         }
1358
1359         if (!qm)
1360             continue;
1361
1362         gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
1363     }
1364 }
1365
1366 static void
1367 gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
1368                           VAPictureParameterBufferMPEG2 *pic_param,
1369                           VASliceParameterBufferMPEG2 *slice_param,
1370                           VASliceParameterBufferMPEG2 *next_slice_param,
1371                           struct gen7_mfd_context *gen7_mfd_context)
1372 {
1373     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1374     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
1375     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
1376
1377     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
1378         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
1379         is_field_pic = 1;
1380     is_field_pic_wa = is_field_pic &&
1381         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
1382
1383     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1384     hpos0 = slice_param->slice_horizontal_position;
1385
1386     if (next_slice_param == NULL) {
1387         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
1388         hpos1 = 0;
1389     } else {
1390         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
1391         hpos1 = next_slice_param->slice_horizontal_position;
1392     }
1393
1394     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
1395
1396     BEGIN_BCS_BATCH(batch, 5);
1397     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
1398     OUT_BCS_BATCH(batch, 
1399                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
1400     OUT_BCS_BATCH(batch, 
1401                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
1402     OUT_BCS_BATCH(batch,
1403                   hpos0 << 24 |
1404                   vpos0 << 16 |
1405                   mb_count << 8 |
1406                   (next_slice_param == NULL) << 5 |
1407                   (next_slice_param == NULL) << 3 |
1408                   (slice_param->macroblock_offset & 0x7));
1409     OUT_BCS_BATCH(batch,
1410                   (slice_param->quantiser_scale_code << 24) |
1411                   (vpos1 << 8 | hpos1));
1412     ADVANCE_BCS_BATCH(batch);
1413 }
1414
1415 static void
1416 gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
1417                               struct decode_state *decode_state,
1418                               struct gen7_mfd_context *gen7_mfd_context)
1419 {
1420     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1421     VAPictureParameterBufferMPEG2 *pic_param;
1422     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
1423     dri_bo *slice_data_bo;
1424     int i, j;
1425
1426     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1427     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
1428
1429     gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
1430     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
1431     intel_batchbuffer_emit_mi_flush(batch);
1432     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1433     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1434     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1435     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
1436     gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
1437     gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
1438
1439     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
1440         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
1441             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
1442
1443     for (j = 0; j < decode_state->num_slice_params; j++) {
1444         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
1445         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
1446         slice_data_bo = decode_state->slice_datas[j]->bo;
1447         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
1448
1449         if (j == decode_state->num_slice_params - 1)
1450             next_slice_group_param = NULL;
1451         else
1452             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
1453
1454         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
1455             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
1456
1457             if (i < decode_state->slice_params[j]->num_elements - 1)
1458                 next_slice_param = slice_param + 1;
1459             else
1460                 next_slice_param = next_slice_group_param;
1461
1462             gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
1463             slice_param++;
1464         }
1465     }
1466
1467     intel_batchbuffer_end_atomic(batch);
1468     intel_batchbuffer_flush(batch);
1469 }
1470
1471 static const int va_to_gen7_vc1_pic_type[5] = {
1472     GEN7_VC1_I_PICTURE,
1473     GEN7_VC1_P_PICTURE,
1474     GEN7_VC1_B_PICTURE,
1475     GEN7_VC1_BI_PICTURE,
1476     GEN7_VC1_P_PICTURE,
1477 };
1478
/*
 * Unified MV mode value written to the picture state, indexed by the
 * mv_mode (or mv_mode2) value from the VC-1 picture parameters.
 */
static const int va_to_gen7_vc1_mv[4] = {
    [0] = 1,    /* 1-MV */
    [1] = 2,    /* 1-MV half-pel */
    [2] = 3,    /* 1-MV half-pel bilinear */
    [3] = 0,    /* Mixed MV */
};
1485
/*
 * B-picture scale factor, indexed by b_picture_fraction (0..20).
 * The consumer multiplies by this value and shifts right by 8, so the
 * entries are expressed in units of 1/256.
 */
static const int b_picture_scale_factor[21] = {
    128,  85, 170,
     64, 192,  51,
    102, 153, 204,
     43, 215,  37,
     74, 111, 148,
    185, 222,  32,
     96, 160, 224,
};
1493
/*
 * Conditional-overlap value written to the picture state, indexed by
 * conditional_overlap_flag (valid indices are 0..2).
 */
static const int va_to_gen7_vc1_condover[3] = {
    [0] = 0,
    [1] = 2,
    [2] = 3,
};
1499
1500 static const int va_to_gen7_vc1_profile[4] = {
1501     GEN7_VC1_SIMPLE_PROFILE,
1502     GEN7_VC1_MAIN_PROFILE,
1503     GEN7_VC1_RESERVED_PROFILE,
1504     GEN7_VC1_ADVANCED_PROFILE
1505 };
1506
1507 static void 
1508 gen75_mfd_free_vc1_surface(void **data)
1509 {
1510     struct gen7_vc1_surface *gen7_vc1_surface = *data;
1511
1512     if (!gen7_vc1_surface)
1513         return;
1514
1515     dri_bo_unreference(gen7_vc1_surface->dmv);
1516     free(gen7_vc1_surface);
1517     *data = NULL;
1518 }
1519
1520 static void
1521 gen75_mfd_init_vc1_surface(VADriverContextP ctx, 
1522                           VAPictureParameterBufferVC1 *pic_param,
1523                           struct object_surface *obj_surface)
1524 {
1525     struct i965_driver_data *i965 = i965_driver_data(ctx);
1526     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
1527     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1528     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1529
1530     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
1531
1532     if (!gen7_vc1_surface) {
1533         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
1534         assert((obj_surface->size & 0x3f) == 0);
1535         obj_surface->private_data = gen7_vc1_surface;
1536     }
1537
1538     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
1539
1540     if (gen7_vc1_surface->dmv == NULL) {
1541         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
1542                                              "direct mv w/r buffer",
1543                                              width_in_mbs * height_in_mbs * 64,
1544                                              0x1000);
1545     }
1546 }
1547
1548 static void
1549 gen75_mfd_vc1_decode_init(VADriverContextP ctx,
1550                          struct decode_state *decode_state,
1551                          struct gen7_mfd_context *gen7_mfd_context)
1552 {
1553     VAPictureParameterBufferVC1 *pic_param;
1554     struct i965_driver_data *i965 = i965_driver_data(ctx);
1555     struct object_surface *obj_surface;
1556     dri_bo *bo;
1557     int width_in_mbs;
1558     int picture_type;
1559
1560     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1561     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1562     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1563     picture_type = pic_param->picture_fields.bits.picture_type;
1564  
1565     intel_update_vc1_frame_store_index(ctx,
1566                                        decode_state,
1567                                        pic_param,
1568                                        gen7_mfd_context->reference_surface);
1569
1570     /* Current decoded picture */
1571     obj_surface = decode_state->render_object;
1572     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
1573     gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
1574
1575     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
1576     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
1577     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
1578     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
1579
1580     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
1581     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
1582     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
1583     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
1584
1585     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
1586     bo = dri_bo_alloc(i965->intel.bufmgr,
1587                       "intra row store",
1588                       width_in_mbs * 64,
1589                       0x1000);
1590     assert(bo);
1591     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
1592     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
1593
1594     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
1595     bo = dri_bo_alloc(i965->intel.bufmgr,
1596                       "deblocking filter row store",
1597                       width_in_mbs * 7 * 64,
1598                       0x1000);
1599     assert(bo);
1600     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
1601     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
1602
1603     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
1604     bo = dri_bo_alloc(i965->intel.bufmgr,
1605                       "bsd mpc row store",
1606                       width_in_mbs * 96,
1607                       0x1000);
1608     assert(bo);
1609     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
1610     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
1611
1612     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
1613
1614     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
1615     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
1616     
1617     if (gen7_mfd_context->bitplane_read_buffer.valid) {
1618         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
1619         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
1620         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
1621         int src_w, src_h;
1622         uint8_t *src = NULL, *dst = NULL;
1623
1624         assert(decode_state->bit_plane->buffer);
1625         src = decode_state->bit_plane->buffer;
1626
1627         bo = dri_bo_alloc(i965->intel.bufmgr,
1628                           "VC-1 Bitplane",
1629                           bitplane_width * height_in_mbs,
1630                           0x1000);
1631         assert(bo);
1632         gen7_mfd_context->bitplane_read_buffer.bo = bo;
1633
1634         dri_bo_map(bo, True);
1635         assert(bo->virtual);
1636         dst = bo->virtual;
1637
1638         for (src_h = 0; src_h < height_in_mbs; src_h++) {
1639             for(src_w = 0; src_w < width_in_mbs; src_w++) {
1640                 int src_index, dst_index;
1641                 int src_shift;
1642                 uint8_t src_value;
1643
1644                 src_index = (src_h * width_in_mbs + src_w) / 2;
1645                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
1646                 src_value = ((src[src_index] >> src_shift) & 0xf);
1647
1648                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
1649                     src_value |= 0x2;
1650                 }
1651
1652                 dst_index = src_w / 2;
1653                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
1654             }
1655
1656             if (src_w & 1)
1657                 dst[src_w / 2] >>= 4;
1658
1659             dst += bitplane_width;
1660         }
1661
1662         dri_bo_unmap(bo);
1663     } else
1664         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
1665 }
1666
1667 static void
1668 gen75_mfd_vc1_pic_state(VADriverContextP ctx,
1669                        struct decode_state *decode_state,
1670                        struct gen7_mfd_context *gen7_mfd_context)
1671 {
1672     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1673     VAPictureParameterBufferVC1 *pic_param;
1674     struct object_surface *obj_surface;
1675     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
1676     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
1677     int unified_mv_mode;
1678     int ref_field_pic_polarity = 0;
1679     int scale_factor = 0;
1680     int trans_ac_y = 0;
1681     int dmv_surface_valid = 0;
1682     int brfd = 0;
1683     int fcm = 0;
1684     int picture_type;
1685     int profile;
1686     int overlap;
1687     int interpolation_mode = 0;
1688
1689     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1690     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1691
1692     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
1693     dquant = pic_param->pic_quantizer_fields.bits.dquant;
1694     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
1695     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
1696     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
1697     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
1698     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
1699     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
1700
1701     if (dquant == 0) {
1702         alt_pquant_config = 0;
1703         alt_pquant_edge_mask = 0;
1704     } else if (dquant == 2) {
1705         alt_pquant_config = 1;
1706         alt_pquant_edge_mask = 0xf;
1707     } else {
1708         assert(dquant == 1);
1709         if (dquantfrm == 0) {
1710             alt_pquant_config = 0;
1711             alt_pquant_edge_mask = 0;
1712             alt_pq = 0;
1713         } else {
1714             assert(dquantfrm == 1);
1715             alt_pquant_config = 1;
1716
1717             switch (dqprofile) {
1718             case 3:
1719                 if (dqbilevel == 0) {
1720                     alt_pquant_config = 2;
1721                     alt_pquant_edge_mask = 0;
1722                 } else {
1723                     assert(dqbilevel == 1);
1724                     alt_pquant_config = 3;
1725                     alt_pquant_edge_mask = 0;
1726                 }
1727                 break;
1728                 
1729             case 0:
1730                 alt_pquant_edge_mask = 0xf;
1731                 break;
1732
1733             case 1:
1734                 if (dqdbedge == 3)
1735                     alt_pquant_edge_mask = 0x9;
1736                 else
1737                     alt_pquant_edge_mask = (0x3 << dqdbedge);
1738
1739                 break;
1740
1741             case 2:
1742                 alt_pquant_edge_mask = (0x1 << dqsbedge);
1743                 break;
1744
1745             default:
1746                 assert(0);
1747             }
1748         }
1749     }
1750
1751     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
1752         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
1753         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
1754     } else {
1755         assert(pic_param->mv_fields.bits.mv_mode < 4);
1756         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
1757     }
1758
1759     if (pic_param->sequence_fields.bits.interlace == 1 &&
1760         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
1761         /* FIXME: calculate reference field picture polarity */
1762         assert(0);
1763         ref_field_pic_polarity = 0;
1764     }
1765
1766     if (pic_param->b_picture_fraction < 21)
1767         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
1768
1769     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
1770     
1771     if (profile == GEN7_VC1_ADVANCED_PROFILE && 
1772         picture_type == GEN7_VC1_I_PICTURE)
1773         picture_type = GEN7_VC1_BI_PICTURE;
1774
1775     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
1776         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
1777     else {
1778         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
1779
1780         /*
1781          * 8.3.6.2.1 Transform Type Selection
1782          * If variable-sized transform coding is not enabled,
1783          * then the 8x8 transform shall be used for all blocks.
1784          * it is also MFX_VC1_PIC_STATE requirement.
1785          */
1786         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
1787             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
1788             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
1789         }
1790     }
1791
1792     if (picture_type == GEN7_VC1_B_PICTURE) {
1793         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
1794
1795         obj_surface = decode_state->reference_objects[1];
1796
1797         if (obj_surface)
1798             gen7_vc1_surface = obj_surface->private_data;
1799
1800         if (!gen7_vc1_surface || 
1801             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
1802              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
1803             dmv_surface_valid = 0;
1804         else
1805             dmv_surface_valid = 1;
1806     }
1807
1808     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
1809
1810     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
1811         fcm = pic_param->picture_fields.bits.frame_coding_mode;
1812     else {
1813         if (pic_param->picture_fields.bits.top_field_first)
1814             fcm = 2;
1815         else
1816             fcm = 3;
1817     }
1818
1819     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
1820         brfd = pic_param->reference_fields.bits.reference_distance;
1821         brfd = (scale_factor * brfd) >> 8;
1822         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
1823
1824         if (brfd < 0)
1825             brfd = 0;
1826     }
1827
1828     overlap = 0;
1829     if (profile != GEN7_VC1_ADVANCED_PROFILE){
1830         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
1831             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
1832             overlap = 1; 
1833         }
1834     }else {
1835         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
1836              pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1837               overlap = 1; 
1838         }
1839         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
1840             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
1841              if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
1842                 overlap = 1; 
1843              } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
1844                         va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
1845                  overlap = 1;
1846              }
1847         }
1848     } 
1849
1850     assert(pic_param->conditional_overlap_flag < 3);
1851     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
1852
1853     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
1854         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1855          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
1856         interpolation_mode = 9; /* Half-pel bilinear */
1857     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
1858              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
1859               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
1860         interpolation_mode = 1; /* Half-pel bicubic */
1861     else
1862         interpolation_mode = 0; /* Quarter-pel bicubic */
1863
1864     BEGIN_BCS_BATCH(batch, 6);
1865     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
1866     OUT_BCS_BATCH(batch,
1867                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
1868                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
1869     OUT_BCS_BATCH(batch,
1870                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
1871                   dmv_surface_valid << 15 |
1872                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
1873                   pic_param->rounding_control << 13 |
1874                   pic_param->sequence_fields.bits.syncmarker << 12 |
1875                   interpolation_mode << 8 |
1876                   0 << 7 | /* FIXME: scale up or down ??? */
1877                   pic_param->range_reduction_frame << 6 |
1878                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
1879                   overlap << 4 |
1880                   !pic_param->picture_fields.bits.is_first_field << 3 |
1881                   (pic_param->sequence_fields.bits.profile == 3) << 0);
1882     OUT_BCS_BATCH(batch,
1883                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
1884                   picture_type << 26 |
1885                   fcm << 24 |
1886                   alt_pq << 16 |
1887                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
1888                   scale_factor << 0);
1889     OUT_BCS_BATCH(batch,
1890                   unified_mv_mode << 28 |
1891                   pic_param->mv_fields.bits.four_mv_switch << 27 |
1892                   pic_param->fast_uvmc_flag << 26 |
1893                   ref_field_pic_polarity << 25 |
1894                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
1895                   pic_param->reference_fields.bits.reference_distance << 20 |
1896                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
1897                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
1898                   pic_param->mv_fields.bits.extended_mv_range << 8 |
1899                   alt_pquant_edge_mask << 4 |
1900                   alt_pquant_config << 2 |
1901                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
1902                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
1903     OUT_BCS_BATCH(batch,
1904                   !!pic_param->bitplane_present.value << 31 |
1905                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
1906                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
1907                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
1908                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
1909                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
1910                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
1911                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
1912                   pic_param->mv_fields.bits.mv_table << 20 |
1913                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
1914                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
1915                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
1916                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
1917                   pic_param->mb_mode_table << 8 |
1918                   trans_ac_y << 6 |
1919                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
1920                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
1921                   pic_param->cbp_table << 0);
1922     ADVANCE_BCS_BATCH(batch);
1923 }
1924
1925 static void
1926 gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
1927                              struct decode_state *decode_state,
1928                              struct gen7_mfd_context *gen7_mfd_context)
1929 {
1930     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1931     VAPictureParameterBufferVC1 *pic_param;
1932     int intensitycomp_single;
1933
1934     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1935     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1936
1937     assert(decode_state->pic_param && decode_state->pic_param->buffer);
1938     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
1939     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
1940
1941     BEGIN_BCS_BATCH(batch, 6);
1942     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
1943     OUT_BCS_BATCH(batch,
1944                   0 << 14 | /* FIXME: double ??? */
1945                   0 << 12 |
1946                   intensitycomp_single << 10 |
1947                   intensitycomp_single << 8 |
1948                   0 << 4 | /* FIXME: interlace mode */
1949                   0);
1950     OUT_BCS_BATCH(batch,
1951                   pic_param->luma_shift << 16 |
1952                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
1953     OUT_BCS_BATCH(batch, 0);
1954     OUT_BCS_BATCH(batch, 0);
1955     OUT_BCS_BATCH(batch, 0);
1956     ADVANCE_BCS_BATCH(batch);
1957 }
1958
1959 static void
1960 gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
1961                               struct decode_state *decode_state,
1962                               struct gen7_mfd_context *gen7_mfd_context)
1963 {
1964     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
1965     struct object_surface *obj_surface;
1966     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
1967
1968     obj_surface = decode_state->render_object;
1969
1970     if (obj_surface && obj_surface->private_data) {
1971         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1972     }
1973
1974     obj_surface = decode_state->reference_objects[1];
1975
1976     if (obj_surface && obj_surface->private_data) {
1977         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
1978     }
1979
1980     BEGIN_BCS_BATCH(batch, 7);
1981     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
1982
1983     if (dmv_write_buffer)
1984         OUT_BCS_RELOC(batch, dmv_write_buffer,
1985                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
1986                       0);
1987     else
1988         OUT_BCS_BATCH(batch, 0);
1989
1990         OUT_BCS_BATCH(batch, 0);
1991         OUT_BCS_BATCH(batch, 0);
1992
1993     if (dmv_read_buffer)
1994         OUT_BCS_RELOC(batch, dmv_read_buffer,
1995                       I915_GEM_DOMAIN_INSTRUCTION, 0,
1996                       0);
1997     else
1998         OUT_BCS_BATCH(batch, 0);
1999         OUT_BCS_BATCH(batch, 0);
2000         OUT_BCS_BATCH(batch, 0);
2001                   
2002     ADVANCE_BCS_BATCH(batch);
2003 }
2004
2005 static void
2006 gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
2007                               struct decode_state *decode_state,
2008                               struct gen7_mfd_context *gen7_mfd_context)
2009 {
2010     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2011     struct i965_driver_data *i965 = i965_driver_data(ctx);
2012     struct object_surface *obj_surface;
2013     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
2014
2015     if (IS_STEPPING_BPLUS(i965)) {
2016         gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
2017         return;
2018     }
2019
2020     obj_surface = decode_state->render_object;
2021
2022     if (obj_surface && obj_surface->private_data) {
2023         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2024     }
2025
2026     obj_surface = decode_state->reference_objects[1];
2027
2028     if (obj_surface && obj_surface->private_data) {
2029         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
2030     }
2031
2032     BEGIN_BCS_BATCH(batch, 3);
2033     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
2034
2035     if (dmv_write_buffer)
2036         OUT_BCS_RELOC(batch, dmv_write_buffer,
2037                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2038                       0);
2039     else
2040         OUT_BCS_BATCH(batch, 0);
2041
2042     if (dmv_read_buffer)
2043         OUT_BCS_RELOC(batch, dmv_read_buffer,
2044                       I915_GEM_DOMAIN_INSTRUCTION, 0,
2045                       0);
2046     else
2047         OUT_BCS_BATCH(batch, 0);
2048                   
2049     ADVANCE_BCS_BATCH(batch);
2050 }
2051
/*
 * Translate the macroblock bit offset of a slice into an offset within the
 * raw slice buffer.
 *
 * @buf:                      start of the slice data
 * @in_slice_data_bit_offset: bit offset of the first macroblock
 * @profile:                  sequence profile value; only 3 is treated specially
 *
 * For any profile other than 3 the offset is returned unchanged.  For
 * profile 3 the whole bytes of the slice header are walked in the raw
 * buffer; every time the bytes 00 00 03 followed by a byte below 4 are seen,
 * two header bytes are accounted for while three raw bytes are consumed.
 * The result is the raw byte position reached, in bits, plus the sub-byte
 * part of the input offset.
 */
static int
gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
{
    const int header_bytes = in_slice_data_bit_offset / 8;
    int counted = 0;    /* header bytes accounted for so far */
    int raw_pos = 0;    /* current byte position in buf */

    if (profile != 3)
        return in_slice_data_bit_offset;

    while (counted < header_bytes) {
        if (buf[raw_pos] == 0 && buf[raw_pos + 1] == 0 &&
            buf[raw_pos + 2] == 3 && buf[raw_pos + 3] < 4) {
            counted += 2;
            raw_pos += 3;
        } else {
            counted += 1;
            raw_pos += 1;
        }
    }

    return 8 * raw_pos + in_slice_data_bit_offset % 8;
}
2073
2074 static void
2075 gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
2076                         VAPictureParameterBufferVC1 *pic_param,
2077                         VASliceParameterBufferVC1 *slice_param,
2078                         VASliceParameterBufferVC1 *next_slice_param,
2079                         dri_bo *slice_data_bo,
2080                         struct gen7_mfd_context *gen7_mfd_context)
2081 {
2082     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2083     int next_slice_start_vert_pos;
2084     int macroblock_offset;
2085     uint8_t *slice_data = NULL;
2086
2087     dri_bo_map(slice_data_bo, 0);
2088     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
2089     macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data, 
2090                                                                slice_param->macroblock_offset,
2091                                                                pic_param->sequence_fields.bits.profile);
2092     dri_bo_unmap(slice_data_bo);
2093
2094     if (next_slice_param)
2095         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
2096     else
2097         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
2098
2099     BEGIN_BCS_BATCH(batch, 5);
2100     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
2101     OUT_BCS_BATCH(batch, 
2102                   slice_param->slice_data_size - (macroblock_offset >> 3));
2103     OUT_BCS_BATCH(batch, 
2104                   slice_param->slice_data_offset + (macroblock_offset >> 3));
2105     OUT_BCS_BATCH(batch,
2106                   slice_param->slice_vertical_position << 16 |
2107                   next_slice_start_vert_pos << 0);
2108     OUT_BCS_BATCH(batch,
2109                   (macroblock_offset & 0x7));
2110     ADVANCE_BCS_BATCH(batch);
2111 }
2112
2113 static void
2114 gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
2115                             struct decode_state *decode_state,
2116                             struct gen7_mfd_context *gen7_mfd_context)
2117 {
2118     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2119     VAPictureParameterBufferVC1 *pic_param;
2120     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
2121     dri_bo *slice_data_bo;
2122     int i, j;
2123
2124     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2125     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
2126
2127     gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
2128     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
2129     intel_batchbuffer_emit_mi_flush(batch);
2130     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2131     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2132     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2133     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
2134     gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
2135     gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
2136     gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
2137
2138     for (j = 0; j < decode_state->num_slice_params; j++) {
2139         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
2140         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
2141         slice_data_bo = decode_state->slice_datas[j]->bo;
2142         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
2143
2144         if (j == decode_state->num_slice_params - 1)
2145             next_slice_group_param = NULL;
2146         else
2147             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
2148
2149         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
2150             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
2151
2152             if (i < decode_state->slice_params[j]->num_elements - 1)
2153                 next_slice_param = slice_param + 1;
2154             else
2155                 next_slice_param = next_slice_group_param;
2156
2157             gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
2158             slice_param++;
2159         }
2160     }
2161
2162     intel_batchbuffer_end_atomic(batch);
2163     intel_batchbuffer_flush(batch);
2164 }
2165
2166 static void
2167 gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
2168                           struct decode_state *decode_state,
2169                           struct gen7_mfd_context *gen7_mfd_context)
2170 {
2171     struct object_surface *obj_surface;
2172     VAPictureParameterBufferJPEGBaseline *pic_param;
2173     int subsampling = SUBSAMPLE_YUV420;
2174     int fourcc = VA_FOURCC('I', 'M', 'C', '3');
2175
2176     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2177
2178     if (pic_param->num_components == 1) {
2179         subsampling = SUBSAMPLE_YUV400;
2180         fourcc = VA_FOURCC('Y', '8', '0', '0');
2181     } else if (pic_param->num_components == 3) {
2182         int h1 = pic_param->components[0].h_sampling_factor;
2183         int h2 = pic_param->components[1].h_sampling_factor;
2184         int h3 = pic_param->components[2].h_sampling_factor;
2185         int v1 = pic_param->components[0].v_sampling_factor;
2186         int v2 = pic_param->components[1].v_sampling_factor;
2187         int v3 = pic_param->components[2].v_sampling_factor;
2188
2189         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2190             v1 == 2 && v2 == 1 && v3 == 1) {
2191             subsampling = SUBSAMPLE_YUV420;
2192             fourcc = VA_FOURCC('I', 'M', 'C', '3');
2193         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2194                    v1 == 1 && v2 == 1 && v3 == 1) {
2195             subsampling = SUBSAMPLE_YUV422H;
2196             fourcc = VA_FOURCC('4', '2', '2', 'H');
2197         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2198                    v1 == 1 && v2 == 1 && v3 == 1) {
2199             subsampling = SUBSAMPLE_YUV444;
2200             fourcc = VA_FOURCC('4', '4', '4', 'P');
2201         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2202                    v1 == 1 && v2 == 1 && v3 == 1) {
2203             subsampling = SUBSAMPLE_YUV411;
2204             fourcc = VA_FOURCC('4', '1', '1', 'P');
2205         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2206                    v1 == 2 && v2 == 1 && v3 == 1) {
2207             subsampling = SUBSAMPLE_YUV422V;
2208             fourcc = VA_FOURCC('4', '2', '2', 'V');
2209         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2210                    v1 == 2 && v2 == 2 && v3 == 2) {
2211             subsampling = SUBSAMPLE_YUV422H;
2212             fourcc = VA_FOURCC('4', '2', '2', 'H');
2213         } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2214                    v1 == 2 && v2 == 1 && v3 == 1) {
2215             subsampling = SUBSAMPLE_YUV422V;
2216             fourcc = VA_FOURCC('4', '2', '2', 'V');
2217         } else
2218             assert(0);
2219     } else {
2220         assert(0);
2221     }
2222
2223     /* Current decoded picture */
2224     obj_surface = decode_state->render_object;
2225     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
2226
2227     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
2228     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
2229     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
2230     gen7_mfd_context->pre_deblocking_output.valid = 1;
2231
2232     gen7_mfd_context->post_deblocking_output.bo = NULL;
2233     gen7_mfd_context->post_deblocking_output.valid = 0;
2234
2235     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
2236     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
2237
2238     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
2239     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
2240
2241     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
2242     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
2243
2244     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
2245     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
2246
2247     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
2248     gen7_mfd_context->bitplane_read_buffer.valid = 0;
2249 }
2250
2251 static const int va_to_gen7_jpeg_rotation[4] = {
2252     GEN7_JPEG_ROTATION_0,
2253     GEN7_JPEG_ROTATION_90,
2254     GEN7_JPEG_ROTATION_180,
2255     GEN7_JPEG_ROTATION_270
2256 };
2257
2258 static void
2259 gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
2260                         struct decode_state *decode_state,
2261                         struct gen7_mfd_context *gen7_mfd_context)
2262 {
2263     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2264     VAPictureParameterBufferJPEGBaseline *pic_param;
2265     int chroma_type = GEN7_YUV420;
2266     int frame_width_in_blks;
2267     int frame_height_in_blks;
2268
2269     assert(decode_state->pic_param && decode_state->pic_param->buffer);
2270     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2271
2272     if (pic_param->num_components == 1)
2273         chroma_type = GEN7_YUV400;
2274     else if (pic_param->num_components == 3) {
2275         int h1 = pic_param->components[0].h_sampling_factor;
2276         int h2 = pic_param->components[1].h_sampling_factor;
2277         int h3 = pic_param->components[2].h_sampling_factor;
2278         int v1 = pic_param->components[0].v_sampling_factor;
2279         int v2 = pic_param->components[1].v_sampling_factor;
2280         int v3 = pic_param->components[2].v_sampling_factor;
2281
2282         if (h1 == 2 && h2 == 1 && h3 == 1 &&
2283             v1 == 2 && v2 == 1 && v3 == 1)
2284             chroma_type = GEN7_YUV420;
2285         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2286                  v1 == 1 && v2 == 1 && v3 == 1)
2287             chroma_type = GEN7_YUV422H_2Y;
2288         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2289                  v1 == 1 && v2 == 1 && v3 == 1)
2290             chroma_type = GEN7_YUV444;
2291         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
2292                  v1 == 1 && v2 == 1 && v3 == 1)
2293             chroma_type = GEN7_YUV411;
2294         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
2295                  v1 == 2 && v2 == 1 && v3 == 1)
2296             chroma_type = GEN7_YUV422V_2Y;
2297         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
2298                  v1 == 2 && v2 == 2 && v3 == 2)
2299             chroma_type = GEN7_YUV422H_4Y;
2300         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
2301                  v1 == 2 && v2 == 1 && v3 == 1)
2302             chroma_type = GEN7_YUV422V_4Y;
2303         else
2304             assert(0);
2305     }
2306
2307     if (chroma_type == GEN7_YUV400 ||
2308         chroma_type == GEN7_YUV444 ||
2309         chroma_type == GEN7_YUV422V_2Y) {
2310         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
2311         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
2312     } else if (chroma_type == GEN7_YUV411) {
2313         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
2314         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
2315     } else {
2316         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
2317         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
2318     }
2319
2320     BEGIN_BCS_BATCH(batch, 3);
2321     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
2322     OUT_BCS_BATCH(batch,
2323                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
2324                   (chroma_type << 0));
2325     OUT_BCS_BATCH(batch,
2326                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
2327                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
2328     ADVANCE_BCS_BATCH(batch);
2329 }
2330
2331 static const int va_to_gen7_jpeg_hufftable[2] = {
2332     MFX_HUFFTABLE_ID_Y,
2333     MFX_HUFFTABLE_ID_UV
2334 };
2335
2336 static void
2337 gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
2338                                struct decode_state *decode_state,
2339                                struct gen7_mfd_context *gen7_mfd_context,
2340                                int num_tables)
2341 {
2342     VAHuffmanTableBufferJPEGBaseline *huffman_table;
2343     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2344     int index;
2345
2346     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
2347         return;
2348
2349     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
2350
2351     for (index = 0; index < num_tables; index++) {
2352         int id = va_to_gen7_jpeg_hufftable[index];
2353
2354         if (!huffman_table->load_huffman_table[index])
2355             continue;
2356
2357         BEGIN_BCS_BATCH(batch, 53);
2358         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
2359         OUT_BCS_BATCH(batch, id);
2360         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
2361         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
2362         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
2363         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
2364         ADVANCE_BCS_BATCH(batch);
2365     }
2366 }
2367
2368 static const int va_to_gen7_jpeg_qm[5] = {
2369     -1,
2370     MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
2371     MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
2372     MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
2373     MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
2374 };
2375
2376 static void
2377 gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
2378                        struct decode_state *decode_state,
2379                        struct gen7_mfd_context *gen7_mfd_context)
2380 {
2381     VAPictureParameterBufferJPEGBaseline *pic_param;
2382     VAIQMatrixBufferJPEGBaseline *iq_matrix;
2383     int index;
2384
2385     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
2386         return;
2387
2388     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
2389     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
2390
2391     assert(pic_param->num_components <= 3);
2392
2393     for (index = 0; index < pic_param->num_components; index++) {
2394         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
2395         int qm_type;
2396         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
2397         unsigned char raster_qm[64];
2398         int j;
2399
2400         if (id > 4 || id < 1)
2401             continue;
2402
2403         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
2404             continue;
2405
2406         qm_type = va_to_gen7_jpeg_qm[id];
2407
2408         for (j = 0; j < 64; j++)
2409             raster_qm[zigzag_direct[j]] = qm[j];
2410
2411         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
2412     }
2413 }
2414
2415 static void
2416 gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
2417                          VAPictureParameterBufferJPEGBaseline *pic_param,
2418                          VASliceParameterBufferJPEGBaseline *slice_param,
2419                          VASliceParameterBufferJPEGBaseline *next_slice_param,
2420                          dri_bo *slice_data_bo,
2421                          struct gen7_mfd_context *gen7_mfd_context)
2422 {
2423     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2424     int scan_component_mask = 0;
2425     int i;
2426
2427     assert(slice_param->num_components > 0);
2428     assert(slice_param->num_components < 4);
2429     assert(slice_param->num_components <= pic_param->num_components);
2430
2431     for (i = 0; i < slice_param->num_components; i++) {
2432         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
2433         case 1:
2434             scan_component_mask |= (1 << 0);
2435             break;
2436         case 2:
2437             scan_component_mask |= (1 << 1);
2438             break;
2439         case 3:
2440             scan_component_mask |= (1 << 2);
2441             break;
2442         default:
2443             assert(0);
2444             break;
2445         }
2446     }
2447
2448     BEGIN_BCS_BATCH(batch, 6);
2449     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
2450     OUT_BCS_BATCH(batch, 
2451                   slice_param->slice_data_size);
2452     OUT_BCS_BATCH(batch, 
2453                   slice_param->slice_data_offset);
2454     OUT_BCS_BATCH(batch,
2455                   slice_param->slice_horizontal_position << 16 |
2456                   slice_param->slice_vertical_position << 0);
2457     OUT_BCS_BATCH(batch,
2458                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
2459                   (scan_component_mask << 27) |                 /* scan components */
2460                   (0 << 26) |   /* disable interrupt allowed */
2461                   (slice_param->num_mcus << 0));                /* MCU count */
2462     OUT_BCS_BATCH(batch,
2463                   (slice_param->restart_interval << 0));    /* RestartInterval */
2464     ADVANCE_BCS_BATCH(batch);
2465 }
2466
2467 /* Workaround for JPEG decoding on Ivybridge */
2468
2469 VAStatus 
2470 i965_DestroySurfaces(VADriverContextP ctx,
2471                      VASurfaceID *surface_list,
2472                      int num_surfaces);
2473 VAStatus 
2474 i965_CreateSurfaces(VADriverContextP ctx,
2475                     int width,
2476                     int height,
2477                     int format,
2478                     int num_surfaces,
2479                     VASurfaceID *surfaces);
2480
/*
 * Description of the small clip used by the JPEG workaround code.
 * width/height size the workaround surface and the first data_size bytes
 * of data are uploaded into the workaround slice data buffer by
 * gen75_jpeg_wa_init().
 */
static struct {
    int width;
    int height;
    unsigned char data[32];
    int data_size;
    int data_bit_offset;
    int qp;
} gen7_jpeg_wa_clip = {
    .width           = 16,
    .height          = 16,
    .data            = {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    .data_size       = 14,
    .data_bit_offset = 40,
    .qp              = 28,
};
2499
2500 static void
2501 gen75_jpeg_wa_init(VADriverContextP ctx,
2502                   struct gen7_mfd_context *gen7_mfd_context)
2503 {
2504     struct i965_driver_data *i965 = i965_driver_data(ctx);
2505     VAStatus status;
2506     struct object_surface *obj_surface;
2507
2508     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
2509         i965_DestroySurfaces(ctx,
2510                              &gen7_mfd_context->jpeg_wa_surface_id,
2511                              1);
2512
2513     status = i965_CreateSurfaces(ctx,
2514                                  gen7_jpeg_wa_clip.width,
2515                                  gen7_jpeg_wa_clip.height,
2516                                  VA_RT_FORMAT_YUV420,
2517                                  1,
2518                                  &gen7_mfd_context->jpeg_wa_surface_id);
2519     assert(status == VA_STATUS_SUCCESS);
2520
2521     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
2522     assert(obj_surface);
2523     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
2524     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
2525
2526     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
2527         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
2528                                                                "JPEG WA data",
2529                                                                0x1000,
2530                                                                0x1000);
2531         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
2532                        0,
2533                        gen7_jpeg_wa_clip.data_size,
2534                        gen7_jpeg_wa_clip.data);
2535     }
2536 }
2537
2538 static void
2539 gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
2540                               struct gen7_mfd_context *gen7_mfd_context)
2541 {
2542     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2543
2544     BEGIN_BCS_BATCH(batch, 5);
2545     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2546     OUT_BCS_BATCH(batch,
2547                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
2548                   (MFD_MODE_VLD << 15) | /* VLD mode */
2549                   (0 << 10) | /* disable Stream-Out */
2550                   (0 << 9)  | /* Post Deblocking Output */
2551                   (1 << 8)  | /* Pre Deblocking Output */
2552                   (0 << 5)  | /* not in stitch mode */
2553                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
2554                   (MFX_FORMAT_AVC << 0));
2555     OUT_BCS_BATCH(batch,
2556                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
2557                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
2558                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
2559                   (0 << 1)  |
2560                   (0 << 0));
2561     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */ 
2562     OUT_BCS_BATCH(batch, 0); /* reserved */
2563     ADVANCE_BCS_BATCH(batch);
2564 }
2565
2566 static void
2567 gen75_jpeg_wa_surface_state(VADriverContextP ctx,
2568                            struct gen7_mfd_context *gen7_mfd_context)
2569 {
2570     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2571     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2572
2573     BEGIN_BCS_BATCH(batch, 6);
2574     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2575     OUT_BCS_BATCH(batch, 0);
2576     OUT_BCS_BATCH(batch,
2577                   ((obj_surface->orig_width - 1) << 18) |
2578                   ((obj_surface->orig_height - 1) << 4));
2579     OUT_BCS_BATCH(batch,
2580                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2581                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
2582                   (0 << 22) | /* surface object control state, ignored */
2583                   ((obj_surface->width - 1) << 3) | /* pitch */
2584                   (0 << 2)  | /* must be 0 */
2585                   (1 << 1)  | /* must be tiled */
2586                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
2587     OUT_BCS_BATCH(batch,
2588                   (0 << 16) | /* X offset for U(Cb), must be 0 */
2589                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
2590     OUT_BCS_BATCH(batch,
2591                   (0 << 16) | /* X offset for V(Cr), must be 0 */
2592                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
2593     ADVANCE_BCS_BATCH(batch);
2594 }
2595
2596 static void
2597 gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
2598                                  struct gen7_mfd_context *gen7_mfd_context)
2599 {
2600     struct i965_driver_data *i965 = i965_driver_data(ctx);
2601     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2602     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2603     dri_bo *intra_bo;
2604     int i;
2605
2606     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2607                             "intra row store",
2608                             128 * 64,
2609                             0x1000);
2610
2611     BEGIN_BCS_BATCH(batch, 61);
2612     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
2613     OUT_BCS_RELOC(batch,
2614                   obj_surface->bo,
2615                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2616                   0);
2617         OUT_BCS_BATCH(batch, 0);
2618         OUT_BCS_BATCH(batch, 0);
2619     
2620
2621     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2622         OUT_BCS_BATCH(batch, 0);
2623         OUT_BCS_BATCH(batch, 0);
2624
2625         /* uncompressed-video & stream out 7-12 */
2626     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2627     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2628         OUT_BCS_BATCH(batch, 0);
2629         OUT_BCS_BATCH(batch, 0);
2630         OUT_BCS_BATCH(batch, 0);
2631         OUT_BCS_BATCH(batch, 0);
2632
2633         /* the DW 13-15 is for intra row store scratch */
2634     OUT_BCS_RELOC(batch,
2635                   intra_bo,
2636                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2637                   0);
2638         OUT_BCS_BATCH(batch, 0);
2639         OUT_BCS_BATCH(batch, 0);
2640
2641         /* the DW 16-18 is for deblocking filter */ 
2642     OUT_BCS_BATCH(batch, 0);
2643         OUT_BCS_BATCH(batch, 0);
2644         OUT_BCS_BATCH(batch, 0);
2645
2646     /* DW 19..50 */
2647     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2648         OUT_BCS_BATCH(batch, 0);
2649         OUT_BCS_BATCH(batch, 0);
2650     }
2651     OUT_BCS_BATCH(batch, 0);
2652
2653         /* the DW52-54 is for mb status address */
2654     OUT_BCS_BATCH(batch, 0);
2655         OUT_BCS_BATCH(batch, 0);
2656         OUT_BCS_BATCH(batch, 0);
2657         /* the DW56-60 is for ILDB & second ILDB address */
2658     OUT_BCS_BATCH(batch, 0);
2659         OUT_BCS_BATCH(batch, 0);
2660         OUT_BCS_BATCH(batch, 0);
2661     OUT_BCS_BATCH(batch, 0);
2662         OUT_BCS_BATCH(batch, 0);
2663         OUT_BCS_BATCH(batch, 0);
2664
2665     ADVANCE_BCS_BATCH(batch);
2666
2667     dri_bo_unreference(intra_bo);
2668 }
2669
2670 static void
2671 gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
2672                                  struct gen7_mfd_context *gen7_mfd_context)
2673 {
2674     struct i965_driver_data *i965 = i965_driver_data(ctx);
2675     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
2676     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2677     dri_bo *intra_bo;
2678     int i;
2679
2680     if (IS_STEPPING_BPLUS(i965)) {
2681         gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
2682         return;
2683     }
2684
2685     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
2686                             "intra row store",
2687                             128 * 64,
2688                             0x1000);
2689
2690     BEGIN_BCS_BATCH(batch, 25);
2691     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
2692     OUT_BCS_RELOC(batch,
2693                   obj_surface->bo,
2694                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2695                   0);
2696     
2697     OUT_BCS_BATCH(batch, 0); /* post deblocking */
2698
2699     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2700     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
2701
2702     OUT_BCS_RELOC(batch,
2703                   intra_bo,
2704                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2705                   0);
2706
2707     OUT_BCS_BATCH(batch, 0);
2708
2709     /* DW 7..22 */
2710     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2711         OUT_BCS_BATCH(batch, 0);
2712     }
2713
2714     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
2715     OUT_BCS_BATCH(batch, 0);
2716     ADVANCE_BCS_BATCH(batch);
2717
2718     dri_bo_unreference(intra_bo);
2719 }
2720
2721 static void
2722 gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
2723                                      struct gen7_mfd_context *gen7_mfd_context)
2724 {
2725     struct i965_driver_data *i965 = i965_driver_data(ctx);
2726     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2727     dri_bo *bsd_mpc_bo, *mpr_bo;
2728
2729     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2730                               "bsd mpc row store",
2731                               11520, /* 1.5 * 120 * 64 */
2732                               0x1000);
2733
2734     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2735                           "mpr row store",
2736                           7680, /* 1. 0 * 120 * 64 */
2737                           0x1000);
2738
2739     BEGIN_BCS_BATCH(batch, 10);
2740     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2741
2742     OUT_BCS_RELOC(batch,
2743                   bsd_mpc_bo,
2744                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2745                   0);
2746
2747     OUT_BCS_BATCH(batch, 0);
2748     OUT_BCS_BATCH(batch, 0);
2749
2750     OUT_BCS_RELOC(batch,
2751                   mpr_bo,
2752                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2753                   0);
2754     OUT_BCS_BATCH(batch, 0);
2755     OUT_BCS_BATCH(batch, 0);
2756
2757     OUT_BCS_BATCH(batch, 0);
2758     OUT_BCS_BATCH(batch, 0);
2759     OUT_BCS_BATCH(batch, 0);
2760
2761     ADVANCE_BCS_BATCH(batch);
2762
2763     dri_bo_unreference(bsd_mpc_bo);
2764     dri_bo_unreference(mpr_bo);
2765 }
2766
2767 static void
2768 gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
2769                                      struct gen7_mfd_context *gen7_mfd_context)
2770 {
2771     struct i965_driver_data *i965 = i965_driver_data(ctx);
2772     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2773     dri_bo *bsd_mpc_bo, *mpr_bo;
2774
2775     if (IS_STEPPING_BPLUS(i965)) {
2776         gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
2777         return;
2778     }
2779
2780     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
2781                               "bsd mpc row store",
2782                               11520, /* 1.5 * 120 * 64 */
2783                               0x1000);
2784
2785     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
2786                           "mpr row store",
2787                           7680, /* 1. 0 * 120 * 64 */
2788                           0x1000);
2789
2790     BEGIN_BCS_BATCH(batch, 4);
2791     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
2792
2793     OUT_BCS_RELOC(batch,
2794                   bsd_mpc_bo,
2795                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2796                   0);
2797
2798     OUT_BCS_RELOC(batch,
2799                   mpr_bo,
2800                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
2801                   0);
2802     OUT_BCS_BATCH(batch, 0);
2803
2804     ADVANCE_BCS_BATCH(batch);
2805
2806     dri_bo_unreference(bsd_mpc_bo);
2807     dri_bo_unreference(mpr_bo);
2808 }
2809
2810 static void
2811 gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
2812                           struct gen7_mfd_context *gen7_mfd_context)
2813 {
2814
2815 }
2816
2817 static void
2818 gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
2819                            struct gen7_mfd_context *gen7_mfd_context)
2820 {
2821     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2822     int img_struct = 0;
2823     int mbaff_frame_flag = 0;
2824     unsigned int width_in_mbs = 1, height_in_mbs = 1;
2825
2826     BEGIN_BCS_BATCH(batch, 16);
2827     OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
2828     OUT_BCS_BATCH(batch, 
2829                   (width_in_mbs * height_in_mbs - 1));
2830     OUT_BCS_BATCH(batch, 
2831                   ((height_in_mbs - 1) << 16) | 
2832                   ((width_in_mbs - 1) << 0));
2833     OUT_BCS_BATCH(batch, 
2834                   (0 << 24) |
2835                   (0 << 16) |
2836                   (0 << 14) |
2837                   (0 << 13) |
2838                   (0 << 12) | /* differ from GEN6 */
2839                   (0 << 10) |
2840                   (img_struct << 8));
2841     OUT_BCS_BATCH(batch,
2842                   (1 << 10) | /* 4:2:0 */
2843                   (1 << 7) |  /* CABAC */
2844                   (0 << 6) |
2845                   (0 << 5) |
2846                   (0 << 4) |
2847                   (0 << 3) |
2848                   (1 << 2) |
2849                   (mbaff_frame_flag << 1) |
2850                   (0 << 0));
2851     OUT_BCS_BATCH(batch, 0);
2852     OUT_BCS_BATCH(batch, 0);
2853     OUT_BCS_BATCH(batch, 0);
2854     OUT_BCS_BATCH(batch, 0);
2855     OUT_BCS_BATCH(batch, 0);
2856     OUT_BCS_BATCH(batch, 0);
2857     OUT_BCS_BATCH(batch, 0);
2858     OUT_BCS_BATCH(batch, 0);
2859     OUT_BCS_BATCH(batch, 0);
2860     OUT_BCS_BATCH(batch, 0);
2861     OUT_BCS_BATCH(batch, 0);
2862     ADVANCE_BCS_BATCH(batch);
2863 }
2864
2865 static void
2866 gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
2867                                   struct gen7_mfd_context *gen7_mfd_context)
2868 {
2869     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2870     int i;
2871
2872     BEGIN_BCS_BATCH(batch, 71);
2873     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
2874
2875     /* reference surfaces 0..15 */
2876     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2877         OUT_BCS_BATCH(batch, 0); /* top */
2878         OUT_BCS_BATCH(batch, 0); /* bottom */
2879     }
2880         
2881         OUT_BCS_BATCH(batch, 0);
2882
2883     /* the current decoding frame/field */
2884     OUT_BCS_BATCH(batch, 0); /* top */
2885     OUT_BCS_BATCH(batch, 0);
2886     OUT_BCS_BATCH(batch, 0);
2887
2888     /* POC List */
2889     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2890         OUT_BCS_BATCH(batch, 0);
2891         OUT_BCS_BATCH(batch, 0);
2892     }
2893
2894     OUT_BCS_BATCH(batch, 0);
2895     OUT_BCS_BATCH(batch, 0);
2896
2897     ADVANCE_BCS_BATCH(batch);
2898 }
2899
2900 static void
2901 gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
2902                                   struct gen7_mfd_context *gen7_mfd_context)
2903 {
2904     struct i965_driver_data *i965 = i965_driver_data(ctx);
2905     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2906     int i;
2907
2908     if (IS_STEPPING_BPLUS(i965)) {
2909         gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
2910         return;
2911     }   
2912
2913     BEGIN_BCS_BATCH(batch, 69);
2914     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
2915
2916     /* reference surfaces 0..15 */
2917     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2918         OUT_BCS_BATCH(batch, 0); /* top */
2919         OUT_BCS_BATCH(batch, 0); /* bottom */
2920     }
2921
2922     /* the current decoding frame/field */
2923     OUT_BCS_BATCH(batch, 0); /* top */
2924     OUT_BCS_BATCH(batch, 0); /* bottom */
2925
2926     /* POC List */
2927     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
2928         OUT_BCS_BATCH(batch, 0);
2929         OUT_BCS_BATCH(batch, 0);
2930     }
2931
2932     OUT_BCS_BATCH(batch, 0);
2933     OUT_BCS_BATCH(batch, 0);
2934
2935     ADVANCE_BCS_BATCH(batch);
2936 }
2937
2938 static void 
2939 gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
2940                                      struct gen7_mfd_context *gen7_mfd_context)
2941 {
2942     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2943
2944     BEGIN_BCS_BATCH(batch, 11);
2945     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2946     OUT_BCS_RELOC(batch,
2947                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2948                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2949                   0);
2950     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2951     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2952     OUT_BCS_BATCH(batch, 0);
2953     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2954     OUT_BCS_BATCH(batch, 0);
2955     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2956     OUT_BCS_BATCH(batch, 0);
2957     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2958     OUT_BCS_BATCH(batch, 0);
2959     ADVANCE_BCS_BATCH(batch);
2960 }
2961
2962 static void
2963 gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
2964                                      struct gen7_mfd_context *gen7_mfd_context)
2965 {
2966     struct i965_driver_data *i965 = i965_driver_data(ctx);
2967     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2968
2969     if (IS_STEPPING_BPLUS(i965)) {
2970         gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
2971         return;
2972     }   
2973
2974     BEGIN_BCS_BATCH(batch, 11);
2975     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
2976     OUT_BCS_RELOC(batch,
2977                   gen7_mfd_context->jpeg_wa_slice_data_bo,
2978                   I915_GEM_DOMAIN_INSTRUCTION, 0,
2979                   0);
2980     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
2981     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2982     OUT_BCS_BATCH(batch, 0);
2983     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2984     OUT_BCS_BATCH(batch, 0);
2985     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2986     OUT_BCS_BATCH(batch, 0);
2987     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
2988     OUT_BCS_BATCH(batch, 0);
2989     ADVANCE_BCS_BATCH(batch);
2990 }
2991
2992 static void
2993 gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
2994                             struct gen7_mfd_context *gen7_mfd_context)
2995 {
2996     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
2997
2998     /* the input bitsteam format on GEN7 differs from GEN6 */
2999     BEGIN_BCS_BATCH(batch, 6);
3000     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
3001     OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
3002     OUT_BCS_BATCH(batch, 0);
3003     OUT_BCS_BATCH(batch,
3004                   (0 << 31) |
3005                   (0 << 14) |
3006                   (0 << 12) |
3007                   (0 << 10) |
3008                   (0 << 8));
3009     OUT_BCS_BATCH(batch,
3010                   ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
3011                   (0 << 5)  |
3012                   (0 << 4)  |
3013                   (1 << 3) | /* LastSlice Flag */
3014                   (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
3015     OUT_BCS_BATCH(batch, 0);
3016     ADVANCE_BCS_BATCH(batch);
3017 }
3018
3019 static void
3020 gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
3021                              struct gen7_mfd_context *gen7_mfd_context)
3022 {
3023     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3024     int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
3025     int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
3026     int first_mb_in_slice = 0;
3027     int slice_type = SLICE_TYPE_I;
3028
3029     BEGIN_BCS_BATCH(batch, 11);
3030     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
3031     OUT_BCS_BATCH(batch, slice_type);
3032     OUT_BCS_BATCH(batch, 
3033                   (num_ref_idx_l1 << 24) |
3034                   (num_ref_idx_l0 << 16) |
3035                   (0 << 8) |
3036                   (0 << 0));
3037     OUT_BCS_BATCH(batch, 
3038                   (0 << 29) |
3039                   (1 << 27) |   /* disable Deblocking */
3040                   (0 << 24) |
3041                   (gen7_jpeg_wa_clip.qp << 16) |
3042                   (0 << 8) |
3043                   (0 << 0));
3044     OUT_BCS_BATCH(batch, 
3045                   (slice_ver_pos << 24) |
3046                   (slice_hor_pos << 16) | 
3047                   (first_mb_in_slice << 0));
3048     OUT_BCS_BATCH(batch,
3049                   (next_slice_ver_pos << 16) |
3050                   (next_slice_hor_pos << 0));
3051     OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
3052     OUT_BCS_BATCH(batch, 0);
3053     OUT_BCS_BATCH(batch, 0);
3054     OUT_BCS_BATCH(batch, 0);
3055     OUT_BCS_BATCH(batch, 0);
3056     ADVANCE_BCS_BATCH(batch);
3057 }
3058
3059 static void
3060 gen75_mfd_jpeg_wa(VADriverContextP ctx,
3061                  struct gen7_mfd_context *gen7_mfd_context)
3062 {
3063     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3064     gen75_jpeg_wa_init(ctx, gen7_mfd_context);
3065     intel_batchbuffer_emit_mi_flush(batch);
3066     gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
3067     gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
3068     gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
3069     gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
3070     gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
3071     gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
3072     gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);
3073
3074     gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
3075     gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
3076     gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
3077 }
3078
3079 void
3080 gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
3081                              struct decode_state *decode_state,
3082                              struct gen7_mfd_context *gen7_mfd_context)
3083 {
3084     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
3085     VAPictureParameterBufferJPEGBaseline *pic_param;
3086     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
3087     dri_bo *slice_data_bo;
3088     int i, j, max_selector = 0;
3089
3090     assert(decode_state->pic_param && decode_state->pic_param->buffer);
3091     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
3092
3093     /* Currently only support Baseline DCT */
3094     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
3095     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3096     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
3097     intel_batchbuffer_emit_mi_flush(batch);
3098     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3099     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3100     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
3101     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
3102     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
3103
3104     for (j = 0; j < decode_state->num_slice_params; j++) {
3105         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3106         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3107         slice_data_bo = decode_state->slice_datas[j]->bo;
3108         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3109
3110         if (j == decode_state->num_slice_params - 1)
3111             next_slice_group_param = NULL;
3112         else
3113             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3114
3115         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3116             int component;
3117
3118             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3119
3120             if (i < decode_state->slice_params[j]->num_elements - 1)
3121                 next_slice_param = slice_param + 1;
3122             else
3123                 next_slice_param = next_slice_group_param;
3124
3125             for (component = 0; component < slice_param->num_components; component++) {
3126                 if (max_selector < slice_param->components[component].dc_table_selector)
3127                     max_selector = slice_param->components[component].dc_table_selector;
3128
3129                 if (max_selector < slice_param->components[component].ac_table_selector)
3130                     max_selector = slice_param->components[component].ac_table_selector;
3131             }
3132
3133             slice_param++;
3134         }
3135     }
3136
3137     assert(max_selector < 2);
3138     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
3139
3140     for (j = 0; j < decode_state->num_slice_params; j++) {
3141         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
3142         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
3143         slice_data_bo = decode_state->slice_datas[j]->bo;
3144         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
3145
3146         if (j == decode_state->num_slice_params - 1)
3147             next_slice_group_param = NULL;
3148         else
3149             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
3150
3151         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
3152             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
3153
3154             if (i < decode_state->slice_params[j]->num_elements - 1)
3155                 next_slice_param = slice_param + 1;
3156             else
3157                 next_slice_param = next_slice_group_param;
3158
3159             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
3160             slice_param++;
3161         }
3162     }
3163
3164     intel_batchbuffer_end_atomic(batch);
3165     intel_batchbuffer_flush(batch);
3166 }
3167
3168 static VAStatus
3169 gen75_mfd_decode_picture(VADriverContextP ctx, 
3170                         VAProfile profile, 
3171                         union codec_state *codec_state,
3172                         struct hw_context *hw_context)
3173
3174 {
3175     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3176     struct decode_state *decode_state = &codec_state->decode;
3177     VAStatus vaStatus;
3178
3179     assert(gen7_mfd_context);
3180
3181     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
3182
3183     if (vaStatus != VA_STATUS_SUCCESS)
3184         goto out;
3185
3186     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
3187
3188     switch (profile) {
3189     case VAProfileMPEG2Simple:
3190     case VAProfileMPEG2Main:
3191         gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
3192         break;
3193         
3194     case VAProfileH264ConstrainedBaseline:
3195     case VAProfileH264Main:
3196     case VAProfileH264High:
3197         gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
3198         break;
3199
3200     case VAProfileVC1Simple:
3201     case VAProfileVC1Main:
3202     case VAProfileVC1Advanced:
3203         gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
3204         break;
3205
3206     case VAProfileJPEGBaseline:
3207         gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
3208         break;
3209
3210     default:
3211         assert(0);
3212         break;
3213     }
3214
3215     vaStatus = VA_STATUS_SUCCESS;
3216
3217 out:
3218     return vaStatus;
3219 }
3220
3221 static void
3222 gen75_mfd_context_destroy(void *hw_context)
3223 {
3224     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
3225
3226     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
3227     gen7_mfd_context->post_deblocking_output.bo = NULL;
3228
3229     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
3230     gen7_mfd_context->pre_deblocking_output.bo = NULL;
3231
3232     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
3233     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
3234
3235     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
3236     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
3237
3238     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
3239     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
3240
3241     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
3242     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
3243
3244     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
3245     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
3246
3247     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
3248
3249     intel_batchbuffer_free(gen7_mfd_context->base.batch);
3250     free(gen7_mfd_context);
3251 }
3252
3253 static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
3254                                     struct gen7_mfd_context *gen7_mfd_context)
3255 {
3256     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
3257     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
3258     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
3259     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
3260 }
3261
3262 struct hw_context *
3263 gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
3264 {
3265     struct intel_driver_data *intel = intel_driver_data(ctx);
3266     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
3267     int i;
3268
3269     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
3270     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
3271     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
3272
3273     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
3274         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
3275         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
3276         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
3277     }
3278
3279     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
3280     gen7_mfd_context->jpeg_wa_surface_object = NULL;
3281
3282     switch (obj_config->profile) {
3283     case VAProfileMPEG2Simple:
3284     case VAProfileMPEG2Main:
3285         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
3286         break;
3287
3288     case VAProfileH264ConstrainedBaseline:
3289     case VAProfileH264Main:
3290     case VAProfileH264High:
3291         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
3292         break;
3293     default:
3294         break;
3295     }
3296     return (struct hw_context *)gen7_mfd_context;
3297 }