MPEG-2 encoding path
[profile/ivi/vaapi-intel-driver.git] / src / i965_media_h264.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <assert.h>
5
6
7 #include "intel_batchbuffer.h"
8 #include "intel_driver.h"
9
10 #include "i965_defines.h"
11 #include "i965_drv_video.h"
12 #include "i965_media.h"
13 #include "i965_media_h264.h"
14
/*
 * Indices of the AVC motion-compensation kernel entry points.
 * The values index the avc_mc_kernel_offset_* tables and are written into
 * the VFE remap table, so the numbering must stay dense and in this order.
 */
enum {
    INTRA_16X16    = 0,
    INTRA_8X8      = 1,
    INTRA_4X4      = 2,
    INTRA_PCM      = 3,
    FRAMEMB_MOTION = 4,
    FIELDMB_MOTION = 5,
    MBAFF_MOTION   = 6,
};
24
/*
 * Constant data copied verbatim into the CURBE buffer for the intra
 * kernels (see i965_media_h264_upload_constants).  The layout is two
 * blocks: eight dwords of per-mode offsets followed by seven dwords of
 * constants; field order and widths must not change.
 *
 * The "R1.n" / "R2.n" tags are carried over from the original comments;
 * presumably they name the register and dword each group lands in --
 * confirm against the kernel sources.
 */
struct intra_kernel_header
{
    /* R1.0 - R1.1: intra 4x4 luma prediction modes 0..7 */
    unsigned char intra_4x4_luma_mode_0_offset;
    unsigned char intra_4x4_luma_mode_1_offset;
    unsigned char intra_4x4_luma_mode_2_offset;
    unsigned char intra_4x4_luma_mode_3_offset;
    unsigned char intra_4x4_luma_mode_4_offset;
    unsigned char intra_4x4_luma_mode_5_offset;
    unsigned char intra_4x4_luma_mode_6_offset;
    unsigned char intra_4x4_luma_mode_7_offset;

    /* R1.2: intra 4x4 luma mode 8, padding byte, top reference offset */
    unsigned char intra_4x4_luma_mode_8_offset;
    unsigned char pad0;
    unsigned short top_reference_offset;

    /* R1.3 - R1.4: intra 8x8 luma prediction modes 0..7 */
    unsigned char intra_8x8_luma_mode_0_offset;
    unsigned char intra_8x8_luma_mode_1_offset;
    unsigned char intra_8x8_luma_mode_2_offset;
    unsigned char intra_8x8_luma_mode_3_offset;
    unsigned char intra_8x8_luma_mode_4_offset;
    unsigned char intra_8x8_luma_mode_5_offset;
    unsigned char intra_8x8_luma_mode_6_offset;
    unsigned char intra_8x8_luma_mode_7_offset;

    /* R1.5: intra 8x8 luma mode 8, padding byte, 8x8 transfer constant */
    unsigned char intra_8x8_luma_mode_8_offset;
    unsigned char pad1;
    unsigned short const_reverse_data_transfer_intra_8x8;

    /* R1.6: intra 16x16 luma prediction modes 0..3 */
    unsigned char intra_16x16_luma_mode_0_offset;
    unsigned char intra_16x16_luma_mode_1_offset;
    unsigned char intra_16x16_luma_mode_2_offset;
    unsigned char intra_16x16_luma_mode_3_offset;

    /* R1.7: intra chroma prediction modes 0..3 */
    unsigned char intra_chroma_mode_0_offset;
    unsigned char intra_chroma_mode_1_offset;
    unsigned char intra_chroma_mode_2_offset;
    unsigned char intra_chroma_mode_3_offset;

    /* R2.0 - R2.6: one 32-bit constant per dword */
    unsigned int const_intra_16x16_plane_0;             /* R2.0 */
    unsigned int const_intra_16x16_chroma_plane_0;      /* R2.1 */
    unsigned int const_intra_16x16_chroma_plane_1;      /* R2.2 */
    unsigned int const_intra_16x16_plane_1;             /* R2.3 */
    unsigned int left_shift_count_reverse_dw_ordering;  /* R2.4 */
    unsigned int const_reverse_data_transfer_intra_4x4; /* R2.5 */
    unsigned int intra_4x4_pred_mode_offset;            /* R2.6 */
};
80
/*
 * Four-byte constant header for the inter kernels: a 16-bit weight
 * offset, a one-byte flag and one byte of padding.
 */
struct inter_kernel_header
{
    unsigned short weight_offset;      /* bytes 0-1 */
    unsigned char weight_offset_flag;  /* byte 2 */
    unsigned char pad0;                /* byte 3, padding */
};
87
88 #include "shaders/h264/mc/export.inc"
89 static unsigned long avc_mc_kernel_offset_gen4[] = {
90     INTRA_16x16_IP * INST_UNIT_GEN4,
91     INTRA_8x8_IP * INST_UNIT_GEN4,
92     INTRA_4x4_IP * INST_UNIT_GEN4,
93     INTRA_PCM_IP * INST_UNIT_GEN4,
94     FRAME_MB_IP * INST_UNIT_GEN4,
95     FIELD_MB_IP * INST_UNIT_GEN4,
96     MBAFF_MB_IP * INST_UNIT_GEN4
97 };
98
99 struct intra_kernel_header intra_kernel_header_gen4 = {
100     0,
101     (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP),
102     (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP),
103     (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP),
104
105     (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
106     (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
107     (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP),
108     (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP),
109
110     (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP),
111     0,
112     0xFFFC,
113
114     0,
115     (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP),
116     (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP),
117     (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP),
118
119     (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
120     (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
121     (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP),
122     (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP),
123
124     (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP),
125     0,
126     0x0001,
127
128     0,
129     (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP),
130     (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP),
131     (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP),
132
133     0,
134     (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP),
135     (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP),
136     (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP),
137
138     0xFCFBFAF9,
139
140     0x00FFFEFD,
141
142     0x04030201,
143
144     0x08070605,
145
146     0x18100800,
147
148     0x00020406,
149
150     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) * 0x1000000 + 
151     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) * 0x10000 + 
152     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) * 0x100 + 
153     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP)
154 };
155
156 static const uint32_t h264_avc_combined_gen4[][4] = {
157 #include "shaders/h264/mc/avc_mc.g4b"
158 };
159
160 static const uint32_t h264_avc_null_gen4[][4] = {
161 #include "shaders/h264/mc/null.g4b"
162 };
163
164 static struct i965_kernel h264_avc_kernels_gen4[] = {
165     {
166         "AVC combined kernel",
167         H264_AVC_COMBINED,
168         h264_avc_combined_gen4,
169         sizeof(h264_avc_combined_gen4),
170         NULL
171     },
172
173     {
174         "NULL kernel",
175         H264_AVC_NULL,
176         h264_avc_null_gen4,
177         sizeof(h264_avc_null_gen4),
178         NULL
179     }
180 };
181
182 /* On Ironlake */
183 #include "shaders/h264/mc/export.inc.gen5"
184 static unsigned long avc_mc_kernel_offset_gen5[] = {
185     INTRA_16x16_IP_GEN5 * INST_UNIT_GEN5,
186     INTRA_8x8_IP_GEN5 * INST_UNIT_GEN5,
187     INTRA_4x4_IP_GEN5 * INST_UNIT_GEN5,
188     INTRA_PCM_IP_GEN5 * INST_UNIT_GEN5,
189     FRAME_MB_IP_GEN5 * INST_UNIT_GEN5,
190     FIELD_MB_IP_GEN5 * INST_UNIT_GEN5,
191     MBAFF_MB_IP_GEN5 * INST_UNIT_GEN5
192 };
193
194 struct intra_kernel_header intra_kernel_header_gen5 = {
195     0,
196     (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
197     (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
198     (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
199
200     (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
201     (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
202     (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
203     (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
204
205     (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
206     0,
207     0xFFFC,
208
209     0,
210     (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
211     (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
212     (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
213
214     (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
215     (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
216     (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
217     (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
218
219     (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
220     0,
221     0x0001,
222
223     0,
224     (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
225     (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
226     (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
227
228     0,
229     (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
230     (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
231     (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
232
233     0xFCFBFAF9,
234
235     0x00FFFEFD,
236
237     0x04030201,
238
239     0x08070605,
240
241     0x18100800,
242
243     0x00020406,
244
245     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) * 0x1000000 + 
246     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) * 0x10000 + 
247     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) * 0x100 + 
248     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5)
249 };
250
251 static const uint32_t h264_avc_combined_gen5[][4] = {
252 #include "shaders/h264/mc/avc_mc.g4b.gen5"
253 };
254
255 static const uint32_t h264_avc_null_gen5[][4] = {
256 #include "shaders/h264/mc/null.g4b.gen5"
257 };
258
259 static struct i965_kernel h264_avc_kernels_gen5[] = {
260     {
261         "AVC combined kernel",
262         H264_AVC_COMBINED,
263         h264_avc_combined_gen5,
264         sizeof(h264_avc_combined_gen5),
265         NULL
266     },
267
268     {
269         "NULL kernel",
270         H264_AVC_NULL,
271         h264_avc_null_gen5,
272         sizeof(h264_avc_null_gen5),
273         NULL
274     }
275 };
276
/* Number of kernel entry points, taken from the gen4 offset table. */
#define NUM_AVC_MC_INTERFACES \
    (sizeof(avc_mc_kernel_offset_gen4) / sizeof(*avc_mc_kernel_offset_gen4))

/*
 * Tables currently in use.  Both start out NULL; nothing in the visible
 * part of this file assigns them, so they are presumably pointed at the
 * gen4 or gen5 variants during context setup -- confirm.
 */
static unsigned long *avc_mc_kernel_offset = NULL;
static struct intra_kernel_header *intra_kernel_header = NULL;
281
282 static void
283 i965_media_h264_surface_state(VADriverContextP ctx, 
284                               int index,
285                               struct object_surface *obj_surface,
286                               unsigned long offset, 
287                               int w, int h, int pitch,
288                               Bool is_dst,
289                               int vert_line_stride,
290                               int vert_line_stride_ofs,
291                               int format,
292                               struct i965_media_context *media_context)
293 {
294     struct i965_driver_data *i965 = i965_driver_data(ctx);
295     struct i965_surface_state *ss;
296     dri_bo *bo;
297     uint32_t write_domain, read_domain;
298
299     assert(obj_surface->bo);
300
301     bo = dri_bo_alloc(i965->intel.bufmgr,
302                       "surface state", 
303                       sizeof(struct i965_surface_state), 32);
304     assert(bo);
305     dri_bo_map(bo, 1);
306     assert(bo->virtual);
307     ss = bo->virtual;
308     memset(ss, 0, sizeof(*ss));
309     ss->ss0.surface_type = I965_SURFACE_2D;
310     ss->ss0.surface_format = format;
311     ss->ss0.vert_line_stride = vert_line_stride;
312     ss->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
313     ss->ss1.base_addr = obj_surface->bo->offset + offset;
314     ss->ss2.width = w - 1;
315     ss->ss2.height = h - 1;
316     ss->ss3.pitch = pitch - 1;
317
318     if (is_dst) {
319         write_domain = I915_GEM_DOMAIN_RENDER;
320         read_domain = I915_GEM_DOMAIN_RENDER;
321     } else {
322         write_domain = 0;
323         read_domain = I915_GEM_DOMAIN_SAMPLER;
324     }
325
326     dri_bo_emit_reloc(bo,
327                       read_domain, write_domain,
328                       offset,
329                       offsetof(struct i965_surface_state, ss1),
330                       obj_surface->bo);
331     dri_bo_unmap(bo);
332
333     assert(index < MAX_MEDIA_SURFACES);
334     media_context->surface_state[index].bo = bo;
335 }
336
337 static void 
338 i965_media_h264_surfaces_setup(VADriverContextP ctx, 
339                                struct decode_state *decode_state,
340                                struct i965_media_context *media_context)
341 {
342     struct i965_driver_data *i965 = i965_driver_data(ctx);
343     struct i965_h264_context *i965_h264_context;
344     struct object_surface *obj_surface;
345     VAPictureParameterBufferH264 *pic_param;
346     VAPictureH264 *va_pic;
347     int i, j, w, h;
348     int field_picture;
349
350     assert(media_context->private_context);
351     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
352
353     assert(decode_state->pic_param && decode_state->pic_param->buffer);
354     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
355
356     /* Target Picture */
357     va_pic = &pic_param->CurrPic;
358     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
359     obj_surface = SURFACE(va_pic->picture_id);
360     assert(obj_surface);
361     w = obj_surface->width;
362     h = obj_surface->height;
363     field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
364     i965_media_h264_surface_state(ctx, 0, obj_surface,
365                                   0, w / 4, h / (1 + field_picture), w,
366                                   1, 
367                                   field_picture,
368                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
369                                   I965_SURFACEFORMAT_R8_SINT,   /* Y */
370                                   media_context);
371     i965_media_h264_surface_state(ctx, 1, obj_surface,
372                                   w * h, w / 4, h / 2 / (1 + field_picture), w,
373                                   1, 
374                                   field_picture,
375                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
376                                   I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
377                                   media_context);
378
379     /* Reference Pictures */
380     for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
381         if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) {
382             int found = 0;
383             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
384                 va_pic = &pic_param->ReferenceFrames[j];
385                 
386                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
387                     continue;
388
389                 if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
390                     found = 1;
391                     break;
392                 }
393             }
394
395             assert(found == 1);
396
397             obj_surface = SURFACE(va_pic->picture_id);
398             assert(obj_surface);
399             w = obj_surface->width;
400             h = obj_surface->height;
401             field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
402             i965_media_h264_surface_state(ctx, 2 + i, obj_surface,
403                                           0, w / 4, h / (1 + field_picture), w,
404                                           0, 
405                                           field_picture,
406                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
407                                           I965_SURFACEFORMAT_R8_SINT,   /* Y */
408                                           media_context);
409             i965_media_h264_surface_state(ctx, 18 + i, obj_surface,
410                                           w * h, w / 4, h / 2 / (1 + field_picture), w,
411                                           0, 
412                                           field_picture,
413                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
414                                           I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
415                                           media_context);
416         }
417     }
418 }
419
420 static void
421 i965_media_h264_binding_table(VADriverContextP ctx, struct i965_media_context *media_context)
422 {
423     int i;
424     unsigned int *binding_table;
425     dri_bo *bo = media_context->binding_table.bo;
426
427     dri_bo_map(bo, 1);
428     assert(bo->virtual);
429     binding_table = bo->virtual;
430     memset(binding_table, 0, bo->size);
431
432     for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
433         if (media_context->surface_state[i].bo) {
434             binding_table[i] = media_context->surface_state[i].bo->offset;
435             dri_bo_emit_reloc(bo,
436                               I915_GEM_DOMAIN_INSTRUCTION, 0,
437                               0,
438                               i * sizeof(*binding_table),
439                               media_context->surface_state[i].bo);
440         }
441     }
442
443     dri_bo_unmap(media_context->binding_table.bo);
444 }
445
446 static void 
447 i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx, struct i965_media_context *media_context)
448 {
449     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_context->private_context;
450     struct i965_interface_descriptor *desc;
451     int i;
452     dri_bo *bo;
453
454     bo = media_context->idrt.bo;
455     dri_bo_map(bo, 1);
456     assert(bo->virtual);
457     desc = bo->virtual;
458
459     for (i = 0; i < NUM_AVC_MC_INTERFACES; i++) {
460         int kernel_offset = avc_mc_kernel_offset[i];
461         memset(desc, 0, sizeof(*desc));
462         desc->desc0.grf_reg_blocks = 7; 
463         desc->desc0.kernel_start_pointer = (i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
464         desc->desc1.const_urb_entry_read_offset = 0;
465         desc->desc1.const_urb_entry_read_len = 2;
466         desc->desc3.binding_table_entry_count = 0;
467         desc->desc3.binding_table_pointer = 
468             media_context->binding_table.bo->offset >> 5; /*reloc */
469
470         dri_bo_emit_reloc(bo,
471                           I915_GEM_DOMAIN_INSTRUCTION, 0,
472                           desc->desc0.grf_reg_blocks + kernel_offset,
473                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
474                           i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo);
475
476         dri_bo_emit_reloc(bo,
477                           I915_GEM_DOMAIN_INSTRUCTION, 0,
478                           desc->desc3.binding_table_entry_count,
479                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
480                           media_context->binding_table.bo);
481         desc++;
482     }
483
484     dri_bo_unmap(bo);
485 }
486
487 static void
488 i965_media_h264_vfe_state(VADriverContextP ctx, struct i965_media_context *media_context)
489 {
490     struct i965_vfe_state *vfe_state;
491     dri_bo *bo;
492
493     bo = media_context->vfe_state.bo;
494     dri_bo_map(bo, 1);
495     assert(bo->virtual);
496     vfe_state = bo->virtual;
497     memset(vfe_state, 0, sizeof(*vfe_state));
498     vfe_state->vfe0.extend_vfe_state_present = 1;
499     vfe_state->vfe1.max_threads = media_context->urb.num_vfe_entries - 1;
500     vfe_state->vfe1.urb_entry_alloc_size = media_context->urb.size_vfe_entry - 1;
501     vfe_state->vfe1.num_urb_entries = media_context->urb.num_vfe_entries;
502     vfe_state->vfe1.vfe_mode = VFE_AVC_IT_MODE;
503     vfe_state->vfe1.children_present = 0;
504     vfe_state->vfe2.interface_descriptor_base = 
505         media_context->idrt.bo->offset >> 4; /* reloc */
506     dri_bo_emit_reloc(bo,
507                       I915_GEM_DOMAIN_INSTRUCTION, 0,
508                       0,
509                       offsetof(struct i965_vfe_state, vfe2),
510                       media_context->idrt.bo);
511     dri_bo_unmap(bo);
512 }
513
514 static void 
515 i965_media_h264_vfe_state_extension(VADriverContextP ctx, 
516                                     struct decode_state *decode_state,
517                                     struct i965_media_context *media_context)
518 {
519     struct i965_h264_context *i965_h264_context;
520     struct i965_vfe_state_ex *vfe_state_ex;
521     VAPictureParameterBufferH264 *pic_param;
522     int mbaff_frame_flag;
523
524     assert(media_context->private_context);
525     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
526
527     assert(decode_state->pic_param && decode_state->pic_param->buffer);
528     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
529     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
530                         !pic_param->pic_fields.bits.field_pic_flag);
531
532     assert(media_context->extended_state.bo);
533     dri_bo_map(media_context->extended_state.bo, 1);
534     assert(media_context->extended_state.bo->virtual);
535     vfe_state_ex = media_context->extended_state.bo->virtual;
536     memset(vfe_state_ex, 0, sizeof(*vfe_state_ex));
537
538     /*
539      * Indirect data buffer:
540      * --------------------------------------------------------
541      * | Motion Vectors | Weight/Offset data | Residual data |
542      * --------------------------------------------------------
543      * R4-R7: Motion Vectors
544      * R8-R9: Weight/Offset
545      * R10-R33: Residual data
546      */
547     vfe_state_ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
548     vfe_state_ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;
549
550     if (i965_h264_context->picture.i_flag) {
551         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
552         vfe_state_ex->vfex1.avc.weight_grf_offset = 0;
553         vfe_state_ex->vfex1.avc.residual_grf_offset = 0;
554     } else {
555         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
556         vfe_state_ex->vfex1.avc.weight_grf_offset = 4;
557         vfe_state_ex->vfex1.avc.residual_grf_offset = 6;
558     }
559
560     if (!pic_param->pic_fields.bits.field_pic_flag) {
561         if (mbaff_frame_flag) {
562             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
563             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
564             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
565             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
566             vfe_state_ex->remap_table0.remap_index_4 = MBAFF_MOTION;
567             vfe_state_ex->remap_table0.remap_index_5 = MBAFF_MOTION;
568             vfe_state_ex->remap_table0.remap_index_6 = MBAFF_MOTION;
569             vfe_state_ex->remap_table0.remap_index_7 = MBAFF_MOTION;
570
571             vfe_state_ex->remap_table1.remap_index_8 = MBAFF_MOTION;
572             vfe_state_ex->remap_table1.remap_index_9 = MBAFF_MOTION;
573             vfe_state_ex->remap_table1.remap_index_10 = MBAFF_MOTION;
574             vfe_state_ex->remap_table1.remap_index_11 = MBAFF_MOTION;
575             vfe_state_ex->remap_table1.remap_index_12 = MBAFF_MOTION;
576             vfe_state_ex->remap_table1.remap_index_13 = MBAFF_MOTION;
577             vfe_state_ex->remap_table1.remap_index_14 = MBAFF_MOTION;
578             vfe_state_ex->remap_table1.remap_index_15 = MBAFF_MOTION;
579         } else {
580             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
581             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
582             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
583             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
584             vfe_state_ex->remap_table0.remap_index_4 = FRAMEMB_MOTION;
585             vfe_state_ex->remap_table0.remap_index_5 = FRAMEMB_MOTION;
586             vfe_state_ex->remap_table0.remap_index_6 = FRAMEMB_MOTION;
587             vfe_state_ex->remap_table0.remap_index_7 = FRAMEMB_MOTION;
588
589             vfe_state_ex->remap_table1.remap_index_8 = FRAMEMB_MOTION;
590             vfe_state_ex->remap_table1.remap_index_9 = FRAMEMB_MOTION;
591             vfe_state_ex->remap_table1.remap_index_10 = FRAMEMB_MOTION;
592             vfe_state_ex->remap_table1.remap_index_11 = FRAMEMB_MOTION;
593             vfe_state_ex->remap_table1.remap_index_12 = FRAMEMB_MOTION;
594             vfe_state_ex->remap_table1.remap_index_13 = FRAMEMB_MOTION;
595             vfe_state_ex->remap_table1.remap_index_14 = FRAMEMB_MOTION;
596             vfe_state_ex->remap_table1.remap_index_15 = FRAMEMB_MOTION;
597         }
598     } else {
599         vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
600         vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
601         vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
602         vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
603         vfe_state_ex->remap_table0.remap_index_4 = FIELDMB_MOTION;
604         vfe_state_ex->remap_table0.remap_index_5 = FIELDMB_MOTION;
605         vfe_state_ex->remap_table0.remap_index_6 = FIELDMB_MOTION;
606         vfe_state_ex->remap_table0.remap_index_7 = FIELDMB_MOTION;
607
608         vfe_state_ex->remap_table1.remap_index_8 = FIELDMB_MOTION;
609         vfe_state_ex->remap_table1.remap_index_9 = FIELDMB_MOTION;
610         vfe_state_ex->remap_table1.remap_index_10 = FIELDMB_MOTION;
611         vfe_state_ex->remap_table1.remap_index_11 = FIELDMB_MOTION;
612         vfe_state_ex->remap_table1.remap_index_12 = FIELDMB_MOTION;
613         vfe_state_ex->remap_table1.remap_index_13 = FIELDMB_MOTION;
614         vfe_state_ex->remap_table1.remap_index_14 = FIELDMB_MOTION;
615         vfe_state_ex->remap_table1.remap_index_15 = FIELDMB_MOTION;
616     }
617
618     if (i965_h264_context->use_avc_hw_scoreboard) {
619         vfe_state_ex->scoreboard0.enable = 1;
620         vfe_state_ex->scoreboard0.type = SCOREBOARD_STALLING;
621         vfe_state_ex->scoreboard0.mask = 0xff;
622
623         vfe_state_ex->scoreboard1.delta_x0 = -1;
624         vfe_state_ex->scoreboard1.delta_y0 = 0;
625         vfe_state_ex->scoreboard1.delta_x1 = 0;
626         vfe_state_ex->scoreboard1.delta_y1 = -1;
627         vfe_state_ex->scoreboard1.delta_x2 = 1;
628         vfe_state_ex->scoreboard1.delta_y2 = -1;
629         vfe_state_ex->scoreboard1.delta_x3 = -1;
630         vfe_state_ex->scoreboard1.delta_y3 = -1;
631
632         vfe_state_ex->scoreboard2.delta_x4 = -1;
633         vfe_state_ex->scoreboard2.delta_y4 = 1;
634         vfe_state_ex->scoreboard2.delta_x5 = 0;
635         vfe_state_ex->scoreboard2.delta_y5 = -2;
636         vfe_state_ex->scoreboard2.delta_x6 = 1;
637         vfe_state_ex->scoreboard2.delta_y6 = -2;
638         vfe_state_ex->scoreboard2.delta_x7 = -1;
639         vfe_state_ex->scoreboard2.delta_y7 = -2;
640     }
641
642     dri_bo_unmap(media_context->extended_state.bo);
643 }
644
645 static void
646 i965_media_h264_upload_constants(VADriverContextP ctx,
647                                  struct decode_state *decode_state,
648                                  struct i965_media_context *media_context)
649 {
650     struct i965_h264_context *i965_h264_context;
651     unsigned char *constant_buffer;
652     VASliceParameterBufferH264 *slice_param;
653
654     assert(media_context->private_context);
655     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
656
657     assert(decode_state->slice_params[0] && decode_state->slice_params[0]->buffer);
658     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[0]->buffer;
659
660     dri_bo_map(media_context->curbe.bo, 1);
661     assert(media_context->curbe.bo->virtual);
662     constant_buffer = media_context->curbe.bo->virtual;
663
664     /* HW solution for W=128 */
665     if (i965_h264_context->use_hw_w128) {
666         memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
667     } else {
668         if (slice_param->slice_type == SLICE_TYPE_I ||
669             slice_param->slice_type == SLICE_TYPE_SI) {
670             memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
671         } else {
672             /* FIXME: Need to upload CURBE data to inter kernel interface 
673              * to support weighted prediction work-around 
674              */
675             *(short *)constant_buffer = i965_h264_context->weight128_offset0;
676             constant_buffer += 2;
677             *(char *)constant_buffer = i965_h264_context->weight128_offset0_flag;
678             constant_buffer++;
679             *constant_buffer = 0;
680         }
681     }
682
683     dri_bo_unmap(media_context->curbe.bo);
684 }
685
686 static void
687 i965_media_h264_states_setup(VADriverContextP ctx,
688                              struct decode_state *decode_state,
689                              struct i965_media_context *media_context)
690 {
691     struct i965_h264_context *i965_h264_context;
692
693     assert(media_context->private_context);
694     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
695
696     i965_avc_bsd_pipeline(ctx, decode_state, i965_h264_context);
697
698     if (i965_h264_context->use_avc_hw_scoreboard)
699         i965_avc_hw_scoreboard(ctx, decode_state, i965_h264_context);
700
701     i965_media_h264_surfaces_setup(ctx, decode_state, media_context);
702     i965_media_h264_binding_table(ctx, media_context);
703     i965_media_h264_interface_descriptor_remap_table(ctx, media_context);
704     i965_media_h264_vfe_state_extension(ctx, decode_state, media_context);
705     i965_media_h264_vfe_state(ctx, media_context);
706     i965_media_h264_upload_constants(ctx, decode_state, media_context);
707 }
708
709 static void
710 i965_media_h264_objects(VADriverContextP ctx,
711                         struct decode_state *decode_state,
712                         struct i965_media_context *media_context)
713 {
714     struct intel_batchbuffer *batch = media_context->base.batch;
715     struct i965_h264_context *i965_h264_context;
716     unsigned int *object_command;
717
718     assert(media_context->private_context);
719     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
720
721     dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True);
722     assert(i965_h264_context->avc_it_command_mb_info.bo->virtual);
723     object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual;
724     memset(object_command, 0, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
725     object_command += i965_h264_context->avc_it_command_mb_info.mbs * (1 + i965_h264_context->use_avc_hw_scoreboard) * MB_CMD_IN_DWS;
726     *object_command++ = 0;
727     *object_command = MI_BATCH_BUFFER_END;
728     dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo);
729
730     BEGIN_BATCH(batch, 2);
731     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
732     OUT_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo, 
733               I915_GEM_DOMAIN_COMMAND, 0, 
734               0);
735     ADVANCE_BATCH(batch);
736
737     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
738      * will cause control to pass back to ring buffer 
739      */
740     intel_batchbuffer_end_atomic(batch);
741     intel_batchbuffer_flush(batch);
742     intel_batchbuffer_start_atomic(batch, 0x1000);
743     i965_avc_ildb(ctx, decode_state, i965_h264_context);
744 }
745
746 static void 
747 i965_media_h264_free_private_context(void **data)
748 {
749     struct i965_h264_context *i965_h264_context = *data;
750     int i;
751
752     if (i965_h264_context == NULL)
753         return;
754
755     i965_avc_ildb_ternimate(&i965_h264_context->avc_ildb_context);
756     i965_avc_hw_scoreboard_ternimate(&i965_h264_context->avc_hw_scoreboard_context);
757     i965_avc_bsd_ternimate(&i965_h264_context->i965_avc_bsd_context);
758     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
759     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
760     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
761
762     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
763         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
764
765         dri_bo_unreference(kernel->bo);
766         kernel->bo = NULL;
767     }
768
769     free(i965_h264_context);
770     *data = NULL;
771 }
772
773 void
774 i965_media_h264_decode_init(VADriverContextP ctx, 
775                             struct decode_state *decode_state, 
776                             struct i965_media_context *media_context)
777 {
778     struct i965_driver_data *i965 = i965_driver_data(ctx);
779     struct i965_h264_context *i965_h264_context = media_context->private_context;
780     dri_bo *bo;
781     VAPictureParameterBufferH264 *pic_param;
782
783     assert(decode_state->pic_param && decode_state->pic_param->buffer);
784     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
785     i965_h264_context->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
786     i965_h264_context->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) / 
787         (1 + !!pic_param->pic_fields.bits.field_pic_flag); /* picture height */
788     i965_h264_context->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
789                                                    !pic_param->pic_fields.bits.field_pic_flag);
790     i965_h264_context->avc_it_command_mb_info.mbs = (i965_h264_context->picture.width_in_mbs * 
791                                                      i965_h264_context->picture.height_in_mbs);
792
793     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
794     bo = dri_bo_alloc(i965->intel.bufmgr,
795                       "avc it command mb info",
796                       i965_h264_context->avc_it_command_mb_info.mbs * MB_CMD_IN_BYTES * (1 + i965_h264_context->use_avc_hw_scoreboard) + 8,
797                       0x1000);
798     assert(bo);
799     i965_h264_context->avc_it_command_mb_info.bo = bo;
800
801     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
802     bo = dri_bo_alloc(i965->intel.bufmgr,
803                       "avc it data",
804                       i965_h264_context->avc_it_command_mb_info.mbs * 
805                       0x800 * 
806                       (1 + !!pic_param->pic_fields.bits.field_pic_flag),
807                       0x1000);
808     assert(bo);
809     i965_h264_context->avc_it_data.bo = bo;
810     i965_h264_context->avc_it_data.write_offset = 0;
811     dri_bo_unreference(media_context->indirect_object.bo);
812     media_context->indirect_object.bo = bo;
813     dri_bo_reference(media_context->indirect_object.bo);
814     media_context->indirect_object.offset = i965_h264_context->avc_it_data.write_offset;
815
816     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
817     bo = dri_bo_alloc(i965->intel.bufmgr,
818                       "AVC-ILDB Data Buffer",
819                       i965_h264_context->avc_it_command_mb_info.mbs * 64 * 2,
820                       0x1000);
821     assert(bo);
822     i965_h264_context->avc_ildb_data.bo = bo;
823
824     /* bsd pipeline */
825     i965_avc_bsd_decode_init(ctx, i965_h264_context);
826
827     /* HW scoreboard */
828     if (i965_h264_context->use_avc_hw_scoreboard)
829         i965_avc_hw_scoreboard_decode_init(ctx, i965_h264_context);
830
831     /* ILDB */
832     i965_avc_ildb_decode_init(ctx, i965_h264_context);
833
834     /* for Media pipeline */
835     media_context->extended_state.enabled = 1;
836     dri_bo_unreference(media_context->extended_state.bo);
837     bo = dri_bo_alloc(i965->intel.bufmgr, 
838                       "extened vfe state", 
839                       sizeof(struct i965_vfe_state_ex), 32);
840     assert(bo);
841     media_context->extended_state.bo = bo;
842 }
843
844 void 
845 i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context *media_context)
846 {
847     struct i965_driver_data *i965 = i965_driver_data(ctx);
848     struct i965_h264_context *i965_h264_context;
849     int i;
850
851     i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
852
853     /* kernel */
854     assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) / 
855                                     sizeof(h264_avc_kernels_gen5[0])));
856     assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
857                                      sizeof(avc_mc_kernel_offset_gen5[0])));
858     if (IS_IRONLAKE(i965->intel.device_id)) {
859         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen5, sizeof(i965_h264_context->avc_kernels));
860         avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
861         intra_kernel_header = &intra_kernel_header_gen5;
862         i965_h264_context->use_avc_hw_scoreboard = 1;
863         i965_h264_context->use_hw_w128 = 1;
864     } else {
865         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen4, sizeof(i965_h264_context->avc_kernels));
866         avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
867         intra_kernel_header = &intra_kernel_header_gen4;
868         i965_h264_context->use_avc_hw_scoreboard = 0;
869         i965_h264_context->use_hw_w128 = 0;
870     }
871
872     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
873         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
874         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
875                                   kernel->name, 
876                                   kernel->size, 0x1000);
877         assert(kernel->bo);
878         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
879     }
880
881     for (i = 0; i < 16; i++) {
882         i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID;
883         i965_h264_context->fsid_list[i].frame_store_id = -1;
884     }
885
886     i965_h264_context->batch = media_context->base.batch;
887
888     media_context->private_context = i965_h264_context;
889     media_context->free_private_context = i965_media_h264_free_private_context;
890
891     /* URB */
892     if (IS_IRONLAKE(i965->intel.device_id)) {
893         media_context->urb.num_vfe_entries = 63;
894     } else {
895         media_context->urb.num_vfe_entries = 23;
896     }
897
898     media_context->urb.size_vfe_entry = 16;
899
900     media_context->urb.num_cs_entries = 1;
901     media_context->urb.size_cs_entry = 1;
902
903     media_context->urb.vfe_start = 0;
904     media_context->urb.cs_start = media_context->urb.vfe_start + 
905         media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry;
906     assert(media_context->urb.cs_start + 
907            media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
908
909     /* hook functions */
910     media_context->media_states_setup = i965_media_h264_states_setup;
911     media_context->media_objects = i965_media_h264_objects;
912 }