VPP: Set the alpha channel when doing the conversion from NV12 to RGBA on Ivy/Haswell/BDW
[platform/upstream/libva-intel-driver.git] / src / i965_media_h264.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <assert.h>
5
6
7 #include "intel_batchbuffer.h"
8 #include "intel_driver.h"
9
10 #include "i965_defines.h"
11 #include "i965_drv_video.h"
12 #include "i965_media.h"
13 #include "i965_media_h264.h"
14
/*
 * Indices of the AVC motion-compensation kernel entry points.  The order
 * mirrors the avc_mc_kernel_offset_gen4/gen5 tables below, and these are
 * the values written into the VFE remap tables.
 */
enum {
    INTRA_16X16    = 0,
    INTRA_8X8      = 1,
    INTRA_4X4      = 2,
    INTRA_PCM      = 3,
    FRAMEMB_MOTION = 4,
    FIELDMB_MOTION = 5,
    MBAFF_MOTION   = 6,
};
24
/*
 * Constant data handed to the intra-prediction kernels.  The structure is
 * copied byte-for-byte into the CURBE buffer, so field order and width are
 * part of the contract with the shader.  The "R1.n" / "R2.n" labels are
 * kept from the original source; each label covers four bytes
 * (presumably one dword of the corresponding GRF register -- confirm
 * against the kernel sources).
 */
struct intra_kernel_header
{
    /* R1.0: intra 4x4 luma modes 0-3 */
    uint8_t intra_4x4_luma_mode_0_offset;
    uint8_t intra_4x4_luma_mode_1_offset;
    uint8_t intra_4x4_luma_mode_2_offset;
    uint8_t intra_4x4_luma_mode_3_offset;

    /* R1.1: intra 4x4 luma modes 4-7 */
    uint8_t intra_4x4_luma_mode_4_offset;
    uint8_t intra_4x4_luma_mode_5_offset;
    uint8_t intra_4x4_luma_mode_6_offset;
    uint8_t intra_4x4_luma_mode_7_offset;

    /* R1.2: intra 4x4 luma mode 8, padding, top reference offset */
    uint8_t intra_4x4_luma_mode_8_offset;
    uint8_t pad0;
    uint16_t top_reference_offset;

    /* R1.3: intra 8x8 luma modes 0-3 */
    uint8_t intra_8x8_luma_mode_0_offset;
    uint8_t intra_8x8_luma_mode_1_offset;
    uint8_t intra_8x8_luma_mode_2_offset;
    uint8_t intra_8x8_luma_mode_3_offset;

    /* R1.4: intra 8x8 luma modes 4-7 */
    uint8_t intra_8x8_luma_mode_4_offset;
    uint8_t intra_8x8_luma_mode_5_offset;
    uint8_t intra_8x8_luma_mode_6_offset;
    uint8_t intra_8x8_luma_mode_7_offset;

    /* R1.5: intra 8x8 luma mode 8, padding, 8x8 transfer constant */
    uint8_t intra_8x8_luma_mode_8_offset;
    uint8_t pad1;
    uint16_t const_reverse_data_transfer_intra_8x8;

    /* R1.6: intra 16x16 luma modes 0-3 */
    uint8_t intra_16x16_luma_mode_0_offset;
    uint8_t intra_16x16_luma_mode_1_offset;
    uint8_t intra_16x16_luma_mode_2_offset;
    uint8_t intra_16x16_luma_mode_3_offset;

    /* R1.7: intra chroma modes 0-3 */
    uint8_t intra_chroma_mode_0_offset;
    uint8_t intra_chroma_mode_1_offset;
    uint8_t intra_chroma_mode_2_offset;
    uint8_t intra_chroma_mode_3_offset;

    /* R2.0 - R2.6: one 32-bit constant each */
    uint32_t const_intra_16x16_plane_0;             /* R2.0 */
    uint32_t const_intra_16x16_chroma_plane_0;      /* R2.1 */
    uint32_t const_intra_16x16_chroma_plane_1;      /* R2.2 */
    uint32_t const_intra_16x16_plane_1;             /* R2.3 */
    uint32_t left_shift_count_reverse_dw_ordering;  /* R2.4 */
    uint32_t const_reverse_data_transfer_intra_4x4; /* R2.5 */
    uint32_t intra_4x4_pred_mode_offset;            /* R2.6 */
};
80
/*
 * Four-byte constant header for the inter kernels: a 16-bit weight offset,
 * a one-byte flag and one byte of padding.
 */
struct inter_kernel_header
{
    uint16_t weight_offset;
    uint8_t weight_offset_flag;
    uint8_t pad0;
};
87
88 #include "shaders/h264/mc/export.inc"
89 static unsigned long avc_mc_kernel_offset_gen4[] = {
90     INTRA_16x16_IP * INST_UNIT_GEN4,
91     INTRA_8x8_IP * INST_UNIT_GEN4,
92     INTRA_4x4_IP * INST_UNIT_GEN4,
93     INTRA_PCM_IP * INST_UNIT_GEN4,
94     FRAME_MB_IP * INST_UNIT_GEN4,
95     FIELD_MB_IP * INST_UNIT_GEN4,
96     MBAFF_MB_IP * INST_UNIT_GEN4
97 };
98
99 struct intra_kernel_header intra_kernel_header_gen4 = {
100     0,
101     (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP),
102     (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP),
103     (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP),
104
105     (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
106     (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
107     (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP),
108     (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP),
109
110     (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP),
111     0,
112     0xFFFC,
113
114     0,
115     (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP),
116     (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP),
117     (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP),
118
119     (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
120     (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
121     (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP),
122     (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP),
123
124     (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP),
125     0,
126     0x0001,
127
128     0,
129     (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP),
130     (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP),
131     (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP),
132
133     0,
134     (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP),
135     (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP),
136     (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP),
137
138     0xFCFBFAF9,
139
140     0x00FFFEFD,
141
142     0x04030201,
143
144     0x08070605,
145
146     0x18100800,
147
148     0x00020406,
149
150     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) * 0x1000000 + 
151     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) * 0x10000 + 
152     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) * 0x100 + 
153     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP)
154 };
155
156 static const uint32_t h264_avc_combined_gen4[][4] = {
157 #include "shaders/h264/mc/avc_mc.g4b"
158 };
159
160 static const uint32_t h264_avc_null_gen4[][4] = {
161 #include "shaders/h264/mc/null.g4b"
162 };
163
164 static struct i965_kernel h264_avc_kernels_gen4[] = {
165     {
166         "AVC combined kernel",
167         H264_AVC_COMBINED,
168         h264_avc_combined_gen4,
169         sizeof(h264_avc_combined_gen4),
170         NULL
171     },
172
173     {
174         "NULL kernel",
175         H264_AVC_NULL,
176         h264_avc_null_gen4,
177         sizeof(h264_avc_null_gen4),
178         NULL
179     }
180 };
181
182 /* On Ironlake */
183 #include "shaders/h264/mc/export.inc.gen5"
184 static unsigned long avc_mc_kernel_offset_gen5[] = {
185     INTRA_16x16_IP_GEN5 * INST_UNIT_GEN5,
186     INTRA_8x8_IP_GEN5 * INST_UNIT_GEN5,
187     INTRA_4x4_IP_GEN5 * INST_UNIT_GEN5,
188     INTRA_PCM_IP_GEN5 * INST_UNIT_GEN5,
189     FRAME_MB_IP_GEN5 * INST_UNIT_GEN5,
190     FIELD_MB_IP_GEN5 * INST_UNIT_GEN5,
191     MBAFF_MB_IP_GEN5 * INST_UNIT_GEN5
192 };
193
194 struct intra_kernel_header intra_kernel_header_gen5 = {
195     0,
196     (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
197     (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
198     (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
199
200     (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
201     (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
202     (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
203     (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
204
205     (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
206     0,
207     0xFFFC,
208
209     0,
210     (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
211     (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
212     (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
213
214     (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
215     (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
216     (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
217     (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
218
219     (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
220     0,
221     0x0001,
222
223     0,
224     (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
225     (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
226     (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
227
228     0,
229     (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
230     (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
231     (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
232
233     0xFCFBFAF9,
234
235     0x00FFFEFD,
236
237     0x04030201,
238
239     0x08070605,
240
241     0x18100800,
242
243     0x00020406,
244
245     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) * 0x1000000 + 
246     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) * 0x10000 + 
247     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) * 0x100 + 
248     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5)
249 };
250
251 static const uint32_t h264_avc_combined_gen5[][4] = {
252 #include "shaders/h264/mc/avc_mc.g4b.gen5"
253 };
254
255 static const uint32_t h264_avc_null_gen5[][4] = {
256 #include "shaders/h264/mc/null.g4b.gen5"
257 };
258
259 static struct i965_kernel h264_avc_kernels_gen5[] = {
260     {
261         "AVC combined kernel",
262         H264_AVC_COMBINED,
263         h264_avc_combined_gen5,
264         sizeof(h264_avc_combined_gen5),
265         NULL
266     },
267
268     {
269         "NULL kernel",
270         H264_AVC_NULL,
271         h264_avc_null_gen5,
272         sizeof(h264_avc_null_gen5),
273         NULL
274     }
275 };
276
/* Number of MC entry points; one interface descriptor is written per entry. */
#define NUM_AVC_MC_INTERFACES \
    (sizeof avc_mc_kernel_offset_gen4 / sizeof *avc_mc_kernel_offset_gen4)

/*
 * Tables currently in use.  Both start out NULL; they are presumably
 * pointed at the Gen4 or Gen5 variants by initialisation code that is not
 * part of this excerpt.
 */
static unsigned long *avc_mc_kernel_offset = NULL;
static struct intra_kernel_header *intra_kernel_header = NULL;
281
282 static void
283 i965_media_h264_surface_state(VADriverContextP ctx, 
284                               int index,
285                               struct object_surface *obj_surface,
286                               unsigned long offset, 
287                               int w, int h, int pitch,
288                               Bool is_dst,
289                               int vert_line_stride,
290                               int vert_line_stride_ofs,
291                               int format,
292                               struct i965_media_context *media_context)
293 {
294     struct i965_driver_data *i965 = i965_driver_data(ctx);
295     struct i965_surface_state *ss;
296     dri_bo *bo;
297     uint32_t write_domain, read_domain;
298
299     assert(obj_surface->bo);
300
301     bo = dri_bo_alloc(i965->intel.bufmgr,
302                       "surface state", 
303                       sizeof(struct i965_surface_state), 32);
304     assert(bo);
305     dri_bo_map(bo, 1);
306     assert(bo->virtual);
307     ss = bo->virtual;
308     memset(ss, 0, sizeof(*ss));
309     ss->ss0.surface_type = I965_SURFACE_2D;
310     ss->ss0.surface_format = format;
311     ss->ss0.vert_line_stride = vert_line_stride;
312     ss->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
313     ss->ss1.base_addr = obj_surface->bo->offset + offset;
314     ss->ss2.width = w - 1;
315     ss->ss2.height = h - 1;
316     ss->ss3.pitch = pitch - 1;
317
318     if (is_dst) {
319         write_domain = I915_GEM_DOMAIN_RENDER;
320         read_domain = I915_GEM_DOMAIN_RENDER;
321     } else {
322         write_domain = 0;
323         read_domain = I915_GEM_DOMAIN_SAMPLER;
324     }
325
326     dri_bo_emit_reloc(bo,
327                       read_domain, write_domain,
328                       offset,
329                       offsetof(struct i965_surface_state, ss1),
330                       obj_surface->bo);
331     dri_bo_unmap(bo);
332
333     assert(index < MAX_MEDIA_SURFACES);
334     media_context->surface_state[index].bo = bo;
335 }
336
337 static void 
338 i965_media_h264_surfaces_setup(VADriverContextP ctx, 
339                                struct decode_state *decode_state,
340                                struct i965_media_context *media_context)
341 {
342     struct i965_h264_context *i965_h264_context;
343     struct object_surface *obj_surface;
344     VAPictureParameterBufferH264 *pic_param;
345     VAPictureH264 *va_pic;
346     int i, j, w, h;
347     int field_picture;
348
349     assert(media_context->private_context);
350     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
351
352     assert(decode_state->pic_param && decode_state->pic_param->buffer);
353     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
354
355     /* Target Picture */
356     va_pic = &pic_param->CurrPic;
357     obj_surface = decode_state->render_object;
358     w = obj_surface->width;
359     h = obj_surface->height;
360     field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
361     i965_media_h264_surface_state(ctx, 0, obj_surface,
362                                   0, w / 4, h / (1 + field_picture), w,
363                                   1, 
364                                   field_picture,
365                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
366                                   I965_SURFACEFORMAT_R8_SINT,   /* Y */
367                                   media_context);
368     i965_media_h264_surface_state(ctx, 1, obj_surface,
369                                   w * h, w / 4, h / 2 / (1 + field_picture), w,
370                                   1, 
371                                   field_picture,
372                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
373                                   I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
374                                   media_context);
375
376     /* Reference Pictures */
377     for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
378         if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID &&
379             i965_h264_context->fsid_list[i].obj_surface != NULL) {
380             int found = 0;
381             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
382                 va_pic = &pic_param->ReferenceFrames[j];
383                 
384                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
385                     continue;
386
387                 if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
388                     found = 1;
389                     break;
390                 }
391             }
392
393             assert(found == 1);
394
395             obj_surface = i965_h264_context->fsid_list[i].obj_surface;
396             w = obj_surface->width;
397             h = obj_surface->height;
398             field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
399             i965_media_h264_surface_state(ctx, 2 + i, obj_surface,
400                                           0, w / 4, h / (1 + field_picture), w,
401                                           0, 
402                                           field_picture,
403                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
404                                           I965_SURFACEFORMAT_R8_SINT,   /* Y */
405                                           media_context);
406             i965_media_h264_surface_state(ctx, 18 + i, obj_surface,
407                                           w * h, w / 4, h / 2 / (1 + field_picture), w,
408                                           0, 
409                                           field_picture,
410                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
411                                           I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
412                                           media_context);
413         }
414     }
415 }
416
417 static void
418 i965_media_h264_binding_table(VADriverContextP ctx, struct i965_media_context *media_context)
419 {
420     int i;
421     unsigned int *binding_table;
422     dri_bo *bo = media_context->binding_table.bo;
423
424     dri_bo_map(bo, 1);
425     assert(bo->virtual);
426     binding_table = bo->virtual;
427     memset(binding_table, 0, bo->size);
428
429     for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
430         if (media_context->surface_state[i].bo) {
431             binding_table[i] = media_context->surface_state[i].bo->offset;
432             dri_bo_emit_reloc(bo,
433                               I915_GEM_DOMAIN_INSTRUCTION, 0,
434                               0,
435                               i * sizeof(*binding_table),
436                               media_context->surface_state[i].bo);
437         }
438     }
439
440     dri_bo_unmap(media_context->binding_table.bo);
441 }
442
443 static void 
444 i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx, struct i965_media_context *media_context)
445 {
446     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_context->private_context;
447     struct i965_interface_descriptor *desc;
448     int i;
449     dri_bo *bo;
450
451     bo = media_context->idrt.bo;
452     dri_bo_map(bo, 1);
453     assert(bo->virtual);
454     desc = bo->virtual;
455
456     for (i = 0; i < NUM_AVC_MC_INTERFACES; i++) {
457         int kernel_offset = avc_mc_kernel_offset[i];
458         memset(desc, 0, sizeof(*desc));
459         desc->desc0.grf_reg_blocks = 7; 
460         desc->desc0.kernel_start_pointer = (i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
461         desc->desc1.const_urb_entry_read_offset = 0;
462         desc->desc1.const_urb_entry_read_len = 2;
463         desc->desc3.binding_table_entry_count = 0;
464         desc->desc3.binding_table_pointer = 
465             media_context->binding_table.bo->offset >> 5; /*reloc */
466
467         dri_bo_emit_reloc(bo,
468                           I915_GEM_DOMAIN_INSTRUCTION, 0,
469                           desc->desc0.grf_reg_blocks + kernel_offset,
470                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
471                           i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo);
472
473         dri_bo_emit_reloc(bo,
474                           I915_GEM_DOMAIN_INSTRUCTION, 0,
475                           desc->desc3.binding_table_entry_count,
476                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
477                           media_context->binding_table.bo);
478         desc++;
479     }
480
481     dri_bo_unmap(bo);
482 }
483
484 static void
485 i965_media_h264_vfe_state(VADriverContextP ctx, struct i965_media_context *media_context)
486 {
487     struct i965_vfe_state *vfe_state;
488     dri_bo *bo;
489
490     bo = media_context->vfe_state.bo;
491     dri_bo_map(bo, 1);
492     assert(bo->virtual);
493     vfe_state = bo->virtual;
494     memset(vfe_state, 0, sizeof(*vfe_state));
495     vfe_state->vfe0.extend_vfe_state_present = 1;
496     vfe_state->vfe1.max_threads = media_context->urb.num_vfe_entries - 1;
497     vfe_state->vfe1.urb_entry_alloc_size = media_context->urb.size_vfe_entry - 1;
498     vfe_state->vfe1.num_urb_entries = media_context->urb.num_vfe_entries;
499     vfe_state->vfe1.vfe_mode = VFE_AVC_IT_MODE;
500     vfe_state->vfe1.children_present = 0;
501     vfe_state->vfe2.interface_descriptor_base = 
502         media_context->idrt.bo->offset >> 4; /* reloc */
503     dri_bo_emit_reloc(bo,
504                       I915_GEM_DOMAIN_INSTRUCTION, 0,
505                       0,
506                       offsetof(struct i965_vfe_state, vfe2),
507                       media_context->idrt.bo);
508     dri_bo_unmap(bo);
509 }
510
511 static void 
512 i965_media_h264_vfe_state_extension(VADriverContextP ctx, 
513                                     struct decode_state *decode_state,
514                                     struct i965_media_context *media_context)
515 {
516     struct i965_h264_context *i965_h264_context;
517     struct i965_vfe_state_ex *vfe_state_ex;
518     VAPictureParameterBufferH264 *pic_param;
519     int mbaff_frame_flag;
520
521     assert(media_context->private_context);
522     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
523
524     assert(decode_state->pic_param && decode_state->pic_param->buffer);
525     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
526     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
527                         !pic_param->pic_fields.bits.field_pic_flag);
528
529     assert(media_context->extended_state.bo);
530     dri_bo_map(media_context->extended_state.bo, 1);
531     assert(media_context->extended_state.bo->virtual);
532     vfe_state_ex = media_context->extended_state.bo->virtual;
533     memset(vfe_state_ex, 0, sizeof(*vfe_state_ex));
534
535     /*
536      * Indirect data buffer:
537      * --------------------------------------------------------
538      * | Motion Vectors | Weight/Offset data | Residual data |
539      * --------------------------------------------------------
540      * R4-R7: Motion Vectors
541      * R8-R9: Weight/Offset
542      * R10-R33: Residual data
543      */
544     vfe_state_ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
545     vfe_state_ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;
546
547     if (i965_h264_context->picture.i_flag) {
548         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
549         vfe_state_ex->vfex1.avc.weight_grf_offset = 0;
550         vfe_state_ex->vfex1.avc.residual_grf_offset = 0;
551     } else {
552         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
553         vfe_state_ex->vfex1.avc.weight_grf_offset = 4;
554         vfe_state_ex->vfex1.avc.residual_grf_offset = 6;
555     }
556
557     if (!pic_param->pic_fields.bits.field_pic_flag) {
558         if (mbaff_frame_flag) {
559             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
560             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
561             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
562             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
563             vfe_state_ex->remap_table0.remap_index_4 = MBAFF_MOTION;
564             vfe_state_ex->remap_table0.remap_index_5 = MBAFF_MOTION;
565             vfe_state_ex->remap_table0.remap_index_6 = MBAFF_MOTION;
566             vfe_state_ex->remap_table0.remap_index_7 = MBAFF_MOTION;
567
568             vfe_state_ex->remap_table1.remap_index_8 = MBAFF_MOTION;
569             vfe_state_ex->remap_table1.remap_index_9 = MBAFF_MOTION;
570             vfe_state_ex->remap_table1.remap_index_10 = MBAFF_MOTION;
571             vfe_state_ex->remap_table1.remap_index_11 = MBAFF_MOTION;
572             vfe_state_ex->remap_table1.remap_index_12 = MBAFF_MOTION;
573             vfe_state_ex->remap_table1.remap_index_13 = MBAFF_MOTION;
574             vfe_state_ex->remap_table1.remap_index_14 = MBAFF_MOTION;
575             vfe_state_ex->remap_table1.remap_index_15 = MBAFF_MOTION;
576         } else {
577             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
578             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
579             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
580             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
581             vfe_state_ex->remap_table0.remap_index_4 = FRAMEMB_MOTION;
582             vfe_state_ex->remap_table0.remap_index_5 = FRAMEMB_MOTION;
583             vfe_state_ex->remap_table0.remap_index_6 = FRAMEMB_MOTION;
584             vfe_state_ex->remap_table0.remap_index_7 = FRAMEMB_MOTION;
585
586             vfe_state_ex->remap_table1.remap_index_8 = FRAMEMB_MOTION;
587             vfe_state_ex->remap_table1.remap_index_9 = FRAMEMB_MOTION;
588             vfe_state_ex->remap_table1.remap_index_10 = FRAMEMB_MOTION;
589             vfe_state_ex->remap_table1.remap_index_11 = FRAMEMB_MOTION;
590             vfe_state_ex->remap_table1.remap_index_12 = FRAMEMB_MOTION;
591             vfe_state_ex->remap_table1.remap_index_13 = FRAMEMB_MOTION;
592             vfe_state_ex->remap_table1.remap_index_14 = FRAMEMB_MOTION;
593             vfe_state_ex->remap_table1.remap_index_15 = FRAMEMB_MOTION;
594         }
595     } else {
596         vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
597         vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
598         vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
599         vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
600         vfe_state_ex->remap_table0.remap_index_4 = FIELDMB_MOTION;
601         vfe_state_ex->remap_table0.remap_index_5 = FIELDMB_MOTION;
602         vfe_state_ex->remap_table0.remap_index_6 = FIELDMB_MOTION;
603         vfe_state_ex->remap_table0.remap_index_7 = FIELDMB_MOTION;
604
605         vfe_state_ex->remap_table1.remap_index_8 = FIELDMB_MOTION;
606         vfe_state_ex->remap_table1.remap_index_9 = FIELDMB_MOTION;
607         vfe_state_ex->remap_table1.remap_index_10 = FIELDMB_MOTION;
608         vfe_state_ex->remap_table1.remap_index_11 = FIELDMB_MOTION;
609         vfe_state_ex->remap_table1.remap_index_12 = FIELDMB_MOTION;
610         vfe_state_ex->remap_table1.remap_index_13 = FIELDMB_MOTION;
611         vfe_state_ex->remap_table1.remap_index_14 = FIELDMB_MOTION;
612         vfe_state_ex->remap_table1.remap_index_15 = FIELDMB_MOTION;
613     }
614
615     if (i965_h264_context->use_avc_hw_scoreboard) {
616         vfe_state_ex->scoreboard0.enable = 1;
617         vfe_state_ex->scoreboard0.type = SCOREBOARD_STALLING;
618         vfe_state_ex->scoreboard0.mask = 0xff;
619
620         vfe_state_ex->scoreboard1.delta_x0 = -1;
621         vfe_state_ex->scoreboard1.delta_y0 = 0;
622         vfe_state_ex->scoreboard1.delta_x1 = 0;
623         vfe_state_ex->scoreboard1.delta_y1 = -1;
624         vfe_state_ex->scoreboard1.delta_x2 = 1;
625         vfe_state_ex->scoreboard1.delta_y2 = -1;
626         vfe_state_ex->scoreboard1.delta_x3 = -1;
627         vfe_state_ex->scoreboard1.delta_y3 = -1;
628
629         vfe_state_ex->scoreboard2.delta_x4 = -1;
630         vfe_state_ex->scoreboard2.delta_y4 = 1;
631         vfe_state_ex->scoreboard2.delta_x5 = 0;
632         vfe_state_ex->scoreboard2.delta_y5 = -2;
633         vfe_state_ex->scoreboard2.delta_x6 = 1;
634         vfe_state_ex->scoreboard2.delta_y6 = -2;
635         vfe_state_ex->scoreboard2.delta_x7 = -1;
636         vfe_state_ex->scoreboard2.delta_y7 = -2;
637     }
638
639     dri_bo_unmap(media_context->extended_state.bo);
640 }
641
642 static void
643 i965_media_h264_upload_constants(VADriverContextP ctx,
644                                  struct decode_state *decode_state,
645                                  struct i965_media_context *media_context)
646 {
647     struct i965_h264_context *i965_h264_context;
648     unsigned char *constant_buffer;
649     VASliceParameterBufferH264 *slice_param;
650
651     assert(media_context->private_context);
652     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
653
654     assert(decode_state->slice_params[0] && decode_state->slice_params[0]->buffer);
655     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[0]->buffer;
656
657     dri_bo_map(media_context->curbe.bo, 1);
658     assert(media_context->curbe.bo->virtual);
659     constant_buffer = media_context->curbe.bo->virtual;
660
661     /* HW solution for W=128 */
662     if (i965_h264_context->use_hw_w128) {
663         memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
664     } else {
665         if (slice_param->slice_type == SLICE_TYPE_I ||
666             slice_param->slice_type == SLICE_TYPE_SI) {
667             memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
668         } else {
669             /* FIXME: Need to upload CURBE data to inter kernel interface 
670              * to support weighted prediction work-around 
671              */
672             *(short *)constant_buffer = i965_h264_context->weight128_offset0;
673             constant_buffer += 2;
674             *(char *)constant_buffer = i965_h264_context->weight128_offset0_flag;
675             constant_buffer++;
676             *constant_buffer = 0;
677         }
678     }
679
680     dri_bo_unmap(media_context->curbe.bo);
681 }
682
683 static void
684 i965_media_h264_states_setup(VADriverContextP ctx,
685                              struct decode_state *decode_state,
686                              struct i965_media_context *media_context)
687 {
688     struct i965_h264_context *i965_h264_context;
689
690     assert(media_context->private_context);
691     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
692
693     i965_avc_bsd_pipeline(ctx, decode_state, i965_h264_context);
694
695     if (i965_h264_context->use_avc_hw_scoreboard)
696         i965_avc_hw_scoreboard(ctx, decode_state, i965_h264_context);
697
698     i965_media_h264_surfaces_setup(ctx, decode_state, media_context);
699     i965_media_h264_binding_table(ctx, media_context);
700     i965_media_h264_interface_descriptor_remap_table(ctx, media_context);
701     i965_media_h264_vfe_state_extension(ctx, decode_state, media_context);
702     i965_media_h264_vfe_state(ctx, media_context);
703     i965_media_h264_upload_constants(ctx, decode_state, media_context);
704 }
705
706 static void
707 i965_media_h264_objects(VADriverContextP ctx,
708                         struct decode_state *decode_state,
709                         struct i965_media_context *media_context)
710 {
711     struct intel_batchbuffer *batch = media_context->base.batch;
712     struct i965_h264_context *i965_h264_context;
713     unsigned int *object_command;
714
715     assert(media_context->private_context);
716     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
717
718     dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True);
719     assert(i965_h264_context->avc_it_command_mb_info.bo->virtual);
720     object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual;
721     memset(object_command, 0, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
722     object_command += i965_h264_context->avc_it_command_mb_info.mbs * (1 + i965_h264_context->use_avc_hw_scoreboard) * MB_CMD_IN_DWS;
723     *object_command++ = 0;
724     *object_command = MI_BATCH_BUFFER_END;
725     dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo);
726
727     BEGIN_BATCH(batch, 2);
728     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
729     OUT_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo, 
730               I915_GEM_DOMAIN_COMMAND, 0, 
731               0);
732     ADVANCE_BATCH(batch);
733
734     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
735      * will cause control to pass back to ring buffer 
736      */
737     intel_batchbuffer_end_atomic(batch);
738     intel_batchbuffer_flush(batch);
739     intel_batchbuffer_start_atomic(batch, 0x1000);
740     i965_avc_ildb(ctx, decode_state, i965_h264_context);
741 }
742
743 static void 
744 i965_media_h264_free_private_context(void **data)
745 {
746     struct i965_h264_context *i965_h264_context = *data;
747     int i;
748
749     if (i965_h264_context == NULL)
750         return;
751
752     i965_avc_ildb_ternimate(&i965_h264_context->avc_ildb_context);
753     i965_avc_hw_scoreboard_ternimate(&i965_h264_context->avc_hw_scoreboard_context);
754     i965_avc_bsd_ternimate(&i965_h264_context->i965_avc_bsd_context);
755     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
756     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
757     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
758
759     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
760         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
761
762         dri_bo_unreference(kernel->bo);
763         kernel->bo = NULL;
764     }
765
766     free(i965_h264_context);
767     *data = NULL;
768 }
769
770 void
771 i965_media_h264_decode_init(VADriverContextP ctx, 
772                             struct decode_state *decode_state, 
773                             struct i965_media_context *media_context)
774 {
775     struct i965_driver_data *i965 = i965_driver_data(ctx);
776     struct i965_h264_context *i965_h264_context = media_context->private_context;
777     dri_bo *bo;
778     VAPictureParameterBufferH264 *pic_param;
779
780     assert(decode_state->pic_param && decode_state->pic_param->buffer);
781     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
782     i965_h264_context->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
783     i965_h264_context->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) / 
784         (1 + !!pic_param->pic_fields.bits.field_pic_flag); /* picture height */
785     i965_h264_context->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
786                                                    !pic_param->pic_fields.bits.field_pic_flag);
787     i965_h264_context->avc_it_command_mb_info.mbs = (i965_h264_context->picture.width_in_mbs * 
788                                                      i965_h264_context->picture.height_in_mbs);
789
790     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
791     bo = dri_bo_alloc(i965->intel.bufmgr,
792                       "avc it command mb info",
793                       i965_h264_context->avc_it_command_mb_info.mbs * MB_CMD_IN_BYTES * (1 + i965_h264_context->use_avc_hw_scoreboard) + 8,
794                       0x1000);
795     assert(bo);
796     i965_h264_context->avc_it_command_mb_info.bo = bo;
797
798     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
799     bo = dri_bo_alloc(i965->intel.bufmgr,
800                       "avc it data",
801                       i965_h264_context->avc_it_command_mb_info.mbs * 
802                       0x800 * 
803                       (1 + !!pic_param->pic_fields.bits.field_pic_flag),
804                       0x1000);
805     assert(bo);
806     i965_h264_context->avc_it_data.bo = bo;
807     i965_h264_context->avc_it_data.write_offset = 0;
808     dri_bo_unreference(media_context->indirect_object.bo);
809     media_context->indirect_object.bo = bo;
810     dri_bo_reference(media_context->indirect_object.bo);
811     media_context->indirect_object.offset = i965_h264_context->avc_it_data.write_offset;
812
813     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
814     bo = dri_bo_alloc(i965->intel.bufmgr,
815                       "AVC-ILDB Data Buffer",
816                       i965_h264_context->avc_it_command_mb_info.mbs * 64 * 2,
817                       0x1000);
818     assert(bo);
819     i965_h264_context->avc_ildb_data.bo = bo;
820
821     /* bsd pipeline */
822     i965_avc_bsd_decode_init(ctx, i965_h264_context);
823
824     /* HW scoreboard */
825     if (i965_h264_context->use_avc_hw_scoreboard)
826         i965_avc_hw_scoreboard_decode_init(ctx, i965_h264_context);
827
828     /* ILDB */
829     i965_avc_ildb_decode_init(ctx, i965_h264_context);
830
831     /* for Media pipeline */
832     media_context->extended_state.enabled = 1;
833     dri_bo_unreference(media_context->extended_state.bo);
834     bo = dri_bo_alloc(i965->intel.bufmgr, 
835                       "extened vfe state", 
836                       sizeof(struct i965_vfe_state_ex), 32);
837     assert(bo);
838     media_context->extended_state.bo = bo;
839 }
840
841 void 
842 i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context *media_context)
843 {
844     struct i965_driver_data *i965 = i965_driver_data(ctx);
845     struct i965_h264_context *i965_h264_context;
846     int i;
847
848     i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
849
850     /* kernel */
851     assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) / 
852                                     sizeof(h264_avc_kernels_gen5[0])));
853     assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
854                                      sizeof(avc_mc_kernel_offset_gen5[0])));
855     if (IS_IRONLAKE(i965->intel.device_id)) {
856         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen5, sizeof(i965_h264_context->avc_kernels));
857         avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
858         intra_kernel_header = &intra_kernel_header_gen5;
859         i965_h264_context->use_avc_hw_scoreboard = 1;
860         i965_h264_context->use_hw_w128 = 1;
861     } else {
862         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen4, sizeof(i965_h264_context->avc_kernels));
863         avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
864         intra_kernel_header = &intra_kernel_header_gen4;
865         i965_h264_context->use_avc_hw_scoreboard = 0;
866         i965_h264_context->use_hw_w128 = 0;
867     }
868
869     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
870         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
871         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
872                                   kernel->name, 
873                                   kernel->size, 0x1000);
874         assert(kernel->bo);
875         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
876     }
877
878     for (i = 0; i < 16; i++) {
879         i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID;
880         i965_h264_context->fsid_list[i].frame_store_id = -1;
881     }
882
883     i965_h264_context->batch = media_context->base.batch;
884
885     media_context->private_context = i965_h264_context;
886     media_context->free_private_context = i965_media_h264_free_private_context;
887
888     /* URB */
889     if (IS_IRONLAKE(i965->intel.device_id)) {
890         media_context->urb.num_vfe_entries = 63;
891     } else {
892         media_context->urb.num_vfe_entries = 23;
893     }
894
895     media_context->urb.size_vfe_entry = 16;
896
897     media_context->urb.num_cs_entries = 1;
898     media_context->urb.size_cs_entry = 1;
899
900     media_context->urb.vfe_start = 0;
901     media_context->urb.cs_start = media_context->urb.vfe_start + 
902         media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry;
903     assert(media_context->urb.cs_start + 
904            media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
905
906     /* hook functions */
907     media_context->media_states_setup = i965_media_h264_states_setup;
908     media_context->media_objects = i965_media_h264_objects;
909 }