16208773314d65bfa6ac322336581f0383089d8e
[platform/upstream/libva.git] / i965_drv_video / i965_media_h264.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <assert.h>
5
6 #include "va_backend.h"
7
8 #include "intel_batchbuffer.h"
9 #include "intel_driver.h"
10
11 #include "i965_defines.h"
12 #include "i965_drv_video.h"
13 #include "i965_media.h"
14 #include "i965_media_h264.h"
15
/*
 * Indices of the AVC MC kernel entry points. The per-generation
 * avc_mc_kernel_offset_* tables are laid out in this order, and these
 * values are what gets written into the VFE remap table.
 */
enum {
    INTRA_16X16    = 0,
    INTRA_8X8      = 1,
    INTRA_4X4      = 2,
    INTRA_PCM      = 3,
    FRAMEMB_MOTION = 4,
    FIELDMB_MOTION = 5,
    MBAFF_MOTION   = 6
};
25
/*
 * Constant data block consumed by the intra prediction kernels. An
 * instance of this structure is copied verbatim to the start of the
 * CURBE buffer, so the field order and sizes must not change. The
 * "R<n>.<m>" markers label consecutive 32-bit slots of the block.
 */
struct intra_kernel_header {
    /* R1.0: intra 4x4 luma, modes 0-3 */
    unsigned char intra_4x4_luma_mode_0_offset;
    unsigned char intra_4x4_luma_mode_1_offset;
    unsigned char intra_4x4_luma_mode_2_offset;
    unsigned char intra_4x4_luma_mode_3_offset;

    /* R1.1: intra 4x4 luma, modes 4-7 */
    unsigned char intra_4x4_luma_mode_4_offset;
    unsigned char intra_4x4_luma_mode_5_offset;
    unsigned char intra_4x4_luma_mode_6_offset;
    unsigned char intra_4x4_luma_mode_7_offset;

    /* R1.2: intra 4x4 luma mode 8, padding, top reference offset */
    unsigned char intra_4x4_luma_mode_8_offset;
    unsigned char pad0;
    unsigned short top_reference_offset;

    /* R1.3: intra 8x8 luma, modes 0-3 */
    unsigned char intra_8x8_luma_mode_0_offset;
    unsigned char intra_8x8_luma_mode_1_offset;
    unsigned char intra_8x8_luma_mode_2_offset;
    unsigned char intra_8x8_luma_mode_3_offset;

    /* R1.4: intra 8x8 luma, modes 4-7 */
    unsigned char intra_8x8_luma_mode_4_offset;
    unsigned char intra_8x8_luma_mode_5_offset;
    unsigned char intra_8x8_luma_mode_6_offset;
    unsigned char intra_8x8_luma_mode_7_offset;

    /* R1.5: intra 8x8 luma mode 8, padding, 8x8 transfer constant */
    unsigned char intra_8x8_luma_mode_8_offset;
    unsigned char pad1;
    unsigned short const_reverse_data_transfer_intra_8x8;

    /* R1.6: intra 16x16 luma, modes 0-3 */
    unsigned char intra_16x16_luma_mode_0_offset;
    unsigned char intra_16x16_luma_mode_1_offset;
    unsigned char intra_16x16_luma_mode_2_offset;
    unsigned char intra_16x16_luma_mode_3_offset;

    /* R1.7: intra chroma, modes 0-3 */
    unsigned char intra_chroma_mode_0_offset;
    unsigned char intra_chroma_mode_1_offset;
    unsigned char intra_chroma_mode_2_offset;
    unsigned char intra_chroma_mode_3_offset;

    /* R2.0 - R2.3: plane prediction constants */
    unsigned int const_intra_16x16_plane_0;
    unsigned int const_intra_16x16_chroma_plane_0;
    unsigned int const_intra_16x16_chroma_plane_1;
    unsigned int const_intra_16x16_plane_1;

    /* R2.4 */
    unsigned int left_shift_count_reverse_dw_ordering;

    /* R2.5 */
    unsigned int const_reverse_data_transfer_intra_4x4;

    /* R2.6 */
    unsigned int intra_4x4_pred_mode_offset;
};
81
/*
 * Constant data for the inter kernels: a 16-bit weight offset, a one
 * byte flag and one byte of padding.
 * NOTE(review): appears to mirror the bytes written by the inter path
 * of i965_media_h264_upload_constants() -- confirm before relying on it.
 */
struct inter_kernel_header {
    unsigned short weight_offset;
    unsigned char  weight_offset_flag;
    unsigned char  pad0;
};
88
89 #include "shaders/h264/mc/export.inc"
90 static unsigned long avc_mc_kernel_offset_gen4[] = {
91     INTRA_16x16_IP * INST_UNIT_GEN4,
92     INTRA_8x8_IP * INST_UNIT_GEN4,
93     INTRA_4x4_IP * INST_UNIT_GEN4,
94     INTRA_PCM_IP * INST_UNIT_GEN4,
95     FRAME_MB_IP * INST_UNIT_GEN4,
96     FIELD_MB_IP * INST_UNIT_GEN4,
97     MBAFF_MB_IP * INST_UNIT_GEN4
98 };
99
100 struct intra_kernel_header intra_kernel_header_gen4 = {
101     0,
102     (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP),
103     (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP),
104     (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP),
105
106     (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
107     (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
108     (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP),
109     (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP),
110
111     (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP),
112     0,
113     0xFFFC,
114
115     0,
116     (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP),
117     (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP),
118     (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP),
119
120     (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
121     (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
122     (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP),
123     (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP),
124
125     (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP),
126     0,
127     0x0001,
128
129     0,
130     (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP),
131     (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP),
132     (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP),
133
134     0,
135     (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP),
136     (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP),
137     (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP),
138
139     0xFCFBFAF9,
140
141     0x00FFFEFD,
142
143     0x04030201,
144
145     0x08070605,
146
147     0x18100800,
148
149     0x00020406,
150
151     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) * 0x1000000 + 
152     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) * 0x10000 + 
153     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) * 0x100 + 
154     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP)
155 };
156
157 static const uint32_t h264_avc_combined_gen4[][4] = {
158 #include "shaders/h264/mc/avc_mc.g4b"
159 };
160
161 static const uint32_t h264_avc_null_gen4[][4] = {
162 #include "shaders/h264/mc/null.g4b"
163 };
164
165 static struct i965_kernel h264_avc_kernels_gen4[] = {
166     {
167         "AVC combined kernel",
168         H264_AVC_COMBINED,
169         h264_avc_combined_gen4,
170         sizeof(h264_avc_combined_gen4),
171         NULL
172     },
173
174     {
175         "NULL kernel",
176         H264_AVC_NULL,
177         h264_avc_null_gen4,
178         sizeof(h264_avc_null_gen4),
179         NULL
180     }
181 };
182
183 /* On Ironlake */
184 #include "shaders/h264/mc/export.inc.gen5"
185 static unsigned long avc_mc_kernel_offset_gen5[] = {
186     INTRA_16x16_IP_GEN5 * INST_UNIT_GEN5,
187     INTRA_8x8_IP_GEN5 * INST_UNIT_GEN5,
188     INTRA_4x4_IP_GEN5 * INST_UNIT_GEN5,
189     INTRA_PCM_IP_GEN5 * INST_UNIT_GEN5,
190     FRAME_MB_IP_GEN5 * INST_UNIT_GEN5,
191     FIELD_MB_IP_GEN5 * INST_UNIT_GEN5,
192     MBAFF_MB_IP_GEN5 * INST_UNIT_GEN5
193 };
194
195 struct intra_kernel_header intra_kernel_header_gen5 = {
196     0,
197     (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
198     (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
199     (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
200
201     (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
202     (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
203     (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
204     (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
205
206     (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
207     0,
208     0xFFFC,
209
210     0,
211     (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
212     (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
213     (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
214
215     (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
216     (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
217     (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
218     (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
219
220     (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
221     0,
222     0x0001,
223
224     0,
225     (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
226     (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
227     (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
228
229     0,
230     (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
231     (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
232     (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
233
234     0xFCFBFAF9,
235
236     0x00FFFEFD,
237
238     0x04030201,
239
240     0x08070605,
241
242     0x18100800,
243
244     0x00020406,
245
246     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) * 0x1000000 + 
247     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) * 0x10000 + 
248     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) * 0x100 + 
249     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5)
250 };
251
252 static const uint32_t h264_avc_combined_gen5[][4] = {
253 #include "shaders/h264/mc/avc_mc.g4b.gen5"
254 };
255
256 static const uint32_t h264_avc_null_gen5[][4] = {
257 #include "shaders/h264/mc/null.g4b.gen5"
258 };
259
260 static struct i965_kernel h264_avc_kernels_gen5[] = {
261     {
262         "AVC combined kernel",
263         H264_AVC_COMBINED,
264         h264_avc_combined_gen5,
265         sizeof(h264_avc_combined_gen5),
266         NULL
267     },
268
269     {
270         "NULL kernel",
271         H264_AVC_NULL,
272         h264_avc_null_gen5,
273         sizeof(h264_avc_null_gen5),
274         NULL
275     }
276 };
277
/*
 * Table sizes are taken from the gen4 tables; the gen5 tables above
 * have the same number of entries.
 */
#define NUM_H264_AVC_KERNELS \
    (sizeof(h264_avc_kernels_gen4) / sizeof(h264_avc_kernels_gen4[0]))
#define NUM_AVC_MC_INTERFACES \
    (sizeof(avc_mc_kernel_offset_gen4) / sizeof(avc_mc_kernel_offset_gen4[0]))

/*
 * Pointers to the tables of the hardware generation in use. They start
 * out NULL and are read by the state setup functions below.
 */
struct i965_kernel *h264_avc_kernels = NULL;
static unsigned long *avc_mc_kernel_offset = NULL;
static struct intra_kernel_header *intra_kernel_header = NULL;
285
286 static void
287 i965_media_h264_surface_state(VADriverContextP ctx, 
288                               int index,
289                               struct object_surface *obj_surface,
290                               unsigned long offset, 
291                               int w, int h, int pitch,
292                               Bool is_dst,
293                               int vert_line_stride,
294                               int vert_line_stride_ofs,
295                               int format,
296                               struct i965_media_context *media_context)
297 {
298     struct i965_driver_data *i965 = i965_driver_data(ctx);
299     struct i965_surface_state *ss;
300     dri_bo *bo;
301     uint32_t write_domain, read_domain;
302
303     bo = dri_bo_alloc(i965->intel.bufmgr, 
304                       "surface state", 
305                       sizeof(struct i965_surface_state), 32);
306     assert(bo);
307     dri_bo_map(bo, 1);
308     assert(bo->virtual);
309     ss = bo->virtual;
310     memset(ss, 0, sizeof(*ss));
311     ss->ss0.surface_type = I965_SURFACE_2D;
312     ss->ss0.surface_format = format;
313     ss->ss0.vert_line_stride = vert_line_stride;
314     ss->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
315     ss->ss1.base_addr = obj_surface->bo->offset + offset;
316     ss->ss2.width = w - 1;
317     ss->ss2.height = h - 1;
318     ss->ss3.pitch = pitch - 1;
319
320     if (is_dst) {
321         write_domain = I915_GEM_DOMAIN_RENDER;
322         read_domain = I915_GEM_DOMAIN_RENDER;
323     } else {
324         write_domain = 0;
325         read_domain = I915_GEM_DOMAIN_SAMPLER;
326     }
327
328     dri_bo_emit_reloc(bo,
329                       read_domain, write_domain,
330                       offset,
331                       offsetof(struct i965_surface_state, ss1),
332                       obj_surface->bo);
333     dri_bo_unmap(bo);
334
335     assert(index < MAX_MEDIA_SURFACES);
336     media_context->surface_state[index].bo = bo;
337 }
338
339 static void 
340 i965_media_h264_surfaces_setup(VADriverContextP ctx, 
341                                struct decode_state *decode_state,
342                                struct i965_media_context *media_context)
343 {
344     struct i965_driver_data *i965 = i965_driver_data(ctx);
345     struct i965_h264_context *i965_h264_context;
346     struct object_surface *obj_surface;
347     VAPictureParameterBufferH264 *pic_param;
348     VAPictureH264 *va_pic;
349     int i, j, w, h;
350     int field_picture;
351
352     assert(media_context->private_context);
353     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
354
355     assert(decode_state->pic_param && decode_state->pic_param->buffer);
356     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
357
358     /* Target Picture */
359     va_pic = &pic_param->CurrPic;
360     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
361     obj_surface = SURFACE(va_pic->picture_id);
362     assert(obj_surface);
363     w = obj_surface->width;
364     h = obj_surface->height;
365     field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
366     i965_media_h264_surface_state(ctx, 0, obj_surface,
367                                   0, w / 4, h / (1 + field_picture), w,
368                                   1, 
369                                   field_picture,
370                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
371                                   I965_SURFACEFORMAT_R8_SINT,   /* Y */
372                                   media_context);
373     i965_media_h264_surface_state(ctx, 1, obj_surface,
374                                   w * h, w / 4, h / 2 / (1 + field_picture), w,
375                                   1, 
376                                   field_picture,
377                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
378                                   I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
379                                   media_context);
380
381     /* Reference Pictures */
382     for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
383         if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) {
384             int found = 0;
385             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
386                 va_pic = &pic_param->ReferenceFrames[j];
387                 
388                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
389                     continue;
390
391                 if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
392                     found = 1;
393                     break;
394                 }
395             }
396
397             assert(found == 1);
398
399             obj_surface = SURFACE(va_pic->picture_id);
400             assert(obj_surface);
401             w = obj_surface->width;
402             h = obj_surface->height;
403             field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
404             i965_media_h264_surface_state(ctx, 2 + i, obj_surface,
405                                           0, w / 4, h / (1 + field_picture), w,
406                                           0, 
407                                           field_picture,
408                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
409                                           I965_SURFACEFORMAT_R8_SINT,   /* Y */
410                                           media_context);
411             i965_media_h264_surface_state(ctx, 18 + i, obj_surface,
412                                           w * h, w / 4, h / 2 / (1 + field_picture), w,
413                                           0, 
414                                           field_picture,
415                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
416                                           I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
417                                           media_context);
418         }
419     }
420 }
421
422 static void
423 i965_media_h264_binding_table(VADriverContextP ctx, struct i965_media_context *media_context)
424 {
425     int i;
426     unsigned int *binding_table;
427     dri_bo *bo = media_context->binding_table.bo;
428
429     dri_bo_map(bo, 1);
430     assert(bo->virtual);
431     binding_table = bo->virtual;
432     memset(binding_table, 0, bo->size);
433
434     for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
435         if (media_context->surface_state[i].bo) {
436             binding_table[i] = media_context->surface_state[i].bo->offset;
437             dri_bo_emit_reloc(bo,
438                               I915_GEM_DOMAIN_INSTRUCTION, 0,
439                               0,
440                               i * sizeof(*binding_table),
441                               media_context->surface_state[i].bo);
442         }
443     }
444
445     dri_bo_unmap(media_context->binding_table.bo);
446 }
447
448 static void 
449 i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx, struct i965_media_context *media_context)
450 {
451     struct i965_interface_descriptor *desc;
452     int i;
453     dri_bo *bo;
454
455     bo = media_context->idrt.bo;
456     dri_bo_map(bo, 1);
457     assert(bo->virtual);
458     desc = bo->virtual;
459
460     for (i = 0; i < NUM_AVC_MC_INTERFACES; i++) {
461         int kernel_offset = avc_mc_kernel_offset[i];
462         memset(desc, 0, sizeof(*desc));
463         desc->desc0.grf_reg_blocks = 7; 
464         desc->desc0.kernel_start_pointer = (h264_avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
465         desc->desc1.const_urb_entry_read_offset = 0;
466         desc->desc1.const_urb_entry_read_len = 2;
467         desc->desc3.binding_table_entry_count = 0;
468         desc->desc3.binding_table_pointer = 
469             media_context->binding_table.bo->offset >> 5; /*reloc */
470
471         dri_bo_emit_reloc(bo,
472                           I915_GEM_DOMAIN_INSTRUCTION, 0,
473                           desc->desc0.grf_reg_blocks + kernel_offset,
474                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
475                           h264_avc_kernels[H264_AVC_COMBINED].bo);
476
477         dri_bo_emit_reloc(bo,
478                           I915_GEM_DOMAIN_INSTRUCTION, 0,
479                           desc->desc3.binding_table_entry_count,
480                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
481                           media_context->binding_table.bo);
482         desc++;
483     }
484
485     dri_bo_unmap(bo);
486 }
487
488 static void
489 i965_media_h264_vfe_state(VADriverContextP ctx, struct i965_media_context *media_context)
490 {
491     struct i965_vfe_state *vfe_state;
492     dri_bo *bo;
493
494     bo = media_context->vfe_state.bo;
495     dri_bo_map(bo, 1);
496     assert(bo->virtual);
497     vfe_state = bo->virtual;
498     memset(vfe_state, 0, sizeof(*vfe_state));
499     vfe_state->vfe0.extend_vfe_state_present = 1;
500     vfe_state->vfe1.max_threads = media_context->urb.num_vfe_entries - 1;
501     vfe_state->vfe1.urb_entry_alloc_size = media_context->urb.size_vfe_entry - 1;
502     vfe_state->vfe1.num_urb_entries = media_context->urb.num_vfe_entries;
503     vfe_state->vfe1.vfe_mode = VFE_AVC_IT_MODE;
504     vfe_state->vfe1.children_present = 0;
505     vfe_state->vfe2.interface_descriptor_base = 
506         media_context->idrt.bo->offset >> 4; /* reloc */
507     dri_bo_emit_reloc(bo,
508                       I915_GEM_DOMAIN_INSTRUCTION, 0,
509                       0,
510                       offsetof(struct i965_vfe_state, vfe2),
511                       media_context->idrt.bo);
512     dri_bo_unmap(bo);
513 }
514
515 static void 
516 i965_media_h264_vfe_state_extension(VADriverContextP ctx, 
517                                     struct decode_state *decode_state,
518                                     struct i965_media_context *media_context)
519 {
520     struct i965_h264_context *i965_h264_context;
521     struct i965_vfe_state_ex *vfe_state_ex;
522     VAPictureParameterBufferH264 *pic_param;
523     int mbaff_frame_flag;
524
525     assert(media_context->private_context);
526     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
527
528     assert(decode_state->pic_param && decode_state->pic_param->buffer);
529     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
530     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
531                         !pic_param->pic_fields.bits.field_pic_flag);
532
533     assert(media_context->extended_state.bo);
534     dri_bo_map(media_context->extended_state.bo, 1);
535     assert(media_context->extended_state.bo->virtual);
536     vfe_state_ex = media_context->extended_state.bo->virtual;
537     memset(vfe_state_ex, 0, sizeof(*vfe_state_ex));
538
539     /*
540      * Indirect data buffer:
541      * --------------------------------------------------------
542      * | Motion Vectors | Weight/Offset data | Residual data |
543      * --------------------------------------------------------
544      * R4-R7: Motion Vectors
545      * R8-R9: Weight/Offset
546      * R10-R33: Residual data
547      */
548     vfe_state_ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
549     vfe_state_ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;
550
551     if (i965_h264_context->picture.i_flag) {
552         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
553         vfe_state_ex->vfex1.avc.weight_grf_offset = 0;
554         vfe_state_ex->vfex1.avc.residual_grf_offset = 0;
555     } else {
556         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
557         vfe_state_ex->vfex1.avc.weight_grf_offset = 4;
558         vfe_state_ex->vfex1.avc.residual_grf_offset = 6;
559     }
560
561     if (!pic_param->pic_fields.bits.field_pic_flag) {
562         if (mbaff_frame_flag) {
563             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
564             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
565             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
566             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
567             vfe_state_ex->remap_table0.remap_index_4 = MBAFF_MOTION;
568             vfe_state_ex->remap_table0.remap_index_5 = MBAFF_MOTION;
569             vfe_state_ex->remap_table0.remap_index_6 = MBAFF_MOTION;
570             vfe_state_ex->remap_table0.remap_index_7 = MBAFF_MOTION;
571
572             vfe_state_ex->remap_table1.remap_index_8 = MBAFF_MOTION;
573             vfe_state_ex->remap_table1.remap_index_9 = MBAFF_MOTION;
574             vfe_state_ex->remap_table1.remap_index_10 = MBAFF_MOTION;
575             vfe_state_ex->remap_table1.remap_index_11 = MBAFF_MOTION;
576             vfe_state_ex->remap_table1.remap_index_12 = MBAFF_MOTION;
577             vfe_state_ex->remap_table1.remap_index_13 = MBAFF_MOTION;
578             vfe_state_ex->remap_table1.remap_index_14 = MBAFF_MOTION;
579             vfe_state_ex->remap_table1.remap_index_15 = MBAFF_MOTION;
580         } else {
581             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
582             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
583             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
584             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
585             vfe_state_ex->remap_table0.remap_index_4 = FRAMEMB_MOTION;
586             vfe_state_ex->remap_table0.remap_index_5 = FRAMEMB_MOTION;
587             vfe_state_ex->remap_table0.remap_index_6 = FRAMEMB_MOTION;
588             vfe_state_ex->remap_table0.remap_index_7 = FRAMEMB_MOTION;
589
590             vfe_state_ex->remap_table1.remap_index_8 = FRAMEMB_MOTION;
591             vfe_state_ex->remap_table1.remap_index_9 = FRAMEMB_MOTION;
592             vfe_state_ex->remap_table1.remap_index_10 = FRAMEMB_MOTION;
593             vfe_state_ex->remap_table1.remap_index_11 = FRAMEMB_MOTION;
594             vfe_state_ex->remap_table1.remap_index_12 = FRAMEMB_MOTION;
595             vfe_state_ex->remap_table1.remap_index_13 = FRAMEMB_MOTION;
596             vfe_state_ex->remap_table1.remap_index_14 = FRAMEMB_MOTION;
597             vfe_state_ex->remap_table1.remap_index_15 = FRAMEMB_MOTION;
598         }
599     } else {
600         vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
601         vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
602         vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
603         vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
604         vfe_state_ex->remap_table0.remap_index_4 = FIELDMB_MOTION;
605         vfe_state_ex->remap_table0.remap_index_5 = FIELDMB_MOTION;
606         vfe_state_ex->remap_table0.remap_index_6 = FIELDMB_MOTION;
607         vfe_state_ex->remap_table0.remap_index_7 = FIELDMB_MOTION;
608
609         vfe_state_ex->remap_table1.remap_index_8 = FIELDMB_MOTION;
610         vfe_state_ex->remap_table1.remap_index_9 = FIELDMB_MOTION;
611         vfe_state_ex->remap_table1.remap_index_10 = FIELDMB_MOTION;
612         vfe_state_ex->remap_table1.remap_index_11 = FIELDMB_MOTION;
613         vfe_state_ex->remap_table1.remap_index_12 = FIELDMB_MOTION;
614         vfe_state_ex->remap_table1.remap_index_13 = FIELDMB_MOTION;
615         vfe_state_ex->remap_table1.remap_index_14 = FIELDMB_MOTION;
616         vfe_state_ex->remap_table1.remap_index_15 = FIELDMB_MOTION;
617     }
618
619     if (i965_h264_context->use_avc_hw_scoreboard) {
620         vfe_state_ex->scoreboard0.enable = 1;
621         vfe_state_ex->scoreboard0.type = SCOREBOARD_STALLING;
622         vfe_state_ex->scoreboard0.mask = 0xff;
623
624         vfe_state_ex->scoreboard1.delta_x0 = -1;
625         vfe_state_ex->scoreboard1.delta_y0 = 0;
626         vfe_state_ex->scoreboard1.delta_x1 = 0;
627         vfe_state_ex->scoreboard1.delta_y1 = -1;
628         vfe_state_ex->scoreboard1.delta_x2 = 1;
629         vfe_state_ex->scoreboard1.delta_y2 = -1;
630         vfe_state_ex->scoreboard1.delta_x3 = -1;
631         vfe_state_ex->scoreboard1.delta_y3 = -1;
632
633         vfe_state_ex->scoreboard2.delta_x4 = -1;
634         vfe_state_ex->scoreboard2.delta_y4 = 1;
635         vfe_state_ex->scoreboard2.delta_x5 = 0;
636         vfe_state_ex->scoreboard2.delta_y5 = -2;
637         vfe_state_ex->scoreboard2.delta_x6 = 1;
638         vfe_state_ex->scoreboard2.delta_y6 = -2;
639         vfe_state_ex->scoreboard2.delta_x7 = -1;
640         vfe_state_ex->scoreboard2.delta_y7 = -2;
641     }
642
643     dri_bo_unmap(media_context->extended_state.bo);
644 }
645
646 static void
647 i965_media_h264_upload_constants(VADriverContextP ctx,
648                                  struct decode_state *decode_state,
649                                  struct i965_media_context *media_context)
650 {
651     struct i965_h264_context *i965_h264_context;
652     unsigned char *constant_buffer;
653     VASliceParameterBufferH264 *slice_param;
654
655     assert(media_context->private_context);
656     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
657
658     assert(decode_state->slice_params[0] && decode_state->slice_params[0]->buffer);
659     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[0]->buffer;
660
661     dri_bo_map(media_context->curbe.bo, 1);
662     assert(media_context->curbe.bo->virtual);
663     constant_buffer = media_context->curbe.bo->virtual;
664
665     /* HW solution for W=128 */
666     if (i965_h264_context->use_hw_w128) {
667         memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
668     } else {
669         if (slice_param->slice_type == SLICE_TYPE_I ||
670             slice_param->slice_type == SLICE_TYPE_SI) {
671             memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
672         } else {
673             /* FIXME: Need to upload CURBE data to inter kernel interface 
674              * to support weighted prediction work-around 
675              */
676             *(short *)constant_buffer = i965_h264_context->weight128_offset0;
677             constant_buffer += 2;
678             *(char *)constant_buffer = i965_h264_context->weight128_offset0_flag;
679             constant_buffer++;
680             *constant_buffer = 0;
681         }
682     }
683
684     dri_bo_unmap(media_context->curbe.bo);
685 }
686
687 static void
688 i965_media_h264_states_setup(VADriverContextP ctx,
689                              struct decode_state *decode_state,
690                              struct i965_media_context *media_context)
691 {
692     struct i965_h264_context *i965_h264_context;
693
694     assert(media_context->private_context);
695     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
696
697     i965_avc_bsd_pipeline(ctx, decode_state, i965_h264_context);
698
699     if (i965_h264_context->use_avc_hw_scoreboard)
700         i965_avc_hw_scoreboard(ctx, decode_state, i965_h264_context);
701
702     i965_media_h264_surfaces_setup(ctx, decode_state, media_context);
703     i965_media_h264_binding_table(ctx, media_context);
704     i965_media_h264_interface_descriptor_remap_table(ctx, media_context);
705     i965_media_h264_vfe_state_extension(ctx, decode_state, media_context);
706     i965_media_h264_vfe_state(ctx, media_context);
707     i965_media_h264_upload_constants(ctx, decode_state, media_context);
708 }
709
710 static void
711 i965_media_h264_objects(VADriverContextP ctx,
712                         struct decode_state *decode_state,
713                         struct i965_media_context *media_context)
714 {
715     struct i965_h264_context *i965_h264_context;
716     unsigned int *object_command;
717
718     assert(media_context->private_context);
719     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
720
721     dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True);
722     assert(i965_h264_context->avc_it_command_mb_info.bo->virtual);
723     object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual;
724     memset(object_command, 0, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
725     object_command += i965_h264_context->avc_it_command_mb_info.mbs * (1 + i965_h264_context->use_avc_hw_scoreboard) * MB_CMD_IN_DWS;
726     *object_command++ = 0;
727     *object_command = MI_BATCH_BUFFER_END;
728     dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo);
729
730     BEGIN_BATCH(ctx, 2);
731     OUT_BATCH(ctx, MI_BATCH_BUFFER_START | (2 << 6));
732     OUT_RELOC(ctx, i965_h264_context->avc_it_command_mb_info.bo, 
733               I915_GEM_DOMAIN_COMMAND, 0, 
734               0);
735     ADVANCE_BATCH(ctx);
736
737     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
738      * will cause control to pass back to ring buffer 
739      */
740     intel_batchbuffer_end_atomic(ctx);
741     intel_batchbuffer_flush(ctx);
742     intel_batchbuffer_start_atomic(ctx, 0x1000);
743     i965_avc_ildb(ctx, decode_state, i965_h264_context);
744 }
745
746 static void 
747 i965_media_h264_free_private_context(void **data)
748 {
749     struct i965_h264_context *i965_h264_context = *data;
750     int i;
751
752     if (i965_h264_context == NULL)
753         return;
754
755     i965_avc_ildb_ternimate(&i965_h264_context->avc_ildb_context);
756     i965_avc_hw_scoreboard_ternimate(&i965_h264_context->avc_hw_scoreboard_context);
757     i965_avc_bsd_ternimate(&i965_h264_context->i965_avc_bsd_context);
758     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
759     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
760     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
761     free(i965_h264_context);
762     *data = NULL;
763
764     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
765         struct i965_kernel *kernel = &h264_avc_kernels[i];
766
767         dri_bo_unreference(kernel->bo);
768         kernel->bo = NULL;
769     }
770 }
771
772 void
773 i965_media_h264_decode_init(VADriverContextP ctx, 
774                             struct decode_state *decode_state, 
775                             struct i965_media_context *media_context)
776 {
777     struct i965_driver_data *i965 = i965_driver_data(ctx);
778     struct i965_h264_context *i965_h264_context = media_context->private_context;
779     dri_bo *bo;
780     VAPictureParameterBufferH264 *pic_param;
781
782     assert(decode_state->pic_param && decode_state->pic_param->buffer);
783     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
784     i965_h264_context->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
785     i965_h264_context->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) / 
786         (1 + !!pic_param->pic_fields.bits.field_pic_flag); /* picture height */
787     i965_h264_context->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
788                                                    !pic_param->pic_fields.bits.field_pic_flag);
789     i965_h264_context->avc_it_command_mb_info.mbs = (i965_h264_context->picture.width_in_mbs * 
790                                                      i965_h264_context->picture.height_in_mbs);
791
792     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
793     bo = dri_bo_alloc(i965->intel.bufmgr,
794                       "avc it command mb info",
795                       i965_h264_context->avc_it_command_mb_info.mbs * MB_CMD_IN_BYTES * (1 + i965_h264_context->use_avc_hw_scoreboard) + 8,
796                       0x1000);
797     assert(bo);
798     i965_h264_context->avc_it_command_mb_info.bo = bo;
799
800     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
801     bo = dri_bo_alloc(i965->intel.bufmgr,
802                       "avc it data",
803                       i965_h264_context->avc_it_command_mb_info.mbs * 
804                       0x800 * 
805                       (1 + !!pic_param->pic_fields.bits.field_pic_flag),
806                       0x1000);
807     assert(bo);
808     i965_h264_context->avc_it_data.bo = bo;
809     i965_h264_context->avc_it_data.write_offset = 0;
810     dri_bo_unreference(media_context->indirect_object.bo);
811     media_context->indirect_object.bo = bo;
812     dri_bo_reference(media_context->indirect_object.bo);
813     media_context->indirect_object.offset = i965_h264_context->avc_it_data.write_offset;
814
815     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
816     bo = dri_bo_alloc(i965->intel.bufmgr,
817                       "AVC-ILDB Data Buffer",
818                       i965_h264_context->avc_it_command_mb_info.mbs * 64 * 2,
819                       0x1000);
820     assert(bo);
821     i965_h264_context->avc_ildb_data.bo = bo;
822
823     /* bsd pipeline */
824     i965_avc_bsd_decode_init(ctx, i965_h264_context);
825
826     /* HW scoreboard */
827     if (i965_h264_context->use_avc_hw_scoreboard)
828         i965_avc_hw_scoreboard_decode_init(ctx, i965_h264_context);
829
830     /* ILDB */
831     i965_avc_ildb_decode_init(ctx, i965_h264_context);
832
833     /* for Media pipeline */
834     media_context->extended_state.enabled = 1;
835     dri_bo_unreference(media_context->extended_state.bo);
836     bo = dri_bo_alloc(i965->intel.bufmgr, 
837                       "extened vfe state", 
838                       sizeof(struct i965_vfe_state_ex), 32);
839     assert(bo);
840     media_context->extended_state.bo = bo;
841 }
842
843 void 
844 i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context *media_context)
845 {
846     struct i965_driver_data *i965 = i965_driver_data(ctx);
847     struct i965_h264_context *i965_h264_context;
848     int i;
849
850     i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
851
852     /* kernel */
853     if (h264_avc_kernels == NULL) {
854         assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) / 
855                                         sizeof(h264_avc_kernels_gen5[0])));
856         assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
857                                          sizeof(avc_mc_kernel_offset_gen5[0])));
858
859         if (IS_IRONLAKE(i965->intel.device_id)) {
860             h264_avc_kernels = h264_avc_kernels_gen5;
861             avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
862             intra_kernel_header = &intra_kernel_header_gen5;
863             i965_h264_context->use_avc_hw_scoreboard = 1;
864             i965_h264_context->use_hw_w128 = 1;
865         } else {
866             h264_avc_kernels = h264_avc_kernels_gen4;
867             avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
868             intra_kernel_header = &intra_kernel_header_gen4;
869             i965_h264_context->use_avc_hw_scoreboard = 0;
870             i965_h264_context->use_hw_w128 = 0;
871         }
872
873         for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
874             struct i965_kernel *kernel = &h264_avc_kernels[i];
875             kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
876                                       kernel->name, 
877                                       kernel->size, 0x1000);
878             assert(kernel->bo);
879             dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
880         }
881     }
882
883     for (i = 0; i < 16; i++) {
884         i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID;
885         i965_h264_context->fsid_list[i].frame_store_id = -1;
886     }
887
888     media_context->private_context = i965_h264_context;
889     media_context->free_private_context = i965_media_h264_free_private_context;
890
891     /* URB */
892     if (IS_IRONLAKE(i965->intel.device_id)) {
893         media_context->urb.num_vfe_entries = 63;
894     } else {
895         media_context->urb.num_vfe_entries = 23;
896     }
897
898     media_context->urb.size_vfe_entry = 16;
899
900     media_context->urb.num_cs_entries = 1;
901     media_context->urb.size_cs_entry = 1;
902
903     media_context->urb.vfe_start = 0;
904     media_context->urb.cs_start = media_context->urb.vfe_start + 
905         media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry;
906     assert(media_context->urb.cs_start + 
907            media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
908
909     /* hook functions */
910     media_context->media_states_setup = i965_media_h264_states_setup;
911     media_context->media_objects = i965_media_h264_objects;
912 }