i965_drv_video: store kernel info in the corresponding context
[platform/upstream/libva.git] / i965_drv_video / i965_media_h264.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <assert.h>
5
6 #include "va_backend.h"
7
8 #include "intel_batchbuffer.h"
9 #include "intel_driver.h"
10
11 #include "i965_defines.h"
12 #include "i965_drv_video.h"
13 #include "i965_media.h"
14 #include "i965_media_h264.h"
15
/*
 * Indices of the AVC MC kernel entry points.  The order is the same as the
 * rows of avc_mc_kernel_offset_gen4/gen5, and these values are what gets
 * written into the VFE remap tables.
 */
enum {
    INTRA_16X16    = 0,
    INTRA_8X8      = 1,
    INTRA_4X4      = 2,
    INTRA_PCM      = 3,
    FRAMEMB_MOTION = 4,
    FIELDMB_MOTION = 5,
    MBAFF_MOTION   = 6
};
25
/*
 * Constant data handed to the intra prediction kernels.
 *
 * i965_media_h264_upload_constants() copies an instance of this structure
 * byte-for-byte to the start of the CURBE buffer, so the order and the width
 * of every member define the layout the kernels see.  Fixed-width types are
 * used so that layout does not depend on the compiler's idea of short/int.
 *
 * The R<n>.<m> markers group the members into 32-bit slots (presumably GRF
 * register R<n>, dword <m>).
 */
struct intra_kernel_header
{
    /* R1.0 */
    uint8_t intra_4x4_luma_mode_0_offset;
    uint8_t intra_4x4_luma_mode_1_offset;
    uint8_t intra_4x4_luma_mode_2_offset;
    uint8_t intra_4x4_luma_mode_3_offset;
    /* R1.1 */
    uint8_t intra_4x4_luma_mode_4_offset;
    uint8_t intra_4x4_luma_mode_5_offset;
    uint8_t intra_4x4_luma_mode_6_offset;
    uint8_t intra_4x4_luma_mode_7_offset;
    /* R1.2 */
    uint8_t intra_4x4_luma_mode_8_offset;
    uint8_t pad0;
    uint16_t top_reference_offset;
    /* R1.3 */
    uint8_t intra_8x8_luma_mode_0_offset;
    uint8_t intra_8x8_luma_mode_1_offset;
    uint8_t intra_8x8_luma_mode_2_offset;
    uint8_t intra_8x8_luma_mode_3_offset;
    /* R1.4 */
    uint8_t intra_8x8_luma_mode_4_offset;
    uint8_t intra_8x8_luma_mode_5_offset;
    uint8_t intra_8x8_luma_mode_6_offset;
    uint8_t intra_8x8_luma_mode_7_offset;
    /* R1.5 */
    uint8_t intra_8x8_luma_mode_8_offset;
    uint8_t pad1;
    uint16_t const_reverse_data_transfer_intra_8x8;
    /* R1.6 */
    uint8_t intra_16x16_luma_mode_0_offset;
    uint8_t intra_16x16_luma_mode_1_offset;
    uint8_t intra_16x16_luma_mode_2_offset;
    uint8_t intra_16x16_luma_mode_3_offset;
    /* R1.7 */
    uint8_t intra_chroma_mode_0_offset;
    uint8_t intra_chroma_mode_1_offset;
    uint8_t intra_chroma_mode_2_offset;
    uint8_t intra_chroma_mode_3_offset;
    /* R2.0 */
    uint32_t const_intra_16x16_plane_0;
    /* R2.1 */
    uint32_t const_intra_16x16_chroma_plane_0;
    /* R2.2 */
    uint32_t const_intra_16x16_chroma_plane_1;
    /* R2.3 */
    uint32_t const_intra_16x16_plane_1;
    /* R2.4 */
    uint32_t left_shift_count_reverse_dw_ordering;
    /* R2.5 */
    uint32_t const_reverse_data_transfer_intra_4x4;
    /* R2.6 */
    uint32_t intra_4x4_pred_mode_offset;
};
81
/*
 * Constant data for the inter kernels: a 16-bit weight offset, an 8-bit
 * flag and one byte of padding.  i965_media_h264_upload_constants() writes
 * the same 2 + 1 + 1 byte sequence by hand for non-intra slices.
 * Fixed-width types pin the 4-byte layout independently of the compiler.
 */
struct inter_kernel_header
{
    uint16_t weight_offset;
    uint8_t weight_offset_flag;
    uint8_t pad0;
};
88
89 #include "shaders/h264/mc/export.inc"
90 static unsigned long avc_mc_kernel_offset_gen4[] = {
91     INTRA_16x16_IP * INST_UNIT_GEN4,
92     INTRA_8x8_IP * INST_UNIT_GEN4,
93     INTRA_4x4_IP * INST_UNIT_GEN4,
94     INTRA_PCM_IP * INST_UNIT_GEN4,
95     FRAME_MB_IP * INST_UNIT_GEN4,
96     FIELD_MB_IP * INST_UNIT_GEN4,
97     MBAFF_MB_IP * INST_UNIT_GEN4
98 };
99
100 struct intra_kernel_header intra_kernel_header_gen4 = {
101     0,
102     (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP),
103     (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP),
104     (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP),
105
106     (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
107     (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
108     (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP),
109     (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP),
110
111     (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP),
112     0,
113     0xFFFC,
114
115     0,
116     (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP),
117     (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP),
118     (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP),
119
120     (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
121     (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
122     (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP),
123     (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP),
124
125     (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP),
126     0,
127     0x0001,
128
129     0,
130     (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP),
131     (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP),
132     (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP),
133
134     0,
135     (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP),
136     (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP),
137     (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP),
138
139     0xFCFBFAF9,
140
141     0x00FFFEFD,
142
143     0x04030201,
144
145     0x08070605,
146
147     0x18100800,
148
149     0x00020406,
150
151     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) * 0x1000000 + 
152     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) * 0x10000 + 
153     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) * 0x100 + 
154     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP)
155 };
156
/*
 * Gen4 combined AVC kernel binary.  The contents come straight from the
 * included .g4b file; every row holds four 32-bit words.
 */
static const uint32_t h264_avc_combined_gen4[][4] = {
#include "shaders/h264/mc/avc_mc.g4b"
};

/*
 * Gen4 "null" kernel binary, same row format as the combined kernel above.
 */
static const uint32_t h264_avc_null_gen4[][4] = {
#include "shaders/h264/mc/null.g4b"
};
164
/*
 * Kernel descriptors for Gen4.  Each entry lists, in order: a human readable
 * name, the kernel identifier (H264_AVC_COMBINED / H264_AVC_NULL, also used
 * to index avc_kernels[] elsewhere in this file), the kernel binary and its
 * size in bytes.  The trailing NULL is presumably the not-yet-allocated
 * buffer object (the `bo` member) -- confirm against struct i965_kernel.
 */
static struct i965_kernel h264_avc_kernels_gen4[] = {
    {
        "AVC combined kernel",
        H264_AVC_COMBINED,
        h264_avc_combined_gen4,
        sizeof(h264_avc_combined_gen4),
        NULL
    },

    {
        "NULL kernel",
        H264_AVC_NULL,
        h264_avc_null_gen4,
        sizeof(h264_avc_null_gen4),
        NULL
    }
};
182
183 /* On Ironlake */
184 #include "shaders/h264/mc/export.inc.gen5"
185 static unsigned long avc_mc_kernel_offset_gen5[] = {
186     INTRA_16x16_IP_GEN5 * INST_UNIT_GEN5,
187     INTRA_8x8_IP_GEN5 * INST_UNIT_GEN5,
188     INTRA_4x4_IP_GEN5 * INST_UNIT_GEN5,
189     INTRA_PCM_IP_GEN5 * INST_UNIT_GEN5,
190     FRAME_MB_IP_GEN5 * INST_UNIT_GEN5,
191     FIELD_MB_IP_GEN5 * INST_UNIT_GEN5,
192     MBAFF_MB_IP_GEN5 * INST_UNIT_GEN5
193 };
194
195 struct intra_kernel_header intra_kernel_header_gen5 = {
196     0,
197     (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
198     (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
199     (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
200
201     (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
202     (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
203     (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
204     (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
205
206     (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
207     0,
208     0xFFFC,
209
210     0,
211     (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
212     (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
213     (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
214
215     (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
216     (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
217     (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
218     (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
219
220     (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
221     0,
222     0x0001,
223
224     0,
225     (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
226     (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
227     (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
228
229     0,
230     (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
231     (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
232     (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
233
234     0xFCFBFAF9,
235
236     0x00FFFEFD,
237
238     0x04030201,
239
240     0x08070605,
241
242     0x18100800,
243
244     0x00020406,
245
246     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) * 0x1000000 + 
247     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) * 0x10000 + 
248     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) * 0x100 + 
249     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5)
250 };
251
/*
 * Gen5 (Ironlake) combined AVC kernel binary.  The contents come straight
 * from the included .g4b.gen5 file; every row holds four 32-bit words.
 */
static const uint32_t h264_avc_combined_gen5[][4] = {
#include "shaders/h264/mc/avc_mc.g4b.gen5"
};

/*
 * Gen5 "null" kernel binary, same row format as the combined kernel above.
 */
static const uint32_t h264_avc_null_gen5[][4] = {
#include "shaders/h264/mc/null.g4b.gen5"
};
259
/*
 * Kernel descriptors for Gen5 (Ironlake).  Each entry lists, in order: a
 * human readable name, the kernel identifier (H264_AVC_COMBINED /
 * H264_AVC_NULL), the kernel binary and its size in bytes.  The trailing
 * NULL is presumably the not-yet-allocated buffer object (the `bo` member)
 * -- confirm against struct i965_kernel.
 */
static struct i965_kernel h264_avc_kernels_gen5[] = {
    {
        "AVC combined kernel",
        H264_AVC_COMBINED,
        h264_avc_combined_gen5,
        sizeof(h264_avc_combined_gen5),
        NULL
    },

    {
        "NULL kernel",
        H264_AVC_NULL,
        h264_avc_null_gen5,
        sizeof(h264_avc_null_gen5),
        NULL
    }
};
277
/* Number of kernel entry points; taken from the Gen4 offset table. */
#define NUM_AVC_MC_INTERFACES (sizeof(avc_mc_kernel_offset_gen4) / sizeof(avc_mc_kernel_offset_gen4[0]))
/*
 * Offset table in use.  Read when the interface descriptors are built.
 * NOTE(review): file-scope and NULL by default; presumably pointed at the
 * gen4 or gen5 table during initialisation outside the code visible here.
 */
static unsigned long *avc_mc_kernel_offset = NULL;

/*
 * Intra kernel constants in use; copied into the CURBE buffer by
 * i965_media_h264_upload_constants().  NOTE(review): presumably selected
 * per hardware generation alongside avc_mc_kernel_offset -- confirm.
 */
static struct intra_kernel_header *intra_kernel_header = NULL;
282
283 static void
284 i965_media_h264_surface_state(VADriverContextP ctx, 
285                               int index,
286                               struct object_surface *obj_surface,
287                               unsigned long offset, 
288                               int w, int h, int pitch,
289                               Bool is_dst,
290                               int vert_line_stride,
291                               int vert_line_stride_ofs,
292                               int format,
293                               struct i965_media_context *media_context)
294 {
295     struct i965_driver_data *i965 = i965_driver_data(ctx);
296     struct i965_surface_state *ss;
297     dri_bo *bo;
298     uint32_t write_domain, read_domain;
299
300     bo = dri_bo_alloc(i965->intel.bufmgr, 
301                       "surface state", 
302                       sizeof(struct i965_surface_state), 32);
303     assert(bo);
304     dri_bo_map(bo, 1);
305     assert(bo->virtual);
306     ss = bo->virtual;
307     memset(ss, 0, sizeof(*ss));
308     ss->ss0.surface_type = I965_SURFACE_2D;
309     ss->ss0.surface_format = format;
310     ss->ss0.vert_line_stride = vert_line_stride;
311     ss->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
312     ss->ss1.base_addr = obj_surface->bo->offset + offset;
313     ss->ss2.width = w - 1;
314     ss->ss2.height = h - 1;
315     ss->ss3.pitch = pitch - 1;
316
317     if (is_dst) {
318         write_domain = I915_GEM_DOMAIN_RENDER;
319         read_domain = I915_GEM_DOMAIN_RENDER;
320     } else {
321         write_domain = 0;
322         read_domain = I915_GEM_DOMAIN_SAMPLER;
323     }
324
325     dri_bo_emit_reloc(bo,
326                       read_domain, write_domain,
327                       offset,
328                       offsetof(struct i965_surface_state, ss1),
329                       obj_surface->bo);
330     dri_bo_unmap(bo);
331
332     assert(index < MAX_MEDIA_SURFACES);
333     media_context->surface_state[index].bo = bo;
334 }
335
336 static void 
337 i965_media_h264_surfaces_setup(VADriverContextP ctx, 
338                                struct decode_state *decode_state,
339                                struct i965_media_context *media_context)
340 {
341     struct i965_driver_data *i965 = i965_driver_data(ctx);
342     struct i965_h264_context *i965_h264_context;
343     struct object_surface *obj_surface;
344     VAPictureParameterBufferH264 *pic_param;
345     VAPictureH264 *va_pic;
346     int i, j, w, h;
347     int field_picture;
348
349     assert(media_context->private_context);
350     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
351
352     assert(decode_state->pic_param && decode_state->pic_param->buffer);
353     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
354
355     /* Target Picture */
356     va_pic = &pic_param->CurrPic;
357     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
358     obj_surface = SURFACE(va_pic->picture_id);
359     assert(obj_surface);
360     w = obj_surface->width;
361     h = obj_surface->height;
362     field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
363     i965_media_h264_surface_state(ctx, 0, obj_surface,
364                                   0, w / 4, h / (1 + field_picture), w,
365                                   1, 
366                                   field_picture,
367                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
368                                   I965_SURFACEFORMAT_R8_SINT,   /* Y */
369                                   media_context);
370     i965_media_h264_surface_state(ctx, 1, obj_surface,
371                                   w * h, w / 4, h / 2 / (1 + field_picture), w,
372                                   1, 
373                                   field_picture,
374                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
375                                   I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
376                                   media_context);
377
378     /* Reference Pictures */
379     for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
380         if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) {
381             int found = 0;
382             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
383                 va_pic = &pic_param->ReferenceFrames[j];
384                 
385                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
386                     continue;
387
388                 if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
389                     found = 1;
390                     break;
391                 }
392             }
393
394             assert(found == 1);
395
396             obj_surface = SURFACE(va_pic->picture_id);
397             assert(obj_surface);
398             w = obj_surface->width;
399             h = obj_surface->height;
400             field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
401             i965_media_h264_surface_state(ctx, 2 + i, obj_surface,
402                                           0, w / 4, h / (1 + field_picture), w,
403                                           0, 
404                                           field_picture,
405                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
406                                           I965_SURFACEFORMAT_R8_SINT,   /* Y */
407                                           media_context);
408             i965_media_h264_surface_state(ctx, 18 + i, obj_surface,
409                                           w * h, w / 4, h / 2 / (1 + field_picture), w,
410                                           0, 
411                                           field_picture,
412                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
413                                           I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
414                                           media_context);
415         }
416     }
417 }
418
419 static void
420 i965_media_h264_binding_table(VADriverContextP ctx, struct i965_media_context *media_context)
421 {
422     int i;
423     unsigned int *binding_table;
424     dri_bo *bo = media_context->binding_table.bo;
425
426     dri_bo_map(bo, 1);
427     assert(bo->virtual);
428     binding_table = bo->virtual;
429     memset(binding_table, 0, bo->size);
430
431     for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
432         if (media_context->surface_state[i].bo) {
433             binding_table[i] = media_context->surface_state[i].bo->offset;
434             dri_bo_emit_reloc(bo,
435                               I915_GEM_DOMAIN_INSTRUCTION, 0,
436                               0,
437                               i * sizeof(*binding_table),
438                               media_context->surface_state[i].bo);
439         }
440     }
441
442     dri_bo_unmap(media_context->binding_table.bo);
443 }
444
445 static void 
446 i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx, struct i965_media_context *media_context)
447 {
448     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_context->private_context;
449     struct i965_interface_descriptor *desc;
450     int i;
451     dri_bo *bo;
452
453     bo = media_context->idrt.bo;
454     dri_bo_map(bo, 1);
455     assert(bo->virtual);
456     desc = bo->virtual;
457
458     for (i = 0; i < NUM_AVC_MC_INTERFACES; i++) {
459         int kernel_offset = avc_mc_kernel_offset[i];
460         memset(desc, 0, sizeof(*desc));
461         desc->desc0.grf_reg_blocks = 7; 
462         desc->desc0.kernel_start_pointer = (i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
463         desc->desc1.const_urb_entry_read_offset = 0;
464         desc->desc1.const_urb_entry_read_len = 2;
465         desc->desc3.binding_table_entry_count = 0;
466         desc->desc3.binding_table_pointer = 
467             media_context->binding_table.bo->offset >> 5; /*reloc */
468
469         dri_bo_emit_reloc(bo,
470                           I915_GEM_DOMAIN_INSTRUCTION, 0,
471                           desc->desc0.grf_reg_blocks + kernel_offset,
472                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
473                           i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo);
474
475         dri_bo_emit_reloc(bo,
476                           I915_GEM_DOMAIN_INSTRUCTION, 0,
477                           desc->desc3.binding_table_entry_count,
478                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
479                           media_context->binding_table.bo);
480         desc++;
481     }
482
483     dri_bo_unmap(bo);
484 }
485
486 static void
487 i965_media_h264_vfe_state(VADriverContextP ctx, struct i965_media_context *media_context)
488 {
489     struct i965_vfe_state *vfe_state;
490     dri_bo *bo;
491
492     bo = media_context->vfe_state.bo;
493     dri_bo_map(bo, 1);
494     assert(bo->virtual);
495     vfe_state = bo->virtual;
496     memset(vfe_state, 0, sizeof(*vfe_state));
497     vfe_state->vfe0.extend_vfe_state_present = 1;
498     vfe_state->vfe1.max_threads = media_context->urb.num_vfe_entries - 1;
499     vfe_state->vfe1.urb_entry_alloc_size = media_context->urb.size_vfe_entry - 1;
500     vfe_state->vfe1.num_urb_entries = media_context->urb.num_vfe_entries;
501     vfe_state->vfe1.vfe_mode = VFE_AVC_IT_MODE;
502     vfe_state->vfe1.children_present = 0;
503     vfe_state->vfe2.interface_descriptor_base = 
504         media_context->idrt.bo->offset >> 4; /* reloc */
505     dri_bo_emit_reloc(bo,
506                       I915_GEM_DOMAIN_INSTRUCTION, 0,
507                       0,
508                       offsetof(struct i965_vfe_state, vfe2),
509                       media_context->idrt.bo);
510     dri_bo_unmap(bo);
511 }
512
513 static void 
514 i965_media_h264_vfe_state_extension(VADriverContextP ctx, 
515                                     struct decode_state *decode_state,
516                                     struct i965_media_context *media_context)
517 {
518     struct i965_h264_context *i965_h264_context;
519     struct i965_vfe_state_ex *vfe_state_ex;
520     VAPictureParameterBufferH264 *pic_param;
521     int mbaff_frame_flag;
522
523     assert(media_context->private_context);
524     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
525
526     assert(decode_state->pic_param && decode_state->pic_param->buffer);
527     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
528     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
529                         !pic_param->pic_fields.bits.field_pic_flag);
530
531     assert(media_context->extended_state.bo);
532     dri_bo_map(media_context->extended_state.bo, 1);
533     assert(media_context->extended_state.bo->virtual);
534     vfe_state_ex = media_context->extended_state.bo->virtual;
535     memset(vfe_state_ex, 0, sizeof(*vfe_state_ex));
536
537     /*
538      * Indirect data buffer:
539      * --------------------------------------------------------
540      * | Motion Vectors | Weight/Offset data | Residual data |
541      * --------------------------------------------------------
542      * R4-R7: Motion Vectors
543      * R8-R9: Weight/Offset
544      * R10-R33: Residual data
545      */
546     vfe_state_ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
547     vfe_state_ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;
548
549     if (i965_h264_context->picture.i_flag) {
550         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
551         vfe_state_ex->vfex1.avc.weight_grf_offset = 0;
552         vfe_state_ex->vfex1.avc.residual_grf_offset = 0;
553     } else {
554         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
555         vfe_state_ex->vfex1.avc.weight_grf_offset = 4;
556         vfe_state_ex->vfex1.avc.residual_grf_offset = 6;
557     }
558
559     if (!pic_param->pic_fields.bits.field_pic_flag) {
560         if (mbaff_frame_flag) {
561             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
562             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
563             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
564             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
565             vfe_state_ex->remap_table0.remap_index_4 = MBAFF_MOTION;
566             vfe_state_ex->remap_table0.remap_index_5 = MBAFF_MOTION;
567             vfe_state_ex->remap_table0.remap_index_6 = MBAFF_MOTION;
568             vfe_state_ex->remap_table0.remap_index_7 = MBAFF_MOTION;
569
570             vfe_state_ex->remap_table1.remap_index_8 = MBAFF_MOTION;
571             vfe_state_ex->remap_table1.remap_index_9 = MBAFF_MOTION;
572             vfe_state_ex->remap_table1.remap_index_10 = MBAFF_MOTION;
573             vfe_state_ex->remap_table1.remap_index_11 = MBAFF_MOTION;
574             vfe_state_ex->remap_table1.remap_index_12 = MBAFF_MOTION;
575             vfe_state_ex->remap_table1.remap_index_13 = MBAFF_MOTION;
576             vfe_state_ex->remap_table1.remap_index_14 = MBAFF_MOTION;
577             vfe_state_ex->remap_table1.remap_index_15 = MBAFF_MOTION;
578         } else {
579             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
580             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
581             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
582             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
583             vfe_state_ex->remap_table0.remap_index_4 = FRAMEMB_MOTION;
584             vfe_state_ex->remap_table0.remap_index_5 = FRAMEMB_MOTION;
585             vfe_state_ex->remap_table0.remap_index_6 = FRAMEMB_MOTION;
586             vfe_state_ex->remap_table0.remap_index_7 = FRAMEMB_MOTION;
587
588             vfe_state_ex->remap_table1.remap_index_8 = FRAMEMB_MOTION;
589             vfe_state_ex->remap_table1.remap_index_9 = FRAMEMB_MOTION;
590             vfe_state_ex->remap_table1.remap_index_10 = FRAMEMB_MOTION;
591             vfe_state_ex->remap_table1.remap_index_11 = FRAMEMB_MOTION;
592             vfe_state_ex->remap_table1.remap_index_12 = FRAMEMB_MOTION;
593             vfe_state_ex->remap_table1.remap_index_13 = FRAMEMB_MOTION;
594             vfe_state_ex->remap_table1.remap_index_14 = FRAMEMB_MOTION;
595             vfe_state_ex->remap_table1.remap_index_15 = FRAMEMB_MOTION;
596         }
597     } else {
598         vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
599         vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
600         vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
601         vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
602         vfe_state_ex->remap_table0.remap_index_4 = FIELDMB_MOTION;
603         vfe_state_ex->remap_table0.remap_index_5 = FIELDMB_MOTION;
604         vfe_state_ex->remap_table0.remap_index_6 = FIELDMB_MOTION;
605         vfe_state_ex->remap_table0.remap_index_7 = FIELDMB_MOTION;
606
607         vfe_state_ex->remap_table1.remap_index_8 = FIELDMB_MOTION;
608         vfe_state_ex->remap_table1.remap_index_9 = FIELDMB_MOTION;
609         vfe_state_ex->remap_table1.remap_index_10 = FIELDMB_MOTION;
610         vfe_state_ex->remap_table1.remap_index_11 = FIELDMB_MOTION;
611         vfe_state_ex->remap_table1.remap_index_12 = FIELDMB_MOTION;
612         vfe_state_ex->remap_table1.remap_index_13 = FIELDMB_MOTION;
613         vfe_state_ex->remap_table1.remap_index_14 = FIELDMB_MOTION;
614         vfe_state_ex->remap_table1.remap_index_15 = FIELDMB_MOTION;
615     }
616
617     if (i965_h264_context->use_avc_hw_scoreboard) {
618         vfe_state_ex->scoreboard0.enable = 1;
619         vfe_state_ex->scoreboard0.type = SCOREBOARD_STALLING;
620         vfe_state_ex->scoreboard0.mask = 0xff;
621
622         vfe_state_ex->scoreboard1.delta_x0 = -1;
623         vfe_state_ex->scoreboard1.delta_y0 = 0;
624         vfe_state_ex->scoreboard1.delta_x1 = 0;
625         vfe_state_ex->scoreboard1.delta_y1 = -1;
626         vfe_state_ex->scoreboard1.delta_x2 = 1;
627         vfe_state_ex->scoreboard1.delta_y2 = -1;
628         vfe_state_ex->scoreboard1.delta_x3 = -1;
629         vfe_state_ex->scoreboard1.delta_y3 = -1;
630
631         vfe_state_ex->scoreboard2.delta_x4 = -1;
632         vfe_state_ex->scoreboard2.delta_y4 = 1;
633         vfe_state_ex->scoreboard2.delta_x5 = 0;
634         vfe_state_ex->scoreboard2.delta_y5 = -2;
635         vfe_state_ex->scoreboard2.delta_x6 = 1;
636         vfe_state_ex->scoreboard2.delta_y6 = -2;
637         vfe_state_ex->scoreboard2.delta_x7 = -1;
638         vfe_state_ex->scoreboard2.delta_y7 = -2;
639     }
640
641     dri_bo_unmap(media_context->extended_state.bo);
642 }
643
644 static void
645 i965_media_h264_upload_constants(VADriverContextP ctx,
646                                  struct decode_state *decode_state,
647                                  struct i965_media_context *media_context)
648 {
649     struct i965_h264_context *i965_h264_context;
650     unsigned char *constant_buffer;
651     VASliceParameterBufferH264 *slice_param;
652
653     assert(media_context->private_context);
654     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
655
656     assert(decode_state->slice_params[0] && decode_state->slice_params[0]->buffer);
657     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[0]->buffer;
658
659     dri_bo_map(media_context->curbe.bo, 1);
660     assert(media_context->curbe.bo->virtual);
661     constant_buffer = media_context->curbe.bo->virtual;
662
663     /* HW solution for W=128 */
664     if (i965_h264_context->use_hw_w128) {
665         memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
666     } else {
667         if (slice_param->slice_type == SLICE_TYPE_I ||
668             slice_param->slice_type == SLICE_TYPE_SI) {
669             memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
670         } else {
671             /* FIXME: Need to upload CURBE data to inter kernel interface 
672              * to support weighted prediction work-around 
673              */
674             *(short *)constant_buffer = i965_h264_context->weight128_offset0;
675             constant_buffer += 2;
676             *(char *)constant_buffer = i965_h264_context->weight128_offset0_flag;
677             constant_buffer++;
678             *constant_buffer = 0;
679         }
680     }
681
682     dri_bo_unmap(media_context->curbe.bo);
683 }
684
685 static void
686 i965_media_h264_states_setup(VADriverContextP ctx,
687                              struct decode_state *decode_state,
688                              struct i965_media_context *media_context)
689 {
690     struct i965_h264_context *i965_h264_context;
691
692     assert(media_context->private_context);
693     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
694
695     i965_avc_bsd_pipeline(ctx, decode_state, i965_h264_context);
696
697     if (i965_h264_context->use_avc_hw_scoreboard)
698         i965_avc_hw_scoreboard(ctx, decode_state, i965_h264_context);
699
700     i965_media_h264_surfaces_setup(ctx, decode_state, media_context);
701     i965_media_h264_binding_table(ctx, media_context);
702     i965_media_h264_interface_descriptor_remap_table(ctx, media_context);
703     i965_media_h264_vfe_state_extension(ctx, decode_state, media_context);
704     i965_media_h264_vfe_state(ctx, media_context);
705     i965_media_h264_upload_constants(ctx, decode_state, media_context);
706 }
707
708 static void
709 i965_media_h264_objects(VADriverContextP ctx,
710                         struct decode_state *decode_state,
711                         struct i965_media_context *media_context)
712 {
713     struct i965_h264_context *i965_h264_context;
714     unsigned int *object_command;
715
716     assert(media_context->private_context);
717     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
718
719     dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True);
720     assert(i965_h264_context->avc_it_command_mb_info.bo->virtual);
721     object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual;
722     memset(object_command, 0, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
723     object_command += i965_h264_context->avc_it_command_mb_info.mbs * (1 + i965_h264_context->use_avc_hw_scoreboard) * MB_CMD_IN_DWS;
724     *object_command++ = 0;
725     *object_command = MI_BATCH_BUFFER_END;
726     dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo);
727
728     BEGIN_BATCH(ctx, 2);
729     OUT_BATCH(ctx, MI_BATCH_BUFFER_START | (2 << 6));
730     OUT_RELOC(ctx, i965_h264_context->avc_it_command_mb_info.bo, 
731               I915_GEM_DOMAIN_COMMAND, 0, 
732               0);
733     ADVANCE_BATCH(ctx);
734
735     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
736      * will cause control to pass back to ring buffer 
737      */
738     intel_batchbuffer_end_atomic(ctx);
739     intel_batchbuffer_flush(ctx);
740     intel_batchbuffer_start_atomic(ctx, 0x1000);
741     i965_avc_ildb(ctx, decode_state, i965_h264_context);
742 }
743
744 static void 
745 i965_media_h264_free_private_context(void **data)
746 {
747     struct i965_h264_context *i965_h264_context = *data;
748     int i;
749
750     if (i965_h264_context == NULL)
751         return;
752
753     i965_avc_ildb_ternimate(&i965_h264_context->avc_ildb_context);
754     i965_avc_hw_scoreboard_ternimate(&i965_h264_context->avc_hw_scoreboard_context);
755     i965_avc_bsd_ternimate(&i965_h264_context->i965_avc_bsd_context);
756     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
757     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
758     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
759
760     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
761         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
762
763         dri_bo_unreference(kernel->bo);
764         kernel->bo = NULL;
765     }
766
767     free(i965_h264_context);
768     *data = NULL;
769 }
770
771 void
772 i965_media_h264_decode_init(VADriverContextP ctx, 
773                             struct decode_state *decode_state, 
774                             struct i965_media_context *media_context)
775 {
776     struct i965_driver_data *i965 = i965_driver_data(ctx);
777     struct i965_h264_context *i965_h264_context = media_context->private_context;
778     dri_bo *bo;
779     VAPictureParameterBufferH264 *pic_param;
780
781     assert(decode_state->pic_param && decode_state->pic_param->buffer);
782     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
783     i965_h264_context->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
784     i965_h264_context->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) / 
785         (1 + !!pic_param->pic_fields.bits.field_pic_flag); /* picture height */
786     i965_h264_context->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
787                                                    !pic_param->pic_fields.bits.field_pic_flag);
788     i965_h264_context->avc_it_command_mb_info.mbs = (i965_h264_context->picture.width_in_mbs * 
789                                                      i965_h264_context->picture.height_in_mbs);
790
791     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
792     bo = dri_bo_alloc(i965->intel.bufmgr,
793                       "avc it command mb info",
794                       i965_h264_context->avc_it_command_mb_info.mbs * MB_CMD_IN_BYTES * (1 + i965_h264_context->use_avc_hw_scoreboard) + 8,
795                       0x1000);
796     assert(bo);
797     i965_h264_context->avc_it_command_mb_info.bo = bo;
798
799     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
800     bo = dri_bo_alloc(i965->intel.bufmgr,
801                       "avc it data",
802                       i965_h264_context->avc_it_command_mb_info.mbs * 
803                       0x800 * 
804                       (1 + !!pic_param->pic_fields.bits.field_pic_flag),
805                       0x1000);
806     assert(bo);
807     i965_h264_context->avc_it_data.bo = bo;
808     i965_h264_context->avc_it_data.write_offset = 0;
809     dri_bo_unreference(media_context->indirect_object.bo);
810     media_context->indirect_object.bo = bo;
811     dri_bo_reference(media_context->indirect_object.bo);
812     media_context->indirect_object.offset = i965_h264_context->avc_it_data.write_offset;
813
814     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
815     bo = dri_bo_alloc(i965->intel.bufmgr,
816                       "AVC-ILDB Data Buffer",
817                       i965_h264_context->avc_it_command_mb_info.mbs * 64 * 2,
818                       0x1000);
819     assert(bo);
820     i965_h264_context->avc_ildb_data.bo = bo;
821
822     /* bsd pipeline */
823     i965_avc_bsd_decode_init(ctx, i965_h264_context);
824
825     /* HW scoreboard */
826     if (i965_h264_context->use_avc_hw_scoreboard)
827         i965_avc_hw_scoreboard_decode_init(ctx, i965_h264_context);
828
829     /* ILDB */
830     i965_avc_ildb_decode_init(ctx, i965_h264_context);
831
832     /* for Media pipeline */
833     media_context->extended_state.enabled = 1;
834     dri_bo_unreference(media_context->extended_state.bo);
835     bo = dri_bo_alloc(i965->intel.bufmgr, 
836                       "extened vfe state", 
837                       sizeof(struct i965_vfe_state_ex), 32);
838     assert(bo);
839     media_context->extended_state.bo = bo;
840 }
841
842 void 
843 i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context *media_context)
844 {
845     struct i965_driver_data *i965 = i965_driver_data(ctx);
846     struct i965_h264_context *i965_h264_context;
847     int i;
848
849     i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
850
851     /* kernel */
852     assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) / 
853                                     sizeof(h264_avc_kernels_gen5[0])));
854     assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
855                                      sizeof(avc_mc_kernel_offset_gen5[0])));
856     if (IS_IRONLAKE(i965->intel.device_id)) {
857         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen5, sizeof(i965_h264_context->avc_kernels));
858         avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
859         intra_kernel_header = &intra_kernel_header_gen5;
860         i965_h264_context->use_avc_hw_scoreboard = 1;
861         i965_h264_context->use_hw_w128 = 1;
862     } else {
863         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen4, sizeof(i965_h264_context->avc_kernels));
864         avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
865         intra_kernel_header = &intra_kernel_header_gen4;
866         i965_h264_context->use_avc_hw_scoreboard = 0;
867         i965_h264_context->use_hw_w128 = 0;
868     }
869
870     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
871         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
872         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
873                                   kernel->name, 
874                                   kernel->size, 0x1000);
875         assert(kernel->bo);
876         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
877     }
878
879     for (i = 0; i < 16; i++) {
880         i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID;
881         i965_h264_context->fsid_list[i].frame_store_id = -1;
882     }
883
884     media_context->private_context = i965_h264_context;
885     media_context->free_private_context = i965_media_h264_free_private_context;
886
887     /* URB */
888     if (IS_IRONLAKE(i965->intel.device_id)) {
889         media_context->urb.num_vfe_entries = 63;
890     } else {
891         media_context->urb.num_vfe_entries = 23;
892     }
893
894     media_context->urb.size_vfe_entry = 16;
895
896     media_context->urb.num_cs_entries = 1;
897     media_context->urb.size_cs_entry = 1;
898
899     media_context->urb.vfe_start = 0;
900     media_context->urb.cs_start = media_context->urb.vfe_start + 
901         media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry;
902     assert(media_context->urb.cs_start + 
903            media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
904
905     /* hook functions */
906     media_context->media_states_setup = i965_media_h264_states_setup;
907     media_context->media_objects = i965_media_h264_objects;
908 }