i965_drv_video: [H.264] track frame store index
[profile/ivi/libva.git] / i965_drv_video / i965_media_h264.c
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <assert.h>
5
6 #include "va_backend.h"
7
8 #include "intel_batchbuffer.h"
9 #include "intel_driver.h"
10
11 #include "i965_defines.h"
12 #include "i965_drv_video.h"
13 #include "i965_media.h"
14 #include "i965_media_h264.h"
15
/*
 * Indices of the AVC motion-compensation kernel entry points.
 *
 * The values are written into the VFE remap tables and follow the same
 * order as the per-generation avc_mc_kernel_offset_* arrays in this file.
 */
enum {
    INTRA_16X16    = 0,
    INTRA_8X8      = 1,
    INTRA_4X4      = 2,
    INTRA_PCM      = 3,
    FRAMEMB_MOTION = 4,
    FIELDMB_MOTION = 5,
    MBAFF_MOTION   = 6
};
25
/*
 * Constant data handed to the intra prediction kernels.
 *
 * The structure is copied verbatim into the constant buffer, so its layout
 * is significant: the R<n>.<m> markers name the register and dword each
 * group of members occupies.  Two registers of eight and seven dwords give
 * 60 bytes in total.
 */
struct intra_kernel_header
{
    /* R1.0: entry offsets for intra 4x4 luma modes 0-3 */
    uint8_t intra_4x4_luma_mode_0_offset;
    uint8_t intra_4x4_luma_mode_1_offset;
    uint8_t intra_4x4_luma_mode_2_offset;
    uint8_t intra_4x4_luma_mode_3_offset;
    /* R1.1: entry offsets for intra 4x4 luma modes 4-7 */
    uint8_t intra_4x4_luma_mode_4_offset;
    uint8_t intra_4x4_luma_mode_5_offset;
    uint8_t intra_4x4_luma_mode_6_offset;
    uint8_t intra_4x4_luma_mode_7_offset;
    /* R1.2: last intra 4x4 luma mode, padding, top reference offset */
    uint8_t intra_4x4_luma_mode_8_offset;
    uint8_t pad0;
    uint16_t top_reference_offset;
    /* R1.3: entry offsets for intra 8x8 luma modes 0-3 */
    uint8_t intra_8x8_luma_mode_0_offset;
    uint8_t intra_8x8_luma_mode_1_offset;
    uint8_t intra_8x8_luma_mode_2_offset;
    uint8_t intra_8x8_luma_mode_3_offset;
    /* R1.4: entry offsets for intra 8x8 luma modes 4-7 */
    uint8_t intra_8x8_luma_mode_4_offset;
    uint8_t intra_8x8_luma_mode_5_offset;
    uint8_t intra_8x8_luma_mode_6_offset;
    uint8_t intra_8x8_luma_mode_7_offset;
    /* R1.5: last intra 8x8 luma mode, padding, 8x8 transfer constant */
    uint8_t intra_8x8_luma_mode_8_offset;
    uint8_t pad1;
    uint16_t const_reverse_data_transfer_intra_8x8;
    /* R1.6: entry offsets for the four intra 16x16 luma modes */
    uint8_t intra_16x16_luma_mode_0_offset;
    uint8_t intra_16x16_luma_mode_1_offset;
    uint8_t intra_16x16_luma_mode_2_offset;
    uint8_t intra_16x16_luma_mode_3_offset;
    /* R1.7: entry offsets for the four intra chroma modes */
    uint8_t intra_chroma_mode_0_offset;
    uint8_t intra_chroma_mode_1_offset;
    uint8_t intra_chroma_mode_2_offset;
    uint8_t intra_chroma_mode_3_offset;
    /* R2.0 */
    uint32_t const_intra_16x16_plane_0;
    /* R2.1 */
    uint32_t const_intra_16x16_chroma_plane_0;
    /* R2.2 */
    uint32_t const_intra_16x16_chroma_plane_1;
    /* R2.3 */
    uint32_t const_intra_16x16_plane_1;
    /* R2.4 */
    uint32_t left_shift_count_reverse_dw_ordering;
    /* R2.5 */
    uint32_t const_reverse_data_transfer_intra_4x4;
    /* R2.6 */
    uint32_t intra_4x4_pred_mode_offset;
};
81
/*
 * Four-byte header for the inter kernels: a 16-bit weight offset, a one-byte
 * flag and one byte of padding.
 */
struct inter_kernel_header
{
    uint16_t weight_offset;
    uint8_t weight_offset_flag;
    uint8_t pad0;
};
88
89 #include "shaders/h264/mc/export.inc"
90 static unsigned long avc_mc_kernel_offset_gen4[] = {
91     INTRA_16x16_IP,
92     INTRA_8x8_IP,
93     INTRA_4x4_IP,
94     INTRA_PCM_IP,
95     FRAME_MB_IP,
96     FIELD_MB_IP,
97     MBAFF_MB_IP
98 };
99
100 #define INST_UNIT_GEN4  16
101 struct intra_kernel_header intra_kernel_header_gen4 = {
102     0 / INST_UNIT_GEN4,
103     (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
104     (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
105     (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
106
107     (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
108     (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
109     (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
110     (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
111
112     (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP) / INST_UNIT_GEN4,
113     0,
114     0xFFFC,
115
116     0 / INST_UNIT_GEN4,
117     (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
118     (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
119     (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
120
121     (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
122     (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
123     (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
124     (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
125
126     (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP) / INST_UNIT_GEN4,
127     0,
128     0x0001,
129
130     0 / INST_UNIT_GEN4,
131     (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP) / INST_UNIT_GEN4,
132     (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP) / INST_UNIT_GEN4,
133     (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP) / INST_UNIT_GEN4,
134
135     0 / INST_UNIT_GEN4,
136     (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP) / INST_UNIT_GEN4,
137     (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP) / INST_UNIT_GEN4,
138     (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP) / INST_UNIT_GEN4,
139
140     0xFCFBFAF9,
141
142     0x00FFFEFD,
143
144     0x04030201,
145
146     0x08070605,
147
148     0x18100800,
149
150     0x00020406,
151
152     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) / INST_UNIT_GEN4 * 0x1000000 + 
153     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) / INST_UNIT_GEN4 * 0x10000 + 
154     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) / INST_UNIT_GEN4 * 0x100 + 
155     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP) / INST_UNIT_GEN4
156 };
157
/*
 * Gen4 kernel binaries.  Each array is filled from a generated shader file
 * and is laid out as rows of four 32-bit words.
 */
158 static uint32_t h264_avc_combined_gen4[][4] = {
159 #include "shaders/h264/mc/avc_mc.g4b"
160 };
161
162 static uint32_t h264_avc_null_gen4[][4] = {
163 #include "shaders/h264/mc/null.g4b"
164 };
165
/*
 * Gen4 kernel table.  Each entry lists, in order: a display name, the
 * kernel id, the binary and its size in bytes.  The trailing NULL is
 * presumably the kernel's buffer object, filled in elsewhere -- TODO confirm
 * against struct media_kernel.
 * The interface descriptor setup indexes the active table with
 * H264_AVC_COMBINED, so entries are presumably expected to sit at the
 * position given by their id.
 */
166 static struct media_kernel h264_avc_kernels_gen4[] = {
167     {
168         "AVC combined kernel",
169         H264_AVC_COMBINED,
170         h264_avc_combined_gen4,
171         sizeof(h264_avc_combined_gen4),
172         NULL
173     },
174
175     {
176         "NULL kernel",
177         H264_AVC_NULL,
178         h264_avc_null_gen4,
179         sizeof(h264_avc_null_gen4),
180         NULL
181     }
182 };
183
184 /* On Ironlake */
185 #include "shaders/h264/mc/export.inc.gen5"
186 static unsigned long avc_mc_kernel_offset_gen5[] = {
187     INTRA_16x16_IP_GEN5,
188     INTRA_8x8_IP_GEN5,
189     INTRA_4x4_IP_GEN5,
190     INTRA_PCM_IP_GEN5,
191     FRAME_MB_IP_GEN5,
192     FIELD_MB_IP_GEN5,
193     MBAFF_MB_IP_GEN5
194 };
195
196 #define INST_UNIT_GEN5  8
197
198 struct intra_kernel_header intra_kernel_header_gen5 = {
199     0 / INST_UNIT_GEN5,
200     (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
201     (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
202     (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
203
204     (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
205     (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
206     (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
207     (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
208
209     (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
210     0,
211     0xFFFC,
212
213     0 / INST_UNIT_GEN5,
214     (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
215     (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
216     (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
217
218     (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
219     (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
220     (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
221     (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
222
223     (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
224     0,
225     0x0001,
226
227     0 / INST_UNIT_GEN5,
228     (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
229     (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
230     (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5) / INST_UNIT_GEN5,
231
232     0 / INST_UNIT_GEN5,
233     (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5) / INST_UNIT_GEN5,
234     (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5) / INST_UNIT_GEN5,
235     (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5) / INST_UNIT_GEN5,
236
237     0xFCFBFAF9,
238
239     0x00FFFEFD,
240
241     0x04030201,
242
243     0x08070605,
244
245     0x18100800,
246
247     0x00020406,
248
249     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) / INST_UNIT_GEN5 * 0x1000000 + 
250     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) / INST_UNIT_GEN5 * 0x10000 + 
251     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) / INST_UNIT_GEN5 * 0x100 + 
252     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5) / INST_UNIT_GEN5
253 };
254
/*
 * Gen5 (Ironlake) kernel binaries.  Each array is filled from a generated
 * shader file and is laid out as rows of four 32-bit words.
 */
255 static uint32_t h264_avc_combined_gen5[][4] = {
256 #include "shaders/h264/mc/avc_mc.g4b.gen5"
257 };
258
259 static uint32_t h264_avc_null_gen5[][4] = {
260 #include "shaders/h264/mc/null.g4b.gen5"
261 };
262
/*
 * Gen5 kernel table, with the same entry order and shape as the Gen4 table:
 * display name, kernel id, binary, size in bytes, and a trailing NULL that
 * is presumably the kernel's buffer object -- TODO confirm against
 * struct media_kernel.
 */
263 static struct media_kernel h264_avc_kernels_gen5[] = {
264     {
265         "AVC combined kernel",
266         H264_AVC_COMBINED,
267         h264_avc_combined_gen5,
268         sizeof(h264_avc_combined_gen5),
269         NULL
270     },
271
272     {
273         "NULL kernel",
274         H264_AVC_NULL,
275         h264_avc_null_gen5,
276         sizeof(h264_avc_null_gen5),
277         NULL
278     }
279 };
280
/*
 * Number of kernels per generation.  Computed from the Gen4 table; the Gen5
 * table in this file has the same number of entries.
 */
281 #define NUM_H264_AVC_KERNELS (sizeof(h264_avc_kernels_gen4) / sizeof(h264_avc_kernels_gen4[0]))
/*
 * Kernel table in use.  Starts out NULL; the code that points it at a
 * generation-specific table is not in this part of the file.
 */
282 struct media_kernel *h264_avc_kernels = NULL;
283
/*
 * Number of MC kernel entry points, computed from the Gen4 offset array
 * (the Gen5 array has the same length).
 */
284 #define NUM_AVC_MC_INTERFACES (sizeof(avc_mc_kernel_offset_gen4) / sizeof(avc_mc_kernel_offset_gen4[0]))
/* Entry-point offset array in use; read when building interface descriptors. */
285 static unsigned long *avc_mc_kernel_offset = NULL;
286
/* Intra kernel constants in use; copied into the constant buffer on upload. */
287 static struct intra_kernel_header *intra_kernel_header = NULL;
288
289 static void
290 i965_media_h264_surface_state(VADriverContextP ctx, 
291                               int index,
292                               struct object_surface *obj_surface,
293                               unsigned long offset, 
294                               int w, int h, int pitch,
295                               Bool is_dst,
296                               int vert_line_stride,
297                               int vert_line_stride_ofs,
298                               int format)
299 {
300     struct i965_driver_data *i965 = i965_driver_data(ctx);  
301     struct i965_media_state *media_state = &i965->media_state;
302     struct i965_surface_state *ss;
303     dri_bo *bo;
304     uint32_t write_domain, read_domain;
305
306     bo = dri_bo_alloc(i965->intel.bufmgr, 
307                       "surface state", 
308                       sizeof(struct i965_surface_state), 32);
309     assert(bo);
310     dri_bo_map(bo, 1);
311     assert(bo->virtual);
312     ss = bo->virtual;
313     memset(ss, 0, sizeof(*ss));
314     ss->ss0.surface_type = I965_SURFACE_2D;
315     ss->ss0.surface_format = format;
316     ss->ss0.vert_line_stride = vert_line_stride;
317     ss->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
318     ss->ss1.base_addr = obj_surface->bo->offset + offset;
319     ss->ss2.width = w - 1;
320     ss->ss2.height = h - 1;
321     ss->ss3.pitch = pitch - 1;
322
323     if (is_dst) {
324         write_domain = I915_GEM_DOMAIN_RENDER;
325         read_domain = I915_GEM_DOMAIN_RENDER;
326     } else {
327         write_domain = 0;
328         read_domain = I915_GEM_DOMAIN_SAMPLER;
329     }
330
331     dri_bo_emit_reloc(bo,
332                       read_domain, write_domain,
333                       offset,
334                       offsetof(struct i965_surface_state, ss1),
335                       obj_surface->bo);
336     dri_bo_unmap(bo);
337
338     assert(index < MAX_MEDIA_SURFACES);
339     media_state->surface_state[index].bo = bo;
340 }
341
342 static void 
343 i965_media_h264_surfaces_setup(VADriverContextP ctx, 
344                                struct decode_state *decode_state)
345 {
346     struct i965_driver_data *i965 = i965_driver_data(ctx);  
347     struct i965_media_state *media_state = &i965->media_state;
348     struct i965_h264_context *i965_h264_context;
349     struct object_surface *obj_surface;
350     VAPictureParameterBufferH264 *pic_param;
351     VAPictureH264 *va_pic;
352     int i, j, w, h;
353     int field_picture;
354
355     assert(media_state->private_context);
356     i965_h264_context = (struct i965_h264_context *)media_state->private_context;
357
358     assert(decode_state->pic_param && decode_state->pic_param->buffer);
359     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
360
361     /* Target Picture */
362     va_pic = &pic_param->CurrPic;
363     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
364     obj_surface = SURFACE(va_pic->picture_id);
365     assert(obj_surface);
366     w = obj_surface->width;
367     h = obj_surface->height;
368     field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
369     i965_media_h264_surface_state(ctx, 0, obj_surface,
370                                   0, w / 4, h / (1 + field_picture), w,
371                                   1, 
372                                   field_picture,
373                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
374                                   I965_SURFACEFORMAT_R8_SINT); /* Y */
375     i965_media_h264_surface_state(ctx, 1, obj_surface,
376                                   w * h, w / 4, h / 2 / (1 + field_picture), w,
377                                   1, 
378                                   field_picture,
379                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
380                                   I965_SURFACEFORMAT_R8G8_SINT);  /* INTERLEAVED U/V */
381
382     /* Reference Pictures */
383     for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
384         if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) {
385             int found = 0;
386             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
387                 va_pic = &pic_param->ReferenceFrames[j];
388                 
389                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
390                     continue;
391
392                 if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
393                     found = 1;
394                     break;
395                 }
396             }
397
398             assert(found == 1);
399
400             obj_surface = SURFACE(va_pic->picture_id);
401             assert(obj_surface);
402             w = obj_surface->width;
403             h = obj_surface->height;
404             field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
405             i965_media_h264_surface_state(ctx, 2 + i, obj_surface,
406                                           0, w / 4, h / (1 + field_picture), w,
407                                           0, 
408                                           field_picture,
409                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
410                                           I965_SURFACEFORMAT_R8_SINT); /* Y */
411             i965_media_h264_surface_state(ctx, 18 + i, obj_surface,
412                                           w * h, w / 4, h / 2 / (1 + field_picture), w,
413                                           0, 
414                                           field_picture,
415                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
416                                           I965_SURFACEFORMAT_R8G8_SINT);  /* INTERLEAVED U/V */
417         }
418     }
419 }
420
421 static void
422 i965_media_h264_binding_table(VADriverContextP ctx)
423 {
424     struct i965_driver_data *i965 = i965_driver_data(ctx);
425     struct i965_media_state *media_state = &i965->media_state;
426     int i;
427     unsigned int *binding_table;
428     dri_bo *bo = media_state->binding_table.bo;
429
430     dri_bo_map(bo, 1);
431     assert(bo->virtual);
432     binding_table = bo->virtual;
433     memset(binding_table, 0, bo->size);
434
435     for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
436         if (media_state->surface_state[i].bo) {
437             binding_table[i] = media_state->surface_state[i].bo->offset;
438             dri_bo_emit_reloc(bo,
439                               I915_GEM_DOMAIN_INSTRUCTION, 0,
440                               0,
441                               i * sizeof(*binding_table),
442                               media_state->surface_state[i].bo);
443         }
444     }
445
446     dri_bo_unmap(media_state->binding_table.bo);
447 }
448
449 static void 
450 i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx)
451 {
452     struct i965_driver_data *i965 = i965_driver_data(ctx);
453     struct i965_media_state *media_state = &i965->media_state;
454     struct i965_interface_descriptor *desc;
455     int i;
456     dri_bo *bo;
457
458     bo = media_state->idrt.bo;
459     dri_bo_map(bo, 1);
460     assert(bo->virtual);
461     desc = bo->virtual;
462
463     for (i = 0; i < NUM_AVC_MC_INTERFACES; i++) {
464         int kernel_offset = avc_mc_kernel_offset[i];
465         memset(desc, 0, sizeof(*desc));
466         desc->desc0.grf_reg_blocks = 7; 
467         desc->desc0.kernel_start_pointer = (h264_avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
468         desc->desc1.const_urb_entry_read_offset = 0;
469         desc->desc1.const_urb_entry_read_len = 2;
470         desc->desc3.binding_table_entry_count = 0;
471         desc->desc3.binding_table_pointer = 
472             media_state->binding_table.bo->offset >> 5; /*reloc */
473
474         dri_bo_emit_reloc(bo,
475                           I915_GEM_DOMAIN_INSTRUCTION, 0,
476                           desc->desc0.grf_reg_blocks + kernel_offset,
477                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
478                           h264_avc_kernels[H264_AVC_COMBINED].bo);
479
480         dri_bo_emit_reloc(bo,
481                           I915_GEM_DOMAIN_INSTRUCTION, 0,
482                           desc->desc3.binding_table_entry_count,
483                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
484                           media_state->binding_table.bo);
485         desc++;
486     }
487
488     dri_bo_unmap(bo);
489 }
490
491 static void
492 i965_media_h264_vfe_state(VADriverContextP ctx)
493 {
494     struct i965_driver_data *i965 = i965_driver_data(ctx);
495     struct i965_media_state *media_state = &i965->media_state;
496     struct i965_vfe_state *vfe_state;
497     dri_bo *bo;
498
499     bo = media_state->vfe_state.bo;
500     dri_bo_map(bo, 1);
501     assert(bo->virtual);
502     vfe_state = bo->virtual;
503     memset(vfe_state, 0, sizeof(*vfe_state));
504     vfe_state->vfe0.extend_vfe_state_present = 1;
505     vfe_state->vfe1.max_threads = media_state->urb.num_vfe_entries - 1;
506     vfe_state->vfe1.urb_entry_alloc_size = media_state->urb.size_vfe_entry - 1;
507     vfe_state->vfe1.num_urb_entries = media_state->urb.num_vfe_entries;
508     vfe_state->vfe1.vfe_mode = VFE_AVC_IT_MODE;
509     vfe_state->vfe1.children_present = 0;
510     vfe_state->vfe2.interface_descriptor_base = 
511         media_state->idrt.bo->offset >> 4; /* reloc */
512     dri_bo_emit_reloc(bo,
513                       I915_GEM_DOMAIN_INSTRUCTION, 0,
514                       0,
515                       offsetof(struct i965_vfe_state, vfe2),
516                       media_state->idrt.bo);
517     dri_bo_unmap(bo);
518 }
519
520 static void 
521 i965_media_h264_vfe_state_extension(VADriverContextP ctx, 
522                                     struct decode_state *decode_state)
523 {
524     struct i965_driver_data *i965 = i965_driver_data(ctx);
525     struct i965_media_state *media_state = &i965->media_state;
526     struct i965_h264_context *i965_h264_context;
527     struct i965_vfe_state_ex *vfe_state_ex;
528     VAPictureParameterBufferH264 *pic_param;
529     VASliceParameterBufferH264 *slice_param;
530     int mbaff_frame_flag;
531
532     assert(media_state->private_context);
533     i965_h264_context = (struct i965_h264_context *)media_state->private_context;
534
535     assert(decode_state->pic_param && decode_state->pic_param->buffer);
536     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
537
538     assert(decode_state->slice_params[0] && decode_state->slice_params[0]->buffer);
539     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[0]->buffer;
540
541     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
542                         !pic_param->pic_fields.bits.field_pic_flag);
543
544     assert(media_state->extended_state.bo);
545     dri_bo_map(media_state->extended_state.bo, 1);
546     assert(media_state->extended_state.bo->virtual);
547     vfe_state_ex = media_state->extended_state.bo->virtual;
548     memset(vfe_state_ex, 0, sizeof(*vfe_state_ex));
549
550     /*
551      * Indirect data buffer:
552      * --------------------------------------------------------
553      * | Motion Vectors | Weight/Offset data | Residual data |
554      * --------------------------------------------------------
555      * R4-R7: Motion Vectors
556      * R8-R9: Weight/Offset
557      * R10-R33: Residual data
558      */
559     vfe_state_ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
560     vfe_state_ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;
561
562     if (slice_param->slice_type == SLICE_TYPE_I ||
563         slice_param->slice_type == SLICE_TYPE_SI) 
564         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
565     else 
566         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
567
568     if (vfe_state_ex->vfex1.avc.sub_field_present_flag == 0) {
569         vfe_state_ex->vfex1.avc.weight_grf_offset = 0;
570         vfe_state_ex->vfex1.avc.residual_grf_offset = 0;
571     } else {
572         vfe_state_ex->vfex1.avc.weight_grf_offset = 4;
573         vfe_state_ex->vfex1.avc.residual_grf_offset = 6;
574     }
575
576     if (!pic_param->pic_fields.bits.field_pic_flag) {
577         if (mbaff_frame_flag) {
578             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
579             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
580             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
581             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
582             vfe_state_ex->remap_table0.remap_index_4 = MBAFF_MOTION;
583             vfe_state_ex->remap_table0.remap_index_5 = MBAFF_MOTION;
584             vfe_state_ex->remap_table0.remap_index_6 = MBAFF_MOTION;
585             vfe_state_ex->remap_table0.remap_index_7 = MBAFF_MOTION;
586
587             vfe_state_ex->remap_table1.remap_index_8 = MBAFF_MOTION;
588             vfe_state_ex->remap_table1.remap_index_9 = MBAFF_MOTION;
589             vfe_state_ex->remap_table1.remap_index_10 = MBAFF_MOTION;
590             vfe_state_ex->remap_table1.remap_index_11 = MBAFF_MOTION;
591             vfe_state_ex->remap_table1.remap_index_12 = MBAFF_MOTION;
592             vfe_state_ex->remap_table1.remap_index_13 = MBAFF_MOTION;
593             vfe_state_ex->remap_table1.remap_index_14 = MBAFF_MOTION;
594             vfe_state_ex->remap_table1.remap_index_15 = MBAFF_MOTION;
595         } else {
596             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
597             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
598             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
599             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
600             vfe_state_ex->remap_table0.remap_index_4 = FRAMEMB_MOTION;
601             vfe_state_ex->remap_table0.remap_index_5 = FRAMEMB_MOTION;
602             vfe_state_ex->remap_table0.remap_index_6 = FRAMEMB_MOTION;
603             vfe_state_ex->remap_table0.remap_index_7 = FRAMEMB_MOTION;
604
605             vfe_state_ex->remap_table1.remap_index_8 = FRAMEMB_MOTION;
606             vfe_state_ex->remap_table1.remap_index_9 = FRAMEMB_MOTION;
607             vfe_state_ex->remap_table1.remap_index_10 = FRAMEMB_MOTION;
608             vfe_state_ex->remap_table1.remap_index_11 = FRAMEMB_MOTION;
609             vfe_state_ex->remap_table1.remap_index_12 = FRAMEMB_MOTION;
610             vfe_state_ex->remap_table1.remap_index_13 = FRAMEMB_MOTION;
611             vfe_state_ex->remap_table1.remap_index_14 = FRAMEMB_MOTION;
612             vfe_state_ex->remap_table1.remap_index_15 = FRAMEMB_MOTION;
613         }
614     } else {
615         vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
616         vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
617         vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
618         vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
619         vfe_state_ex->remap_table0.remap_index_4 = FIELDMB_MOTION;
620         vfe_state_ex->remap_table0.remap_index_5 = FIELDMB_MOTION;
621         vfe_state_ex->remap_table0.remap_index_6 = FIELDMB_MOTION;
622         vfe_state_ex->remap_table0.remap_index_7 = FIELDMB_MOTION;
623
624         vfe_state_ex->remap_table1.remap_index_8 = FIELDMB_MOTION;
625         vfe_state_ex->remap_table1.remap_index_9 = FIELDMB_MOTION;
626         vfe_state_ex->remap_table1.remap_index_10 = FIELDMB_MOTION;
627         vfe_state_ex->remap_table1.remap_index_11 = FIELDMB_MOTION;
628         vfe_state_ex->remap_table1.remap_index_12 = FIELDMB_MOTION;
629         vfe_state_ex->remap_table1.remap_index_13 = FIELDMB_MOTION;
630         vfe_state_ex->remap_table1.remap_index_14 = FIELDMB_MOTION;
631         vfe_state_ex->remap_table1.remap_index_15 = FIELDMB_MOTION;
632     }
633
634     if (i965_h264_context->use_avc_hw_scoreboard) {
635         vfe_state_ex->scoreboard0.enable = 1;
636         vfe_state_ex->scoreboard0.type = SCOREBOARD_STALLING;
637         vfe_state_ex->scoreboard0.mask = 0xff;
638
639         vfe_state_ex->scoreboard1.delta_x0 = -1;
640         vfe_state_ex->scoreboard1.delta_y0 = 0;
641         vfe_state_ex->scoreboard1.delta_x1 = 0;
642         vfe_state_ex->scoreboard1.delta_y1 = -1;
643         vfe_state_ex->scoreboard1.delta_x2 = 1;
644         vfe_state_ex->scoreboard1.delta_y2 = -1;
645         vfe_state_ex->scoreboard1.delta_x3 = -1;
646         vfe_state_ex->scoreboard1.delta_y3 = -1;
647
648         vfe_state_ex->scoreboard2.delta_x4 = -1;
649         vfe_state_ex->scoreboard2.delta_y4 = 1;
650         vfe_state_ex->scoreboard2.delta_x5 = 0;
651         vfe_state_ex->scoreboard2.delta_y5 = -2;
652         vfe_state_ex->scoreboard2.delta_x6 = 1;
653         vfe_state_ex->scoreboard2.delta_y6 = -2;
654         vfe_state_ex->scoreboard2.delta_x7 = -1;
655         vfe_state_ex->scoreboard2.delta_y7 = -2;
656     }
657
658     dri_bo_unmap(media_state->extended_state.bo);
659 }
660
661 static void
662 i965_media_h264_upload_constants(VADriverContextP ctx, struct decode_state *decode_state)
663 {
664     struct i965_driver_data *i965 = i965_driver_data(ctx);
665     struct i965_media_state *media_state = &i965->media_state;
666     struct i965_h264_context *i965_h264_context;
667     unsigned char *constant_buffer;
668     VASliceParameterBufferH264 *slice_param;
669
670     assert(media_state->private_context);
671     i965_h264_context = (struct i965_h264_context *)media_state->private_context;
672
673     assert(decode_state->slice_params[0] && decode_state->slice_params[0]->buffer);
674     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[0]->buffer;
675
676     dri_bo_map(media_state->curbe.bo, 1);
677     assert(media_state->curbe.bo->virtual);
678     constant_buffer = media_state->curbe.bo->virtual;
679
680     /* HW solution for W=128 */
681     if (i965_h264_context->use_hw_w128) {
682         memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
683     } else {
684         if (slice_param->slice_type == SLICE_TYPE_I ||
685             slice_param->slice_type == SLICE_TYPE_SI) {
686             memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
687         } else {
688             /* FIXME: Need to upload CURBE data to inter kernel interface 
689              * to support weighted prediction work-around 
690              */
691             *(short *)constant_buffer = i965_h264_context->weight128_offset0;
692             constant_buffer += 2;
693             *(char *)constant_buffer = i965_h264_context->weight128_offset0_flag;
694             constant_buffer++;
695             *constant_buffer = 0;
696         }
697     }
698
699     dri_bo_unmap(media_state->curbe.bo);
700 }
701
702 static void
703 i965_media_h264_states_setup(VADriverContextP ctx, struct decode_state *decode_state)
704 {
705     struct i965_driver_data *i965 = i965_driver_data(ctx);
706     struct i965_media_state *media_state = &i965->media_state;
707     struct i965_h264_context *i965_h264_context;
708     VAPictureParameterBufferH264 *pic_param;
709     unsigned int *object_command;
710
711     assert(media_state->private_context);
712     i965_h264_context = (struct i965_h264_context *)media_state->private_context;
713     assert(decode_state->pic_param && decode_state->pic_param->buffer);
714     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
715
716     i965_h264_context->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
717     i965_h264_context->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) / 
718         (1 + !!pic_param->pic_fields.bits.field_pic_flag); /* picture height */
719     i965_h264_context->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
720                                                    !pic_param->pic_fields.bits.field_pic_flag);
721
722     i965_h264_context->avc_it_command_mb_info.mbs = (i965_h264_context->picture.width_in_mbs * 
723                                                      i965_h264_context->picture.height_in_mbs);
724
725     i965_avc_bsd_pipeline(ctx, decode_state);
726
727     dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True);
728     assert(i965_h264_context->avc_it_command_mb_info.bo->virtual);
729     object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual;
730     memset(object_command, 0, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
731     object_command += i965_h264_context->avc_it_command_mb_info.mbs * (1 + i965_h264_context->use_avc_hw_scoreboard) * MB_CMD_IN_DWS;
732     *object_command = MI_BATCH_BUFFER_END;
733     dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo);
734
735     i965_avc_hw_scoreboard(ctx, decode_state);
736
737     i965_media_h264_surfaces_setup(ctx, decode_state);
738     i965_media_h264_binding_table(ctx);
739     i965_media_h264_interface_descriptor_remap_table(ctx);
740     i965_media_h264_vfe_state_extension(ctx, decode_state);
741     i965_media_h264_vfe_state(ctx);
742     i965_media_h264_upload_constants(ctx, decode_state);
743 }
744
745 static void
746 i965_media_h264_objects(VADriverContextP ctx, struct decode_state *decode_state)
747 {
748     struct i965_driver_data *i965 = i965_driver_data(ctx);
749     struct i965_media_state *media_state = &i965->media_state;
750     struct i965_h264_context *i965_h264_context;
751
752     assert(media_state->private_context);
753     i965_h264_context = (struct i965_h264_context *)media_state->private_context;
754     
755     BEGIN_BATCH(ctx, 2);
756     OUT_BATCH(ctx, MI_BATCH_BUFFER_START | (2 << 6));
757     OUT_RELOC(ctx, i965_h264_context->avc_it_command_mb_info.bo, 
758               I915_GEM_DOMAIN_COMMAND, 0, 
759               0);
760     ADVANCE_BATCH(ctx);
761 }
762
763 void
764 i965_media_h264_decode_init(VADriverContextP ctx)
765 {
766     struct i965_driver_data *i965 = i965_driver_data(ctx);
767     struct i965_media_state *media_state = &i965->media_state;
768     struct i965_h264_context *i965_h264_context;
769     dri_bo *bo;
770
771     assert(media_state->private_context);
772     i965_h264_context = media_state->private_context;
773
774     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
775     bo = dri_bo_alloc(i965->intel.bufmgr,
776                       "avc it command mb info",
777                       0x80000 * (1 + i965_h264_context->use_avc_hw_scoreboard),  /* at least 522240 bytes */
778                       0x1000);
779     assert(bo);
780     i965_h264_context->avc_it_command_mb_info.bo = bo;
781
782     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
783     bo = dri_bo_alloc(i965->intel.bufmgr,
784                       "avc it data",
785                       0x1000000, /* at least 16711680 bytes */
786                       4096);
787     assert(bo);
788     i965_h264_context->avc_it_data.bo = bo;
789     i965_h264_context->avc_it_data.write_offset = 0;
790     dri_bo_unreference(media_state->indirect_object.bo);
791     media_state->indirect_object.bo = bo;
792     dri_bo_reference(media_state->indirect_object.bo);
793     media_state->indirect_object.offset = i965_h264_context->avc_it_data.write_offset;
794
795     /* bsd pipeline */
796     i965_avc_bsd_decode_init(ctx);
797
798     /* HW scoreboard */
799     i965_avc_hw_scoreboard_decode_init(ctx);
800
801     /* for Media pipeline */
802     media_state->extended_state.enabled = 1;
803     dri_bo_unreference(media_state->extended_state.bo);
804     bo = dri_bo_alloc(i965->intel.bufmgr, 
805                       "extened vfe state", 
806                       sizeof(struct i965_vfe_state_ex), 32);
807     assert(bo);
808     media_state->extended_state.bo = bo;
809
810     /* URB */
811     if (IS_IRONLAKE(i965->intel.device_id)) {
812         media_state->urb.num_vfe_entries = 63;
813     } else {
814         media_state->urb.num_vfe_entries = 23;
815     }
816
817     media_state->urb.size_vfe_entry = 16;
818
819     media_state->urb.num_cs_entries = 1;
820     media_state->urb.size_cs_entry = 1;
821
822     media_state->urb.vfe_start = 0;
823     media_state->urb.cs_start = media_state->urb.vfe_start + 
824         media_state->urb.num_vfe_entries * media_state->urb.size_vfe_entry;
825     assert(media_state->urb.cs_start + 
826            media_state->urb.num_cs_entries * media_state->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
827
828     /* hook functions */
829     media_state->media_states_setup = i965_media_h264_states_setup;
830     media_state->media_objects = i965_media_h264_objects;
831 }
832
833 Bool 
834 i965_media_h264_init(VADriverContextP ctx)
835 {
836     struct i965_driver_data *i965 = i965_driver_data(ctx);
837     struct i965_media_state *media_state = &i965->media_state;
838     struct i965_h264_context *i965_h264_context;
839     int i;
840
841     i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
842
843     /* kernel */
844     assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) / 
845                                     sizeof(h264_avc_kernels_gen5[0])));
846     assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
847                                      sizeof(avc_mc_kernel_offset_gen5[0])));
848
849     if (IS_IRONLAKE(i965->intel.device_id)) {
850         h264_avc_kernels = h264_avc_kernels_gen5;
851         avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
852         intra_kernel_header = &intra_kernel_header_gen5;
853         i965_h264_context->use_avc_hw_scoreboard = 1;
854         i965_h264_context->use_hw_w128 = 1;
855     } else {
856         h264_avc_kernels = h264_avc_kernels_gen4;
857         avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
858         intra_kernel_header = &intra_kernel_header_gen4;
859         i965_h264_context->use_avc_hw_scoreboard = 0;
860         i965_h264_context->use_hw_w128 = 0;
861     }
862
863     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
864         struct media_kernel *kernel = &h264_avc_kernels[i];
865         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
866                                   kernel->name, 
867                                   kernel->size, 64);
868         assert(kernel->bo);
869         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
870     }
871
872     for (i = 0; i < 16; i++) {
873         i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID;
874         i965_h264_context->fsid_list[i].frame_store_id = -1;
875     }
876
877     media_state->private_context = i965_h264_context;
878     return True;
879 }
880
881 Bool 
882 i965_media_h264_ternimate(VADriverContextP ctx)
883 {
884     struct i965_driver_data *i965 = i965_driver_data(ctx);
885     struct i965_media_state *media_state = &i965->media_state;
886     struct i965_h264_context *i965_h264_context;
887     int i;
888
889     if (media_state->private_context) {
890         i965_h264_context = (struct i965_h264_context *)media_state->private_context;
891         i965_avc_hw_scoreboard_ternimate(&i965_h264_context->avc_hw_scoreboard_context);
892         i965_avc_bsd_ternimate(&i965_h264_context->i965_avc_bsd_context);
893         dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
894         dri_bo_unreference(i965_h264_context->avc_it_data.bo);
895         free(i965_h264_context);
896         media_state->private_context = NULL;
897     }
898
899     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
900         struct media_kernel *kernel = &h264_avc_kernels[i];
901
902         dri_bo_unreference(kernel->bo);
903         kernel->bo = NULL;
904     }
905
906     return True;
907 }