[platform/upstream/libva-intel-driver.git] / src / i965_avc_bsd.c
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */
#include "sysdeps.h"

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_avc_bsd.h"
#include "i965_media_h264.h"
#include "i965_media.h"
#include "i965_decoder_utils.h"
#include "intel_media.h"

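/*
 * Attach per-surface decoding state (GenAvcSurface) to the render surface
 * and lazily allocate the direct-MV read/write buffers.  A separate buffer
 * for the bottom field is only needed for field pictures when
 * direct_8x8_inference_flag is 0 (dmv_bottom_flag).
 */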
static void
i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx,
                                  struct object_surface *obj_surface,
                                  VAPictureParameterBufferH264 *pic_param,
                                  struct i965_h264_context *i965_h264_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    GenAvcSurface *avc_bsd_surface = obj_surface->private_data;

    obj_surface->free_private_data = gen_free_avc_surface;

    if (!avc_bsd_surface) {
        avc_bsd_surface = calloc(1, sizeof(GenAvcSurface));
        assert(avc_bsd_surface);
        assert((obj_surface->size & 0x3f) == 0);
        obj_surface->private_data = avc_bsd_surface;
    }

    avc_bsd_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
                                        !pic_param->seq_fields.bits.direct_8x8_inference_flag);

    if (avc_bsd_surface->dmv_top == NULL) {
        avc_bsd_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
                                                "direct mv w/r buffer",
                                                DMV_SIZE,
                                                0x1000);
    }

    if (avc_bsd_surface->dmv_bottom_flag &&
        avc_bsd_surface->dmv_bottom == NULL) {
        avc_bsd_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
                                                   "direct mv w/r buffer",
                                                   DMV_SIZE,
                                                   0x1000);
    }
}

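/*
 * Program the BSD indirect object base address so that the following
 * AVC_BSD_OBJECT commands fetch bitstream data from the slice data BO
 * of the given slice.
 */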
static void
i965_bsd_ind_obj_base_address(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              int slice,
                              struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;

    dri_bo *ind_bo = decode_state->slice_datas[slice]->bo;

    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, CMD_BSD_IND_OBJ_BASE_ADDR | (3 - 2));
    OUT_BCS_RELOC(batch, ind_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}

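/*
 * Emit AVC_BSD_IMG_STATE: per-picture state for the BSD unit, i.e. the
 * frame size in macroblocks, picture structure (frame/top/bottom field),
 * MBAFF flag, chroma format, presence of user scaling matrices, ILDB
 * enable, and the MEDIA_OBJECT_EX header used for the generated AVC IT
 * commands.
 */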
static void
i965_avc_bsd_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int qm_present_flag;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int avc_it_command_header;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        qm_present_flag = 1;
    else
        qm_present_flag = 0; /* built-in QM matrices */

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */

    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */

    /* BSD unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    avc_it_command_header = (CMD_MEDIA_OBJECT_EX | (12 - 2));

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, CMD_AVC_BSD_IMG_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs) & 0x7fff));
    OUT_BCS_BATCH(batch,
                  (height_in_mbs << 16) |
                  (width_in_mbs << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (SCAN_RASTER_ORDER << 15) | /* AVC ILDB Data */
                  (SCAN_SPECIAL_ORDER << 14) | /* AVC IT Command */
                  (SCAN_RASTER_ORDER << 13) | /* AVC IT Data */
                  (1 << 12) | /* always 1, hardware requirement */
                  (qm_present_flag << 10) |
                  (img_struct << 8) |
                  (16 << 0)); /* FIXME: always support 16 reference frames ??? */
    OUT_BCS_BATCH(batch,
                  (RESIDUAL_DATA_OFFSET << 24) | /* residual data offset */
                  (0 << 17) | /* don't overwrite SRT */
                  (0 << 16) | /* Un-SRT (Unsynchronized Root Thread) */
                  (0 << 12) | /* FIXME: no 16MV ??? */
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (i965_h264_context->enable_avc_ildb << 8)  | /* Enable ILDB writing output */
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, avc_it_command_header);
    ADVANCE_BCS_BATCH(batch);
}

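/*
 * Emit AVC_BSD_QM_STATE: load the six 4x4 scaling lists from the
 * VAIQMatrixBufferH264 and, when transform_8x8_mode_flag is set, the two
 * 8x8 scaling lists as well.  Nothing is emitted if no IQ matrix buffer
 * was supplied; the built-in matrices are used in that case.
 */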
static void
i965_avc_bsd_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int cmd_len;
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        cmd_len += 2 * 16; /* load two 8x8 scaling matrices */

    BEGIN_BCS_BATCH(batch, cmd_len);
    OUT_BCS_BATCH(batch, CMD_AVC_BSD_QM_STATE | (cmd_len - 2));

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        OUT_BCS_BATCH(batch,
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
    else
        OUT_BCS_BATCH(batch,
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0x3f << 0)); /* six 4x4 scaling matrices */

    intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);

    ADVANCE_BCS_BATCH(batch);
}

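/*
 * Emit AVC_BSD_SLICE_STATE for P/B slices: the active reference list(s) in
 * the hardware ref-idx format and, for explicitly weighted prediction, the
 * weight/offset tables.  Each table entry is 6 bytes per reference index:
 * luma offset/weight, Cb offset/weight, Cr offset/weight.  The weight128_*
 * masks used by the Ironlake weight-128 handling are also collected here.
 */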
static void
i965_avc_bsd_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int present_flag, cmd_len, list, j;
    uint8_t ref_idx_state[32];
    char weightoffsets[32 * 6];

    /* don't issue SLICE_STATE for intra-prediction decoding */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI)
        return;

    cmd_len = 2;

    if (slice_param->slice_type == SLICE_TYPE_P ||
        slice_param->slice_type == SLICE_TYPE_SP) {
        present_flag = PRESENT_REF_LIST0;
        cmd_len += 8;
    } else {
        present_flag = PRESENT_REF_LIST0 | PRESENT_REF_LIST1;
        cmd_len += 16;
    }

    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) &&
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        present_flag |= PRESENT_WEIGHT_OFFSET_L0;
        cmd_len += 48;
    }

    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        present_flag |= PRESENT_WEIGHT_OFFSET_L0 | PRESENT_WEIGHT_OFFSET_L1;
        cmd_len += 96;
    }

    BEGIN_BCS_BATCH(batch, cmd_len);
    OUT_BCS_BATCH(batch, CMD_AVC_BSD_SLICE_STATE | (cmd_len - 2));
    OUT_BCS_BATCH(batch, present_flag);

    for (list = 0; list < 2; list++) {
        int flag, num_va_pics;
        VAPictureH264 *va_pic;

        if (list == 0) {
            flag        = PRESENT_REF_LIST0;
            va_pic      = slice_param->RefPicList0;
            num_va_pics = slice_param->num_ref_idx_l0_active_minus1 + 1;
        } else {
            flag        = PRESENT_REF_LIST1;
            va_pic      = slice_param->RefPicList1;
            num_va_pics = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (!(present_flag & flag))
            continue;

        gen5_fill_avc_ref_idx_state(
            ref_idx_state,
            va_pic, num_va_pics,
            i965_h264_context->fsid_list
        );
        intel_batchbuffer_data(batch, ref_idx_state, sizeof(ref_idx_state));
    }

    i965_h264_context->weight128_luma_l0 = 0;
    i965_h264_context->weight128_luma_l1 = 0;
    i965_h264_context->weight128_chroma_l0 = 0;
    i965_h264_context->weight128_chroma_l1 = 0;

    i965_h264_context->weight128_offset0_flag = 0;
    i965_h264_context->weight128_offset0 = 0;

    if (present_flag & PRESENT_WEIGHT_OFFSET_L0) {
        for (j = 0; j < 32; j++) {
            weightoffsets[j * 6 + 0] = slice_param->luma_offset_l0[j];
            weightoffsets[j * 6 + 1] = slice_param->luma_weight_l0[j];
            weightoffsets[j * 6 + 2] = slice_param->chroma_offset_l0[j][0];
            weightoffsets[j * 6 + 3] = slice_param->chroma_weight_l0[j][0];
            weightoffsets[j * 6 + 4] = slice_param->chroma_offset_l0[j][1];
            weightoffsets[j * 6 + 5] = slice_param->chroma_weight_l0[j][1];

            if (pic_param->pic_fields.bits.weighted_pred_flag == 1 ||
                pic_param->pic_fields.bits.weighted_bipred_idc == 1) {
                if (i965_h264_context->use_hw_w128) {
                    if (slice_param->luma_weight_l0[j] == 128)
                        i965_h264_context->weight128_luma_l0 |= (1 << j);

                    if (slice_param->chroma_weight_l0[j][0] == 128 ||
                        slice_param->chroma_weight_l0[j][1] == 128)
                        i965_h264_context->weight128_chroma_l0 |= (1 << j);
                } else {
                    /* FIXME: workaround for weight 128 */
                    if (slice_param->luma_weight_l0[j] == 128 ||
                        slice_param->chroma_weight_l0[j][0] == 128 ||
                        slice_param->chroma_weight_l0[j][1] == 128)
                        i965_h264_context->weight128_offset0_flag = 1;
                }
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
    }

    if (present_flag & PRESENT_WEIGHT_OFFSET_L1) {
        for (j = 0; j < 32; j++) {
            weightoffsets[j * 6 + 0] = slice_param->luma_offset_l1[j];
            weightoffsets[j * 6 + 1] = slice_param->luma_weight_l1[j];
            weightoffsets[j * 6 + 2] = slice_param->chroma_offset_l1[j][0];
            weightoffsets[j * 6 + 3] = slice_param->chroma_weight_l1[j][0];
            weightoffsets[j * 6 + 4] = slice_param->chroma_offset_l1[j][1];
            weightoffsets[j * 6 + 5] = slice_param->chroma_weight_l1[j][1];

            if (pic_param->pic_fields.bits.weighted_bipred_idc == 1) {
                if (i965_h264_context->use_hw_w128) {
                    if (slice_param->luma_weight_l1[j] == 128)
                        i965_h264_context->weight128_luma_l1 |= (1 << j);

                    if (slice_param->chroma_weight_l1[j][0] == 128 ||
                        slice_param->chroma_weight_l1[j][1] == 128)
                        i965_h264_context->weight128_chroma_l1 |= (1 << j);
                } else {
                    /* FIXME: workaround for weight 128 */
                    if (slice_param->luma_weight_l1[j] == 128 ||
                        slice_param->chroma_weight_l1[j][0] == 128 ||
                        slice_param->chroma_weight_l1[j][1] == 128)
                        i965_h264_context->weight128_offset0_flag = 1;
                }
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
    }

    ADVANCE_BCS_BATCH(batch);
}

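/*
 * Emit AVC_BSD_BUF_BASE_STATE: the row stores, the AVC IT command/data
 * buffers, the optional ILDB output buffer, the direct-MV buffers of every
 * active reference frame and of the current picture, and finally the POC
 * list (Top/BottomFieldOrderCnt) for the references and the current
 * picture.
 */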
static void
i965_avc_bsd_buf_base_state(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            VAPictureParameterBufferH264 *pic_param,
                            VASliceParameterBufferH264 *slice_param,
                            struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    struct i965_avc_bsd_context *i965_avc_bsd_context;
    int i, j;
    VAPictureH264 *va_pic;
    struct object_surface *obj_surface;
    GenAvcSurface *avc_bsd_surface;

    i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;

    BEGIN_BCS_BATCH(batch, 74);
    OUT_BCS_BATCH(batch, CMD_AVC_BSD_BUF_BASE_STATE | (74 - 2));
    OUT_BCS_RELOC(batch, i965_avc_bsd_context->bsd_raw_store.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_RELOC(batch, i965_avc_bsd_context->mpr_row_store.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
    OUT_BCS_RELOC(batch, i965_h264_context->avc_it_data.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  (i965_h264_context->avc_it_data.write_offset << 6));

    if (i965_h264_context->enable_avc_ildb)
        OUT_BCS_RELOC(batch, i965_h264_context->avc_ildb_data.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
        if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID &&
            i965_h264_context->fsid_list[i].obj_surface &&
            i965_h264_context->fsid_list[i].obj_surface->private_data) {
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];

                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            obj_surface = i965_h264_context->fsid_list[i].obj_surface;
            avc_bsd_surface = obj_surface->private_data;

            OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);

            if (avc_bsd_surface->dmv_bottom_flag == 1)
                OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_bottom,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
            else
                OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* initialize the UV component for the YUV400 (monochrome) case */
    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
        unsigned int uv_offset = obj_surface->width * obj_surface->height;
        unsigned int uv_size   = obj_surface->width * obj_surface->height / 2;

        dri_bo_map(obj_surface->bo, 1);
        memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
        dri_bo_unmap(obj_surface->bo);
    }

    i965_avc_bsd_init_avc_bsd_surface(ctx, obj_surface, pic_param, i965_h264_context);
    avc_bsd_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (avc_bsd_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
        if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) {
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];

                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);

            if (!(va_pic->flags & VA_PICTURE_H264_INVALID)) {
                OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
                OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
            }
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}

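/*
 * Emit AVC_BSD_OBJECT (G4x layout) for one slice.  The indirect data
 * pointer is advanced to the byte containing the first macroblock
 * (slice_data_bit_offset accounts for emulation-prevention bytes) and the
 * remaining bit offset within that byte is programmed in the final dword.
 * With MBAFF, first_mb_in_slice addresses macroblock pairs, hence the
 * << mbaff_picture before converting it to an x/y macroblock position.
 * A NULL slice_param emits the phantom slice that terminates the picture.
 */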
static void
g4x_avc_bsd_object(VADriverContextP ctx,
                   struct decode_state *decode_state,
                   VAPictureParameterBufferH264 *pic_param,
                   VASliceParameterBufferH264 *slice_param,
                   int slice_index,
                   struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    if (slice_param) {
        int encrypted, counter_value, cmd_len;
        int slice_hor_pos, slice_ver_pos;
        int num_ref_idx_l0, num_ref_idx_l1;
        int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                             pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
        unsigned int slice_data_bit_offset;
        int weighted_pred_idc = 0;
        int first_mb_in_slice = 0;
        int slice_type;

        encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */

        if (encrypted) {
            cmd_len = 9;
            counter_value = 0; /* FIXME: ??? */
        } else
            cmd_len = 8;

        slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
            decode_state->slice_datas[slice_index]->bo,
            slice_param,
            pic_param->pic_fields.bits.entropy_coding_mode_flag
        );

        if (slice_param->slice_type == SLICE_TYPE_I ||
            slice_param->slice_type == SLICE_TYPE_SI)
            slice_type = SLICE_TYPE_I;
        else if (slice_param->slice_type == SLICE_TYPE_P ||
                 slice_param->slice_type == SLICE_TYPE_SP)
            slice_type = SLICE_TYPE_P;
        else {
            assert(slice_param->slice_type == SLICE_TYPE_B);
            slice_type = SLICE_TYPE_B;
        }

        if (slice_type == SLICE_TYPE_I) {
            assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
            assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
            num_ref_idx_l0 = 0;
            num_ref_idx_l1 = 0;
        } else if (slice_type == SLICE_TYPE_P) {
            assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
            num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_idx_l1 = 0;
        } else {
            num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (slice_type == SLICE_TYPE_P)
            weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        else if (slice_type == SLICE_TYPE_B)
            weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;

        first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
        slice_hor_pos = first_mb_in_slice % width_in_mbs;
        slice_ver_pos = first_mb_in_slice / width_in_mbs;

        BEGIN_BCS_BATCH(batch, cmd_len);
        OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (cmd_len - 2));
        OUT_BCS_BATCH(batch,
                      (encrypted << 31) |
                      ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
        OUT_BCS_BATCH(batch,
                      (slice_param->slice_data_offset +
                       (slice_data_bit_offset >> 3)));
        OUT_BCS_BATCH(batch,
                      (0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */
                      (0 << 14) | /* ignore BSDPrematureComplete Error handling */
                      (0 << 13) | /* FIXME: ??? */
                      (0 << 12) | /* ignore MPR Error handling */
                      (0 << 10) | /* ignore Entropy Error handling */
                      (0 << 8)  | /* ignore MB Header Error handling */
                      (slice_type << 0));
        OUT_BCS_BATCH(batch,
                      (num_ref_idx_l1 << 24) |
                      (num_ref_idx_l0 << 16) |
                      (slice_param->chroma_log2_weight_denom << 8) |
                      (slice_param->luma_log2_weight_denom << 0));
        OUT_BCS_BATCH(batch,
                      (weighted_pred_idc << 30) |
                      (slice_param->direct_spatial_mv_pred_flag << 29) |
                      (slice_param->disable_deblocking_filter_idc << 27) |
                      (slice_param->cabac_init_idc << 24) |
                      ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                      ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                      ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
        OUT_BCS_BATCH(batch,
                      (slice_ver_pos << 24) |
                      (slice_hor_pos << 16) |
                      (first_mb_in_slice << 0));
        OUT_BCS_BATCH(batch,
                      (1 << 7) |
                      ((0x7 - (slice_data_bit_offset & 0x7)) << 0));

        if (encrypted) {
            OUT_BCS_BATCH(batch, counter_value);
        }

        ADVANCE_BCS_BATCH(batch);
    } else {
        BEGIN_BCS_BATCH(batch, 8);
        OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (8 - 2));
        OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */
        OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
        OUT_BCS_BATCH(batch, 0);
        ADVANCE_BCS_BATCH(batch);
    }
}

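/*
 * Ironlake variant of AVC_BSD_OBJECT: a 16-dword command that additionally
 * carries the weight-128 bitmasks (weight128_luma/chroma_l0/l1) collected
 * in i965_avc_bsd_slice_state, used when use_hw_w128 is set so the
 * hardware can handle explicit prediction weights of 128.
 */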
static void
ironlake_avc_bsd_object(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        int slice_index,
                        struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    if (slice_param) {
        int encrypted, counter_value;
        int slice_hor_pos, slice_ver_pos;
        int num_ref_idx_l0, num_ref_idx_l1;
        int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                             pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
        unsigned int slice_data_bit_offset;
        int weighted_pred_idc = 0;
        int first_mb_in_slice;
        int slice_type;

        encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */

        if (encrypted) {
            counter_value = 0; /* FIXME: ??? */
        } else
            counter_value = 0;

        slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
            decode_state->slice_datas[slice_index]->bo,
            slice_param,
            pic_param->pic_fields.bits.entropy_coding_mode_flag
        );

        if (slice_param->slice_type == SLICE_TYPE_I ||
            slice_param->slice_type == SLICE_TYPE_SI)
            slice_type = SLICE_TYPE_I;
        else if (slice_param->slice_type == SLICE_TYPE_P ||
                 slice_param->slice_type == SLICE_TYPE_SP)
            slice_type = SLICE_TYPE_P;
        else {
            assert(slice_param->slice_type == SLICE_TYPE_B);
            slice_type = SLICE_TYPE_B;
        }

        if (slice_type == SLICE_TYPE_I) {
            assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
            assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
            num_ref_idx_l0 = 0;
            num_ref_idx_l1 = 0;
        } else if (slice_type == SLICE_TYPE_P) {
            assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
            num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_idx_l1 = 0;
        } else {
            num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (slice_type == SLICE_TYPE_P)
            weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        else if (slice_type == SLICE_TYPE_B)
            weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;

        first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
        slice_hor_pos = first_mb_in_slice % width_in_mbs;
        slice_ver_pos = first_mb_in_slice / width_in_mbs;

        BEGIN_BCS_BATCH(batch, 16);
        OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (16 - 2));
        OUT_BCS_BATCH(batch,
                      (encrypted << 31) |
                      (0 << 30) | /* FIXME: packet based bit stream */
                      (0 << 29) | /* FIXME: packet format */
                      ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
        OUT_BCS_BATCH(batch,
                      (slice_param->slice_data_offset +
                       (slice_data_bit_offset >> 3)));
        OUT_BCS_BATCH(batch,
                      (0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */
                      (0 << 14) | /* ignore BSDPrematureComplete Error handling */
                      (0 << 13) | /* FIXME: ??? */
                      (0 << 12) | /* ignore MPR Error handling */
                      (0 << 10) | /* ignore Entropy Error handling */
                      (0 << 8)  | /* ignore MB Header Error handling */
                      (slice_type << 0));
        OUT_BCS_BATCH(batch,
                      (num_ref_idx_l1 << 24) |
                      (num_ref_idx_l0 << 16) |
                      (slice_param->chroma_log2_weight_denom << 8) |
                      (slice_param->luma_log2_weight_denom << 0));
        OUT_BCS_BATCH(batch,
                      (weighted_pred_idc << 30) |
                      (slice_param->direct_spatial_mv_pred_flag << 29) |
                      (slice_param->disable_deblocking_filter_idc << 27) |
                      (slice_param->cabac_init_idc << 24) |
                      ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                      ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                      ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
        OUT_BCS_BATCH(batch,
                      (slice_ver_pos << 24) |
                      (slice_hor_pos << 16) |
                      (first_mb_in_slice << 0));
        OUT_BCS_BATCH(batch,
                      (1 << 7) |
                      ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
        OUT_BCS_BATCH(batch, counter_value);

        /* FIXME: dw9-dw11 */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, i965_h264_context->weight128_luma_l0);
        OUT_BCS_BATCH(batch, i965_h264_context->weight128_luma_l1);
        OUT_BCS_BATCH(batch, i965_h264_context->weight128_chroma_l0);
        OUT_BCS_BATCH(batch, i965_h264_context->weight128_chroma_l1);

        ADVANCE_BCS_BATCH(batch);
    } else {
        BEGIN_BCS_BATCH(batch, 16);
        OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (16 - 2));
        OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */
        OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        ADVANCE_BCS_BATCH(batch);
    }
}

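/* Dispatch the per-slice BSD object to the Ironlake or G4x variant. */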
static void
i965_avc_bsd_object(VADriverContextP ctx,
                    struct decode_state *decode_state,
                    VAPictureParameterBufferH264 *pic_param,
                    VASliceParameterBufferH264 *slice_param,
                    int slice_index,
                    struct i965_h264_context *i965_h264_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_IRONLAKE(i965->intel.device_id))
        ironlake_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context);
    else
        g4x_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context);
}

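/*
 * The phantom (null) slice is emitted after the last real slice,
 * presumably so the BSD unit can terminate the picture and conceal any
 * macroblocks not covered by a real slice; DW6 of its AVC_BSD_OBJECT
 * carries the total number of macroblocks in the picture.
 */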
static void
i965_avc_bsd_phantom_slice(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           VAPictureParameterBufferH264 *pic_param,
                           struct i965_h264_context *i965_h264_context)
{
    i965_avc_bsd_object(ctx, decode_state, pic_param, NULL, 0, i965_h264_context);
}

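/*
 * Top-level BSD decoding pipeline for one frame: update the frame store
 * index, scan the slices once to decide whether in-loop deblocking output
 * is needed (enable_avc_ildb), then emit IMG_STATE and QM_STATE followed
 * by IND_OBJ_BASE_ADDR, SLICE_STATE, BUF_BASE_STATE and BSD_OBJECT for
 * every slice, the phantom slice and an MI_FLUSH.
 */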
void
i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context)
{
    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    intel_update_avc_frame_store_index(ctx, decode_state, pic_param, i965_h264_context->fsid_list);

    i965_h264_context->enable_avc_ildb = 0;
    i965_h264_context->picture.i_flag = 1;

    for (j = 0; j < decode_state->num_slice_params && i965_h264_context->enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                i965_h264_context->enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);

    i965_avc_bsd_img_state(ctx, decode_state, i965_h264_context);
    i965_avc_bsd_qm_state(ctx, decode_state, i965_h264_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        i965_bsd_ind_obj_base_address(ctx, decode_state, j, i965_h264_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i965_h264_context->picture.i_flag &&
                slice_param->slice_type != SLICE_TYPE_I &&
                slice_param->slice_type != SLICE_TYPE_SI)
                i965_h264_context->picture.i_flag = 0;

            i965_avc_bsd_slice_state(ctx, pic_param, slice_param, i965_h264_context);
            i965_avc_bsd_buf_base_state(ctx, decode_state, pic_param, slice_param, i965_h264_context);
            i965_avc_bsd_object(ctx, decode_state, pic_param, slice_param, j, i965_h264_context);
            slice_param++;
        }
    }

    i965_avc_bsd_phantom_slice(ctx, decode_state, pic_param, i965_h264_context);
    intel_batchbuffer_emit_mi_flush(batch);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}

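/*
 * (Re)allocate the per-context row store buffers used by the BSD unit;
 * the sizes below are large enough for a 120-macroblock-wide (1920-pixel)
 * picture, as noted in the inline comments.
 */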
void
i965_avc_bsd_decode_init(VADriverContextP ctx, void *h264_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
    struct i965_avc_bsd_context *i965_avc_bsd_context;
    dri_bo *bo;

    assert(i965_h264_context);
    i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;

    dri_bo_unreference(i965_avc_bsd_context->bsd_raw_store.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd raw store",
                      0x3000, /* at least 11520 bytes to support 120 MBs per row */
                      64);
    assert(bo);
    i965_avc_bsd_context->bsd_raw_store.bo = bo;

    dri_bo_unreference(i965_avc_bsd_context->mpr_row_store.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      0x2000, /* at least 7680 bytes to support 120 MBs per row */
                      64);
    assert(bo);
    i965_avc_bsd_context->mpr_row_store.bo = bo;
}

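/* Release the BSD row store buffers. */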
Bool
i965_avc_bsd_ternimate(struct i965_avc_bsd_context *i965_avc_bsd_context)
{
    dri_bo_unreference(i965_avc_bsd_context->bsd_raw_store.bo);
    dri_bo_unreference(i965_avc_bsd_context->mpr_row_store.bo);

    return True;
}