ebeb2a6ed31e6280af152844b621f2b2b32f2ee1
[platform/upstream/libva-intel-driver.git] / src / i965_avc_bsd.c
1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sub license, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the
13  * next paragraph) shall be included in all copies or substantial portions
14  * of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Xiang Haihao <haihao.xiang@intel.com>
26  *
27  */
28 #include "sysdeps.h"
29
30 #include "intel_batchbuffer.h"
31 #include "intel_driver.h"
32
33 #include "i965_defines.h"
34 #include "i965_drv_video.h"
35 #include "i965_avc_bsd.h"
36 #include "i965_media_h264.h"
37 #include "i965_media.h"
38 #include "i965_decoder_utils.h"
39 #include "intel_media.h"
40
41 static void
42 i965_avc_bsd_init_avc_bsd_surface(VADriverContextP ctx, 
43                                   struct object_surface *obj_surface,
44                                   VAPictureParameterBufferH264 *pic_param,
45                                   struct i965_h264_context *i965_h264_context)
46 {
47     struct i965_driver_data *i965 = i965_driver_data(ctx);
48     GenAvcSurface *avc_bsd_surface = obj_surface->private_data;
49
50     obj_surface->free_private_data = gen_free_avc_surface;
51
52     if (!avc_bsd_surface) {
53         avc_bsd_surface = calloc(sizeof(GenAvcSurface), 1);
54         avc_bsd_surface->frame_store_id = -1;
55         assert((obj_surface->size & 0x3f) == 0);
56         obj_surface->private_data = avc_bsd_surface;
57     }
58
59     avc_bsd_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
60                                         !pic_param->seq_fields.bits.direct_8x8_inference_flag);
61
62     if (avc_bsd_surface->dmv_top == NULL) {
63         avc_bsd_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
64                                                 "direct mv w/r buffer",
65                                                 DMV_SIZE,
66                                                 0x1000);
67     }
68
69     if (avc_bsd_surface->dmv_bottom_flag &&
70         avc_bsd_surface->dmv_bottom == NULL) {
71         avc_bsd_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
72                                                    "direct mv w/r buffer",
73                                                    DMV_SIZE,
74                                                    0x1000);
75     }
76 }
77
78 static void
79 i965_bsd_ind_obj_base_address(VADriverContextP ctx,
80                               struct decode_state *decode_state,
81                               int slice,
82                               struct i965_h264_context *i965_h264_context)
83                               
84 {
85     struct intel_batchbuffer *batch = i965_h264_context->batch;
86
87     dri_bo *ind_bo = decode_state->slice_datas[slice]->bo;
88
89     BEGIN_BCS_BATCH(batch, 3);
90     OUT_BCS_BATCH(batch, CMD_BSD_IND_OBJ_BASE_ADDR | (3 - 2));
91     OUT_BCS_RELOC(batch, ind_bo,
92                   I915_GEM_DOMAIN_INSTRUCTION, 0,
93                   0);
94     OUT_BCS_BATCH(batch, 0);
95     ADVANCE_BCS_BATCH(batch);
96 }
97
/*
 * Emit AVC_BSD_IMG_STATE: the per-picture state for the BSD decoding unit
 * (picture geometry, field/frame structure, QP offsets, feature flags and
 * the MEDIA_OBJECT_EX header the BSD unit prepends to generated IT commands).
 */
static void
i965_avc_bsd_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int qm_present_flag;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int avc_it_command_header;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    /* Scaling matrices are loaded separately by i965_avc_bsd_qm_state()
     * only when the app supplied an IQ matrix buffer. */
    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        qm_present_flag = 1;
    else
        qm_present_flag = 0; /* built-in QM matrices */

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    /* img_struct encoding: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Sanity: field pictures must be flagged as such in pic_fields. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field coding at the macroblock-pair level,
     * only meaningful when the picture itself is a frame. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    /* Hardware fields are 8 bits wide, hence the 0xff masks. */
    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
                                                                               
    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */

    /* BSD unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    /* Header the BSD unit will stamp onto each IT command it emits. */
    avc_it_command_header = (CMD_MEDIA_OBJECT_EX | (12 - 2));

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, CMD_AVC_BSD_IMG_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 
                  ((width_in_mbs * height_in_mbs) & 0x7fff)); /* total MB count, 15 bits */
    OUT_BCS_BATCH(batch, 
                  (height_in_mbs << 16) | 
                  (width_in_mbs << 0));
    OUT_BCS_BATCH(batch, 
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) | 
                  (SCAN_RASTER_ORDER << 15) | /* AVC ILDB Data */
                  (SCAN_SPECIAL_ORDER << 14) | /* AVC IT Command */
                  (SCAN_RASTER_ORDER << 13) | /* AVC IT Data */
                  (1 << 12) | /* always 1, hardware requirement */
                  (qm_present_flag << 10) |
                  (img_struct << 8) |
                  (16 << 0)); /* FIXME: always support 16 reference frames ??? */
    OUT_BCS_BATCH(batch,
                  (RESIDUAL_DATA_OFFSET << 24) | /* residual data offset */
                  (0 << 17) | /* don't overwrite SRT */
                  (0 << 16) | /* Un-SRT (Unsynchronized Root Thread) */
                  (0 << 12) | /* FIXME: no 16MV ??? */
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (i965_h264_context->enable_avc_ildb << 8)  | /* Enable ILDB writing output */
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, avc_it_command_header);
    ADVANCE_BCS_BATCH(batch);
}
189
190 static void
191 i965_avc_bsd_qm_state(VADriverContextP ctx,
192                       struct decode_state *decode_state,
193                       struct i965_h264_context *i965_h264_context)
194 {
195     struct intel_batchbuffer *batch = i965_h264_context->batch;
196     int cmd_len;
197     VAIQMatrixBufferH264 *iq_matrix;
198     VAPictureParameterBufferH264 *pic_param;
199
200     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
201         return;
202
203     iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
204
205     assert(decode_state->pic_param && decode_state->pic_param->buffer);
206     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
207
208     cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */
209
210     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
211         cmd_len += 2 * 16; /* load two 8x8 scaling matrices */
212
213     BEGIN_BCS_BATCH(batch, cmd_len);
214     OUT_BCS_BATCH(batch, CMD_AVC_BSD_QM_STATE | (cmd_len - 2));
215
216     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
217         OUT_BCS_BATCH(batch, 
218                       (0x0  << 8) | /* don't use default built-in matrices */
219                       (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
220     else
221         OUT_BCS_BATCH(batch, 
222                       (0x0  << 8) | /* don't use default built-in matrices */
223                       (0x3f << 0)); /* six 4x4 scaling matrices */
224
225     intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
226
227     if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
228         intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
229
230     ADVANCE_BCS_BATCH(batch);
231 }
232
/*
 * Emit AVC_BSD_SLICE_STATE for an inter slice: the reference index remap
 * tables for list 0/1 and, when explicit weighted prediction is in use, the
 * packed (offset, weight) pairs for luma and both chroma planes.  Also
 * records which entries carry a weight of exactly 128, since some hardware
 * needs a workaround for that value (see use_hw_w128 / weight128_* fields).
 */
static void
i965_avc_bsd_slice_state(VADriverContextP ctx, 
                         VAPictureParameterBufferH264 *pic_param, 
                         VASliceParameterBufferH264 *slice_param,
                         struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int present_flag, cmd_len, list, j;
    uint8_t ref_idx_state[32];
    /* 32 entries x 6 bytes: luma offset/weight, Cb offset/weight, Cr offset/weight */
    char weightoffsets[32 * 6];

    /* don't issue SLICE_STATE for intra-prediction decoding */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI)
        return;

    cmd_len = 2;

    /* P slices need only list 0 (8 dwords of ref state); B slices need both. */
    if (slice_param->slice_type == SLICE_TYPE_P ||
        slice_param->slice_type == SLICE_TYPE_SP) {
        present_flag = PRESENT_REF_LIST0;
        cmd_len += 8;
    } else { 
        present_flag = PRESENT_REF_LIST0 | PRESENT_REF_LIST1;
        cmd_len += 16;
    }

    /* Explicit weighted prediction for P: 48 dwords of weight/offset data. */
    if ((slice_param->slice_type == SLICE_TYPE_P ||
         slice_param->slice_type == SLICE_TYPE_SP) && 
        (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
        present_flag |= PRESENT_WEIGHT_OFFSET_L0;
        cmd_len += 48;
    }

    /* Explicit weighted bi-prediction for B: both lists, 96 dwords. */
    if ((slice_param->slice_type == SLICE_TYPE_B) &&
        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
        present_flag |= PRESENT_WEIGHT_OFFSET_L0 | PRESENT_WEIGHT_OFFSET_L1;
        cmd_len += 96;
    }

    BEGIN_BCS_BATCH(batch, cmd_len);
    OUT_BCS_BATCH(batch, CMD_AVC_BSD_SLICE_STATE | (cmd_len - 2));
    OUT_BCS_BATCH(batch, present_flag);

    /* Emit the frame-store remap table for each present reference list. */
    for (list = 0; list < 2; list++) {
        int flag, num_va_pics;
        VAPictureH264 *va_pic;

        if (list == 0) {
            flag        = PRESENT_REF_LIST0;
            va_pic      = slice_param->RefPicList0;
            num_va_pics = slice_param->num_ref_idx_l0_active_minus1 + 1;
        } else {
            flag        = PRESENT_REF_LIST1;
            va_pic      = slice_param->RefPicList1;
            num_va_pics = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (!(present_flag & flag))
            continue;

        /* Translate VA surface IDs into hardware frame-store indices. */
        gen5_fill_avc_ref_idx_state(
            ref_idx_state,
            va_pic, num_va_pics,
            i965_h264_context->fsid_list
        );            
        intel_batchbuffer_data(batch, ref_idx_state, sizeof(ref_idx_state));
    }

    /* Reset the weight-128 bookkeeping before scanning this slice's weights. */
    i965_h264_context->weight128_luma_l0 = 0;
    i965_h264_context->weight128_luma_l1 = 0;
    i965_h264_context->weight128_chroma_l0 = 0;
    i965_h264_context->weight128_chroma_l1 = 0;

    i965_h264_context->weight128_offset0_flag = 0;
    i965_h264_context->weight128_offset0 = 0;

    if (present_flag & PRESENT_WEIGHT_OFFSET_L0) {
        for (j = 0; j < 32; j++) {
            /* Pack as the hardware expects: offset before weight, per plane. */
            weightoffsets[j * 6 + 0] = slice_param->luma_offset_l0[j];
            weightoffsets[j * 6 + 1] = slice_param->luma_weight_l0[j];
            weightoffsets[j * 6 + 2] = slice_param->chroma_offset_l0[j][0];
            weightoffsets[j * 6 + 3] = slice_param->chroma_weight_l0[j][0];
            weightoffsets[j * 6 + 4] = slice_param->chroma_offset_l0[j][1];
            weightoffsets[j * 6 + 5] = slice_param->chroma_weight_l0[j][1];

            if (pic_param->pic_fields.bits.weighted_pred_flag == 1 ||
                pic_param->pic_fields.bits.weighted_bipred_idc == 1) {
                if (i965_h264_context->use_hw_w128) {
                    /* Hardware handles weight==128; just record which entries. */
                    if (slice_param->luma_weight_l0[j] == 128)
                        i965_h264_context->weight128_luma_l0 |= (1 << j);

                    if (slice_param->chroma_weight_l0[j][0] == 128 ||
                        slice_param->chroma_weight_l0[j][1] == 128)
                        i965_h264_context->weight128_chroma_l0 |= (1 << j);
                } else {
                    /* FIXME: workaround for weight 128 */
                    if (slice_param->luma_weight_l0[j] == 128 ||
                        slice_param->chroma_weight_l0[j][0] == 128 ||
                        slice_param->chroma_weight_l0[j][1] == 128)
                        i965_h264_context->weight128_offset0_flag = 1;
                }
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
    }

    if (present_flag & PRESENT_WEIGHT_OFFSET_L1) {
        for (j = 0; j < 32; j++) {
            weightoffsets[j * 6 + 0] = slice_param->luma_offset_l1[j];
            weightoffsets[j * 6 + 1] = slice_param->luma_weight_l1[j];
            weightoffsets[j * 6 + 2] = slice_param->chroma_offset_l1[j][0];
            weightoffsets[j * 6 + 3] = slice_param->chroma_weight_l1[j][0];
            weightoffsets[j * 6 + 4] = slice_param->chroma_offset_l1[j][1];
            weightoffsets[j * 6 + 5] = slice_param->chroma_weight_l1[j][1];

            if (pic_param->pic_fields.bits.weighted_bipred_idc == 1) {
                if (i965_h264_context->use_hw_w128) {
                    if (slice_param->luma_weight_l1[j] == 128)
                        i965_h264_context->weight128_luma_l1 |= (1 << j);

                    if (slice_param->chroma_weight_l1[j][0] == 128 ||
                        slice_param->chroma_weight_l1[j][1] == 128)
                        i965_h264_context->weight128_chroma_l1 |= (1 << j);
                } else {
                    /* NOTE(review): this branch checks the *_l0 weights while
                     * scanning list 1 — possibly a copy-paste slip from the L0
                     * loop above (should it read *_l1?). Verify against the
                     * weight-128 workaround consumer before changing. */
                    if (slice_param->luma_weight_l0[j] == 128 ||
                        slice_param->chroma_weight_l0[j][0] == 128 ||
                        slice_param->chroma_weight_l0[j][1] == 128)
                        i965_h264_context->weight128_offset0_flag = 1;
                }
            }
        }

        intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
    }

    ADVANCE_BCS_BATCH(batch);
}
372
/*
 * Emit AVC_BSD_BUF_BASE_STATE (74 dwords): the working-buffer addresses for
 * the BSD unit — row stores, IT command/data output, optional ILDB output —
 * followed by the direct-MV buffers for every frame store, the current
 * picture's DMV buffers, and the POC list the hardware uses for temporal
 * direct-mode scaling.  Dword order is fixed by hardware; do not reorder.
 */
static void
i965_avc_bsd_buf_base_state(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            VAPictureParameterBufferH264 *pic_param, 
                            VASliceParameterBufferH264 *slice_param,
                            struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    struct i965_avc_bsd_context *i965_avc_bsd_context;
    int i;
    VAPictureH264 *va_pic;
    struct object_surface *obj_surface;
    GenAvcSurface *avc_bsd_surface;

    i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;

    BEGIN_BCS_BATCH(batch, 74);
    OUT_BCS_BATCH(batch, CMD_AVC_BSD_BUF_BASE_STATE | (74 - 2));
    OUT_BCS_RELOC(batch, i965_avc_bsd_context->bsd_raw_store.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_RELOC(batch, i965_avc_bsd_context->mpr_row_store.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    /* IT command output; offset skips the scoreboard commands when in use. */
    OUT_BCS_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
    OUT_BCS_RELOC(batch, i965_h264_context->avc_it_data.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  (i965_h264_context->avc_it_data.write_offset << 6)); /* offset in 64-byte units */

    /* ILDB (deblocking) output buffer, only when in-loop deblocking is on. */
    if (i965_h264_context->enable_avc_ildb)
        OUT_BCS_RELOC(batch, i965_h264_context->avc_ildb_data.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* Two dwords (top/bottom DMV) per frame-store slot. */
    for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
        obj_surface = i965_h264_context->fsid_list[i].obj_surface;
        if (obj_surface && obj_surface->private_data) {
            avc_bsd_surface = obj_surface->private_data;
            
            OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);

            /* Without a dedicated bottom buffer, point both at the top one. */
            if (avc_bsd_surface->dmv_bottom_flag == 1)
                OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_bottom,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
            else
                OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;
    i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* initial uv component for YUV400 case */
    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
         unsigned int uv_offset = obj_surface->width * obj_surface->height; 
         unsigned int uv_size   = obj_surface->width * obj_surface->height / 2; 

         /* NOTE(review): dri_bo_map return value is not checked; a map
          * failure would make the memset write through a stale pointer. */
         dri_bo_map(obj_surface->bo, 1);
         memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size); /* neutral chroma */
         dri_bo_unmap(obj_surface->bo);
    }

    /* Ensure the current picture has DMV buffers, then emit them (read/write). */
    i965_avc_bsd_init_avc_bsd_surface(ctx, obj_surface, pic_param, i965_h264_context);
    avc_bsd_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (avc_bsd_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
        obj_surface = i965_h264_context->fsid_list[i].obj_surface;

        if (obj_surface) {
            /* NOTE: this inner va_pic intentionally shadows the outer one. */
            const VAPictureH264 * const va_pic = avc_find_picture(
                obj_surface->base.id, pic_param->ReferenceFrames,
                ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* Current picture's POC closes out the list. */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
492
/*
 * Emit AVC_BSD_OBJECT (G4x variant) for one slice, or a phantom slice when
 * slice_param is NULL.  The real-slice path points the hardware just past
 * the slice header (slice_data_bit_offset, computed with emulation-
 * prevention bytes accounted for) so it starts decoding at the first MB;
 * the phantom path emits a zero-length object that tells the hardware the
 * picture is complete.
 */
static void
g4x_avc_bsd_object(VADriverContextP ctx, 
                   struct decode_state *decode_state,
                   VAPictureParameterBufferH264 *pic_param,
                   VASliceParameterBufferH264 *slice_param,
                   int slice_index,
                   struct i965_h264_context *i965_h264_context)
{
    struct intel_batchbuffer *batch = i965_h264_context->batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    if (slice_param) {
        int encrypted, counter_value, cmd_len;
        int slice_hor_pos, slice_ver_pos;
        int num_ref_idx_l0, num_ref_idx_l1;
        int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                             pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
        unsigned int slice_data_bit_offset;
        int weighted_pred_idc = 0;
        int first_mb_in_slice = 0;
        int slice_type;

        encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */

        /* Encrypted slices carry one extra dword (the counter value). */
        if (encrypted) {
            cmd_len = 9;
            counter_value = 0; /* FIXME: ??? */
        } else 
            cmd_len = 8;


        slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
            decode_state->slice_datas[slice_index]->bo,
            slice_param,
            pic_param->pic_fields.bits.entropy_coding_mode_flag
        );

        /* Collapse SI->I and SP->P: hardware only knows I/P/B. */
        if (slice_param->slice_type == SLICE_TYPE_I ||
            slice_param->slice_type == SLICE_TYPE_SI)
            slice_type = SLICE_TYPE_I;
        else if (slice_param->slice_type == SLICE_TYPE_P ||
                 slice_param->slice_type == SLICE_TYPE_SP)
            slice_type = SLICE_TYPE_P;
        else {
            assert(slice_param->slice_type == SLICE_TYPE_B);
            slice_type = SLICE_TYPE_B;
        }

        if (slice_type == SLICE_TYPE_I) {
            assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
            assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
            num_ref_idx_l0 = 0;
            num_ref_idx_l1 = 0;
        } else if (slice_type == SLICE_TYPE_P) {
            assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
            num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_idx_l1 = 0;
        } else {
            num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (slice_type == SLICE_TYPE_P)
            weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        else if (slice_type == SLICE_TYPE_B)
            weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;

        /* In MBAFF pictures first_mb_in_slice counts MB pairs; double it. */
        first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
        slice_hor_pos = first_mb_in_slice % width_in_mbs; 
        slice_ver_pos = first_mb_in_slice / width_in_mbs;

        BEGIN_BCS_BATCH(batch, cmd_len);
        OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (cmd_len - 2));
        /* Remaining bytes of slice data after the header. */
        OUT_BCS_BATCH(batch, 
                      (encrypted << 31) |
                      ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
        /* Byte offset of the first MB within the indirect object buffer. */
        OUT_BCS_BATCH(batch, 
                      (slice_param->slice_data_offset +
                       (slice_data_bit_offset >> 3)));
        OUT_BCS_BATCH(batch, 
                      (0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */
                      (0 << 14) | /* ignore BSDPrematureComplete Error handling */
                      (0 << 13) | /* FIXME: ??? */
                      (0 << 12) | /* ignore MPR Error handling */
                      (0 << 10) | /* ignore Entropy Error handling */
                      (0 << 8)  | /* ignore MB Header Error handling */
                      (slice_type << 0));
        OUT_BCS_BATCH(batch, 
                      (num_ref_idx_l1 << 24) |
                      (num_ref_idx_l0 << 16) |
                      (slice_param->chroma_log2_weight_denom << 8) |
                      (slice_param->luma_log2_weight_denom << 0));
        OUT_BCS_BATCH(batch, 
                      (weighted_pred_idc << 30) |
                      (slice_param->direct_spatial_mv_pred_flag << 29) |
                      (slice_param->disable_deblocking_filter_idc << 27) |
                      (slice_param->cabac_init_idc << 24) |
                      ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                      ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                      ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
        OUT_BCS_BATCH(batch, 
                      (slice_ver_pos << 24) |
                      (slice_hor_pos << 16) | 
                      (first_mb_in_slice << 0));
        /* Remaining bit count within the first byte of slice data. */
        OUT_BCS_BATCH(batch, 
                      (1 << 7) |
                      ((0x7 - (slice_data_bit_offset & 0x7)) << 0));

        if (encrypted) {
            OUT_BCS_BATCH(batch, counter_value);
        }

        ADVANCE_BCS_BATCH(batch); 
    } else {
        /* Phantom slice: zero-length object whose MB count tells the
         * hardware the whole picture (frame or single field) is done. */
        BEGIN_BCS_BATCH(batch, 8); 
        OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (8 - 2));
        OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */
        OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* Field pictures cover half the frame's macroblocks. */
        OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
        OUT_BCS_BATCH(batch, 0);
        ADVANCE_BCS_BATCH(batch);
    }
}
620
621 static void
622 ironlake_avc_bsd_object(VADriverContextP ctx, 
623                         struct decode_state *decode_state,
624                         VAPictureParameterBufferH264 *pic_param,
625                         VASliceParameterBufferH264 *slice_param,
626                         int slice_index,
627                         struct i965_h264_context *i965_h264_context)
628 {
629     struct intel_batchbuffer *batch = i965_h264_context->batch;
630     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
631     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
632
633     if (slice_param) {
634         int encrypted, counter_value;
635         int slice_hor_pos, slice_ver_pos;
636         int num_ref_idx_l0, num_ref_idx_l1;
637         int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
638                              pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
639         unsigned int slice_data_bit_offset;
640         int weighted_pred_idc = 0;
641         int first_mb_in_slice;
642         int slice_type;
643
644         encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
645
646         if (encrypted) {
647             counter_value = 0; /* FIXME: ??? */
648         } else 
649             counter_value = 0;
650
651         slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
652             decode_state->slice_datas[slice_index]->bo,
653             slice_param,
654             pic_param->pic_fields.bits.entropy_coding_mode_flag
655         );
656
657         if (slice_param->slice_type == SLICE_TYPE_I ||
658             slice_param->slice_type == SLICE_TYPE_SI)
659             slice_type = SLICE_TYPE_I;
660         else if (slice_param->slice_type == SLICE_TYPE_P ||
661                  slice_param->slice_type == SLICE_TYPE_SP)
662             slice_type = SLICE_TYPE_P;
663         else {
664             assert(slice_param->slice_type == SLICE_TYPE_B);
665             slice_type = SLICE_TYPE_B;
666         }
667
668         if (slice_type == SLICE_TYPE_I) {
669             assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
670             assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
671             num_ref_idx_l0 = 0;
672             num_ref_idx_l1 = 0;
673         } else if (slice_type == SLICE_TYPE_P) {
674             assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
675             num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
676             num_ref_idx_l1 = 0;
677         } else {
678             num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
679             num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
680         }
681
682         if (slice_type == SLICE_TYPE_P)
683             weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
684         else if (slice_type == SLICE_TYPE_B)
685             weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
686
687         first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
688         slice_hor_pos = first_mb_in_slice % width_in_mbs; 
689         slice_ver_pos = first_mb_in_slice / width_in_mbs;
690
691         BEGIN_BCS_BATCH(batch, 16);
692         OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (16 - 2));
693         OUT_BCS_BATCH(batch, 
694                       (encrypted << 31) |
695                       (0 << 30) | /* FIXME: packet based bit stream */
696                       (0 << 29) | /* FIXME: packet format */
697                       ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
698         OUT_BCS_BATCH(batch, 
699                       (slice_param->slice_data_offset +
700                        (slice_data_bit_offset >> 3)));
701         OUT_BCS_BATCH(batch, 
702                       (0 << 31) | /* concealment mode: 0->intra 16x16 prediction, 1->inter P Copy */
703                       (0 << 14) | /* ignore BSDPrematureComplete Error handling */
704                       (0 << 13) | /* FIXME: ??? */
705                       (0 << 12) | /* ignore MPR Error handling */
706                       (0 << 10) | /* ignore Entropy Error handling */
707                       (0 << 8)  | /* ignore MB Header Error handling */
708                       (slice_type << 0));
709         OUT_BCS_BATCH(batch, 
710                       (num_ref_idx_l1 << 24) |
711                       (num_ref_idx_l0 << 16) |
712                       (slice_param->chroma_log2_weight_denom << 8) |
713                       (slice_param->luma_log2_weight_denom << 0));
714         OUT_BCS_BATCH(batch, 
715                       (weighted_pred_idc << 30) |
716                       (slice_param->direct_spatial_mv_pred_flag << 29) |
717                       (slice_param->disable_deblocking_filter_idc << 27) |
718                       (slice_param->cabac_init_idc << 24) |
719                       ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
720                       ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
721                       ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
722         OUT_BCS_BATCH(batch, 
723                       (slice_ver_pos << 24) |
724                       (slice_hor_pos << 16) | 
725                       (first_mb_in_slice << 0));
726         OUT_BCS_BATCH(batch, 
727                       (1 << 7) |
728                       ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
729         OUT_BCS_BATCH(batch, counter_value);
730         
731         /* FIXME: dw9-dw11 */
732         OUT_BCS_BATCH(batch, 0);
733         OUT_BCS_BATCH(batch, 0);
734         OUT_BCS_BATCH(batch, 0);
735         OUT_BCS_BATCH(batch, i965_h264_context->weight128_luma_l0);
736         OUT_BCS_BATCH(batch, i965_h264_context->weight128_luma_l1);
737         OUT_BCS_BATCH(batch, i965_h264_context->weight128_chroma_l0);
738         OUT_BCS_BATCH(batch, i965_h264_context->weight128_chroma_l1);
739
740         ADVANCE_BCS_BATCH(batch); 
741     } else {
742         BEGIN_BCS_BATCH(batch, 16);
743         OUT_BCS_BATCH(batch, CMD_AVC_BSD_OBJECT | (16 - 2));
744         OUT_BCS_BATCH(batch, 0); /* indirect data length for phantom slice is 0 */
745         OUT_BCS_BATCH(batch, 0); /* indirect data start address for phantom slice is 0 */
746         OUT_BCS_BATCH(batch, 0);
747         OUT_BCS_BATCH(batch, 0);
748         OUT_BCS_BATCH(batch, 0);
749         OUT_BCS_BATCH(batch, width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
750         OUT_BCS_BATCH(batch, 0);
751         OUT_BCS_BATCH(batch, 0);
752         OUT_BCS_BATCH(batch, 0);
753         OUT_BCS_BATCH(batch, 0);
754         OUT_BCS_BATCH(batch, 0);
755         OUT_BCS_BATCH(batch, 0);
756         OUT_BCS_BATCH(batch, 0);
757         OUT_BCS_BATCH(batch, 0);
758         OUT_BCS_BATCH(batch, 0);
759         ADVANCE_BCS_BATCH(batch);
760     }
761 }
762
763 static void
764 i965_avc_bsd_object(VADriverContextP ctx, 
765                     struct decode_state *decode_state,
766                     VAPictureParameterBufferH264 *pic_param,
767                     VASliceParameterBufferH264 *slice_param,
768                     int slice_index,
769                     struct i965_h264_context *i965_h264_context)
770 {
771     struct i965_driver_data *i965 = i965_driver_data(ctx);
772
773     if (IS_IRONLAKE(i965->intel.device_info))
774         ironlake_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context);
775     else
776         g4x_avc_bsd_object(ctx, decode_state, pic_param, slice_param, slice_index, i965_h264_context);
777 }
778
/*
 * Emit the trailing "phantom" slice object that terminates BSD decoding
 * for the picture.  This is i965_avc_bsd_object() with a NULL
 * slice_param, which the emitters use to select their phantom-slice
 * (all-zero indirect data) path.
 */
static void
i965_avc_bsd_phantom_slice(VADriverContextP ctx, 
                           struct decode_state *decode_state,
                           VAPictureParameterBufferH264 *pic_param,
                           struct i965_h264_context *i965_h264_context)
{
    i965_avc_bsd_object(ctx, decode_state, pic_param, NULL, 0, i965_h264_context);
}
787
788 void 
789 i965_avc_bsd_pipeline(VADriverContextP ctx, struct decode_state *decode_state, void *h264_context)
790 {
791     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
792     struct intel_batchbuffer *batch = i965_h264_context->batch;
793     VAPictureParameterBufferH264 *pic_param;
794     VASliceParameterBufferH264 *slice_param;
795     int i, j;
796
797     assert(decode_state->pic_param && decode_state->pic_param->buffer);
798     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
799     intel_update_avc_frame_store_index(ctx, decode_state, pic_param,
800         i965_h264_context->fsid_list, &i965_h264_context->fs_ctx);
801
802     i965_h264_context->enable_avc_ildb = 0;
803     i965_h264_context->picture.i_flag = 1;
804
805     for (j = 0; j < decode_state->num_slice_params && i965_h264_context->enable_avc_ildb == 0; j++) {
806         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
807         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
808
809         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
810             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
811             assert((slice_param->slice_type == SLICE_TYPE_I) ||
812                    (slice_param->slice_type == SLICE_TYPE_SI) ||
813                    (slice_param->slice_type == SLICE_TYPE_P) ||
814                    (slice_param->slice_type == SLICE_TYPE_SP) ||
815                    (slice_param->slice_type == SLICE_TYPE_B));
816
817             if (slice_param->disable_deblocking_filter_idc != 1) {
818                 i965_h264_context->enable_avc_ildb = 1;
819                 break;
820             }
821
822             slice_param++;
823         }
824     }
825
826     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
827
828     i965_avc_bsd_img_state(ctx, decode_state, i965_h264_context);
829     i965_avc_bsd_qm_state(ctx, decode_state, i965_h264_context);
830
831     for (j = 0; j < decode_state->num_slice_params; j++) {
832         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
833         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
834
835         i965_bsd_ind_obj_base_address(ctx, decode_state, j, i965_h264_context);
836
837         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
838             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
839             assert((slice_param->slice_type == SLICE_TYPE_I) ||
840                    (slice_param->slice_type == SLICE_TYPE_SI) ||
841                    (slice_param->slice_type == SLICE_TYPE_P) ||
842                    (slice_param->slice_type == SLICE_TYPE_SP) ||
843                    (slice_param->slice_type == SLICE_TYPE_B));
844
845             if (i965_h264_context->picture.i_flag && 
846                 (slice_param->slice_type != SLICE_TYPE_I ||
847                  slice_param->slice_type != SLICE_TYPE_SI))
848                 i965_h264_context->picture.i_flag = 0;
849
850             i965_avc_bsd_slice_state(ctx, pic_param, slice_param, i965_h264_context);
851             i965_avc_bsd_buf_base_state(ctx, decode_state, pic_param, slice_param, i965_h264_context);
852             i965_avc_bsd_object(ctx, decode_state, pic_param, slice_param, j, i965_h264_context);
853             slice_param++;
854         }
855     }
856
857     i965_avc_bsd_phantom_slice(ctx, decode_state, pic_param, i965_h264_context);
858     intel_batchbuffer_emit_mi_flush(batch);
859     intel_batchbuffer_end_atomic(batch);
860     intel_batchbuffer_flush(batch);
861 }
862
863 void 
864 i965_avc_bsd_decode_init(VADriverContextP ctx, void *h264_context)
865 {
866     struct i965_driver_data *i965 = i965_driver_data(ctx);
867     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)h264_context;
868     struct i965_avc_bsd_context *i965_avc_bsd_context;
869     dri_bo *bo;
870
871     assert(i965_h264_context);
872     i965_avc_bsd_context = &i965_h264_context->i965_avc_bsd_context;
873
874     dri_bo_unreference(i965_avc_bsd_context->bsd_raw_store.bo);
875     bo = dri_bo_alloc(i965->intel.bufmgr,
876                       "bsd raw store",
877                       0x3000, /* at least 11520 bytes to support 120 MBs per row */
878                       64);
879     assert(bo);
880     i965_avc_bsd_context->bsd_raw_store.bo = bo;
881
882     dri_bo_unreference(i965_avc_bsd_context->mpr_row_store.bo);
883     bo = dri_bo_alloc(i965->intel.bufmgr,
884                       "mpr row store",
885                       0x2000, /* at least 7680 bytes to support 120 MBs per row */
886                       64);
887     assert(bo);
888     i965_avc_bsd_context->mpr_row_store.bo = bo;
889 }
890
891 Bool 
892 i965_avc_bsd_ternimate(struct i965_avc_bsd_context *i965_avc_bsd_context)
893 {
894     dri_bo_unreference(i965_avc_bsd_context->bsd_raw_store.bo);
895     dri_bo_unreference(i965_avc_bsd_context->mpr_row_store.bo);
896
897     return True;
898 }