h264: fix first macroblock bit offset calculation (ILK, SNB, IVB).
author Gwenole Beauchesne <gwenole.beauchesne@intel.com>
Thu, 1 Mar 2012 17:04:56 +0000 (18:04 +0100)
committer Xiang, Haihao <haihao.xiang@intel.com>
Fri, 28 Dec 2012 01:14:37 +0000 (09:14 +0800)
Fix and simplify the scan for emulation prevention bytes (EPB), thus
avoiding a read beyond the end of the slice data buffer. Besides, when the
slice_header() bytes have to be inspected, read them with
dri_bo_get_subdata() instead of mapping the buffer object.

HW specific changes:
- SNB: make the HW skip the emulation prevention bytes itself.
- IVB: fix MFD_AVC_BSD_OBJECT to report the actual slice data buffer size.

Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne@intel.com>
(cherry picked from commit 9b7863bf49dcf8bf1de9b45ce4e986dfd1cca418)

NEWS
src/gen6_mfd.c
src/gen7_mfd.c
src/i965_avc_bsd.c
src/i965_decoder_utils.c
src/i965_decoder_utils.h

diff --git a/NEWS b/NEWS
index 8537681..f4b841a 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,7 @@ Version 1.0.16 - DD.Feb.2012
 * Fix VC-1 motion vector modes for Ivy Bridge
 * Fix MFX_QM_STATE for H.264 flat scaling lists (IVB)
 * Fix and simplify AVC_REF_IDX_STATE setup (ILK, SNB, IVB)
+* Fix and simplify first macroblock bit offset calculation (ILK, SNB, IVB)
 
 Version 1.0.15 - 28.Oct.2011
 * Add auto-generated Debian packaging
diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
index e8b394c..3457566 100755 (executable)
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -832,27 +832,6 @@ gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
     }
 }
 
-static int
-gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
-    int out_slice_data_bit_offset;
-    int slice_header_size = in_slice_data_bit_offset / 8;
-    int i, j;
-
-    for (i = 0, j = 0; i < slice_header_size; i++, j++) {
-        if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
-            i++, j += 2;
-        }
-    }
-
-    out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
-    if (mode_flag == ENTROPY_CABAC)
-        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
-    return out_slice_data_bit_offset;
-}
-
 static void
 gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
@@ -861,21 +840,19 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                         struct gen6_mfd_context *gen6_mfd_context)
 {
     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
-    int slice_data_bit_offset;
-    uint8_t *slice_data = NULL;
+    unsigned int slice_data_bit_offset;
 
-    dri_bo_map(slice_data_bo, 0);
-    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
-    slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
-                                                              pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                              slice_param->slice_data_bit_offset);
-    dri_bo_unmap(slice_data_bo);
+    slice_data_bit_offset = avc_get_first_mb_bit_offset(
+        slice_data_bo,
+        slice_param,
+        pic_param->pic_fields.bits.entropy_coding_mode_flag
+    );
 
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
     OUT_BCS_BATCH(batch, 
-                  ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
-    OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
+                  (slice_param->slice_data_size - slice_param->slice_data_offset));
+    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
     OUT_BCS_BATCH(batch,
                   (0 << 31) |
                   (0 << 14) |
@@ -883,8 +860,8 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                   (0 << 10) |
                   (0 << 8));
     OUT_BCS_BATCH(batch,
-                  (0 << 16) |
-                  (0 << 6)  |
+                  ((slice_data_bit_offset >> 3) << 16) |
+                  (1 << 6)  |
                   ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
     OUT_BCS_BATCH(batch, 0);
     ADVANCE_BCS_BATCH(batch);
diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
index ca4b5e0..4a8bc27 100755 (executable)
--- a/src/gen7_mfd.c
+++ b/src/gen7_mfd.c
@@ -810,27 +810,6 @@ gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
     }
 }
 
-static int
-gen7_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
-    int out_slice_data_bit_offset;
-    int slice_header_size = in_slice_data_bit_offset / 8;
-    int i, j;
-
-    for (i = 0, j = 0; i < slice_header_size; i++, j++) {
-        if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
-            i++, j += 2;
-        }
-    }
-
-    out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
-    if (mode_flag == ENTROPY_CABAC)
-        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
-    return out_slice_data_bit_offset;
-}
-
 static void
 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
@@ -840,21 +819,19 @@ gen7_mfd_avc_bsd_object(VADriverContextP ctx,
                         struct gen7_mfd_context *gen7_mfd_context)
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-    int slice_data_bit_offset;
-    uint8_t *slice_data = NULL;
+    unsigned int slice_data_bit_offset;
 
-    dri_bo_map(slice_data_bo, 0);
-    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
-    slice_data_bit_offset = gen7_mfd_avc_get_slice_bit_offset(slice_data,
-                                                              pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                              slice_param->slice_data_bit_offset);
-    dri_bo_unmap(slice_data_bo);
+    slice_data_bit_offset = avc_get_first_mb_bit_offset(
+        slice_data_bo,
+        slice_param,
+        pic_param->pic_fields.bits.entropy_coding_mode_flag
+    );
 
     /* the input bitsteam format on GEN7 differs from GEN6 */
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
     OUT_BCS_BATCH(batch, 
-                  (slice_param->slice_data_size));
+                  (slice_param->slice_data_size - slice_param->slice_data_offset));
     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
     OUT_BCS_BATCH(batch,
                   (0 << 31) |
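diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c
index 5246c88..29a600b 100644 (file)
--- a/src/i965_avc_bsd.c
+++ b/src/i965_avc_bsd.c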
@@ -527,35 +527,6 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
     ADVANCE_BCS_BATCH(batch);
 }
 
-/*
- * Return the bit offset to the first bit of the slice data
- *
- * VASliceParameterBufferH264.slice_data_bit_offset will point into the part
- * of slice header if there are some escaped bytes in the slice header. The offset 
- * to slice data is needed for BSD unit so that BSD unit can fetch right slice data
- * for processing. This fixes conformance case BASQP1_Sony_C.jsv
- */
-static int
-i965_avc_bsd_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
-    int out_slice_data_bit_offset;
-    int slice_header_size = in_slice_data_bit_offset / 8;
-    int i, j;
-
-    for (i = 0, j = 0; i < slice_header_size; i++, j++) {
-        if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
-            i++, j += 2;
-        }
-    }
-
-    out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
-    if (mode_flag == ENTROPY_CABAC)
-        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
-    return out_slice_data_bit_offset;
-}
-
 static void
 g4x_avc_bsd_object(VADriverContextP ctx, 
                    struct decode_state *decode_state,
@@ -574,11 +545,10 @@ g4x_avc_bsd_object(VADriverContextP ctx,
         int num_ref_idx_l0, num_ref_idx_l1;
         int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                              pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
-        int slice_data_bit_offset;
+        unsigned int slice_data_bit_offset;
         int weighted_pred_idc = 0;
         int first_mb_in_slice = 0;
         int slice_type;
-        uint8_t *slice_data = NULL;
 
         encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
 
@@ -588,12 +558,12 @@ g4x_avc_bsd_object(VADriverContextP ctx,
         } else 
             cmd_len = 8;
 
-        dri_bo_map(decode_state->slice_datas[slice_index]->bo, 0);
-        slice_data = (uint8_t *)(decode_state->slice_datas[slice_index]->bo->virtual + slice_param->slice_data_offset);
-        slice_data_bit_offset = i965_avc_bsd_get_slice_bit_offset(slice_data,
-                                                                  pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                                  slice_param->slice_data_bit_offset);
-        dri_bo_unmap(decode_state->slice_datas[slice_index]->bo);
+
+        slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
+            decode_state->slice_datas[slice_index]->bo,
+            slice_param,
+            pic_param->pic_fields.bits.entropy_coding_mode_flag
+        );
 
         if (slice_param->slice_type == SLICE_TYPE_I ||
             slice_param->slice_type == SLICE_TYPE_SI)
@@ -703,11 +673,10 @@ ironlake_avc_bsd_object(VADriverContextP ctx,
         int num_ref_idx_l0, num_ref_idx_l1;
         int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                              pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
-        int slice_data_bit_offset;
+        unsigned int slice_data_bit_offset;
         int weighted_pred_idc = 0;
         int first_mb_in_slice;
         int slice_type;
-        uint8_t *slice_data = NULL;
 
         encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
 
@@ -716,12 +685,11 @@ ironlake_avc_bsd_object(VADriverContextP ctx,
         } else 
             counter_value = 0;
 
-        dri_bo_map(decode_state->slice_datas[slice_index]->bo, 0);
-        slice_data = (uint8_t *)(decode_state->slice_datas[slice_index]->bo->virtual + slice_param->slice_data_offset);
-        slice_data_bit_offset = i965_avc_bsd_get_slice_bit_offset(slice_data,
-                                                                  pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                                  slice_param->slice_data_bit_offset);
-        dri_bo_unmap(decode_state->slice_datas[slice_index]->bo);
+        slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
+            decode_state->slice_datas[slice_index]->bo,
+            slice_param,
+            pic_param->pic_fields.bits.entropy_coding_mode_flag
+        );
 
         if (slice_param->slice_type == SLICE_TYPE_I ||
             slice_param->slice_type == SLICE_TYPE_SI)
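diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
index 68e5b50..e735374 100644 (file)
--- a/src/i965_decoder_utils.c
+++ b/src/i965_decoder_utils.c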
@@ -24,6 +24,7 @@
 #include <assert.h>
 #include <stddef.h>
 #include <string.h>
+#include <alloca.h>
 #include "intel_batchbuffer.h"
 #include "i965_decoder_utils.h"
 #include "i965_drv_video.h"
@@ -170,6 +171,60 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
     memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8));
 }
 
+/* Get first macroblock bit offset for BSD, not counting emulation prevention bytes (EPB) (AVC) */
+/* XXX: slice_data_bit_offset does not account for EPB */
+unsigned int
+avc_get_first_mb_bit_offset(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+)
+{
+    unsigned int slice_data_bit_offset = slice_param->slice_data_bit_offset;
+
+    if (mode_flag == ENTROPY_CABAC)
+        slice_data_bit_offset = ALIGN(slice_data_bit_offset, 0x8);
+    return slice_data_bit_offset;
+}
+
+/* Get first macroblock bit offset for BSD, counting emulation prevention bytes (EPB) (AVC) */
+/* XXX: slice_data_bit_offset does not account for EPB */
+unsigned int
+avc_get_first_mb_bit_offset_with_epb(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+)
+{
+    unsigned int in_slice_data_bit_offset = slice_param->slice_data_bit_offset;
+    unsigned int out_slice_data_bit_offset;
+    unsigned int i, n, buf_size, data_size;
+    uint8_t *buf;
+    int ret;
+
+    buf_size  = slice_param->slice_data_bit_offset / 8;
+    data_size = slice_param->slice_data_size - slice_param->slice_data_offset;
+    if (buf_size > data_size)
+        buf_size = data_size;
+
+    buf = alloca(buf_size);
+    ret = dri_bo_get_subdata(
+        slice_data_bo, slice_param->slice_data_offset,
+        buf_size, buf
+    );
+    assert(ret == 0);
+
+    for (i = 2, n = 0; i < buf_size; i++) {
+        if (!buf[i - 2] && !buf[i - 1] && buf[i] == 3)
+            i += 2, n++;
+    }
+    out_slice_data_bit_offset = in_slice_data_bit_offset + n * 8;
+
+    if (mode_flag == ENTROPY_CABAC)
+        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
+    return out_slice_data_bit_offset;
+}
+
 static inline uint8_t
 get_ref_idx_state_1(const VAPictureH264 *va_pic, unsigned int frame_store_id)
 {
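diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h
index 12a4c3b..0d86523 100644 (file)
--- a/src/i965_decoder_utils.h
+++ b/src/i965_decoder_utils.h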
@@ -46,6 +46,20 @@ mpeg2_set_reference_surfaces(
 void
 avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);
 
+unsigned int
+avc_get_first_mb_bit_offset(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+);
+
+unsigned int
+avc_get_first_mb_bit_offset_with_epb(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+);
+
 void
 gen5_fill_avc_ref_idx_state(
     uint8_t             state[32],