h264: optimize handling of scaling lists.
authorGwenole Beauchesne <gwenole.beauchesne@intel.com>
Tue, 23 Oct 2012 12:50:14 +0000 (14:50 +0200)
committerGwenole Beauchesne <gwenole.beauchesne@intel.com>
Wed, 24 Oct 2012 16:01:14 +0000 (18:01 +0200)
Don't copy scaling lists twice to an intermediate state. Rather, directly
use the scaling lists from GstH264PPS since they would match those provided
by SPS header, if necessary. i.e. if PPS-specific scaling lists are not
available in the bitstream.

gst-libs/gst/vaapi/gstvaapidecoder_h264.c

index 6c4adf7..6f07895 100644 (file)
@@ -279,8 +279,6 @@ struct _GstVaapiDecoderH264Private {
     guint                       mb_y;
     guint                       mb_width;
     guint                       mb_height;
-    guint8                      scaling_list_4x4[6][16];
-    guint8                      scaling_list_8x8[6][64];
     gint32                      field_poc[2];           // 0:TopFieldOrderCnt / 1:BottomFieldOrderCnt
     gint32                      poc_msb;                // PicOrderCntMsb
     gint32                      poc_lsb;                // pic_order_cnt_lsb (from slice_header())
@@ -659,15 +657,52 @@ ensure_context(GstVaapiDecoderH264 *decoder, GstH264SPS *sps)
 }
 
 static GstVaapiDecoderStatus
-ensure_quant_matrix(GstVaapiDecoderH264 *decoder, GstH264PPS *pps)
+ensure_quant_matrix(GstVaapiDecoderH264 *decoder, GstVaapiPictureH264 *picture)
 {
     GstVaapiDecoderH264Private * const priv = decoder->priv;
+    GstH264SPS * const sps = priv->sps;
+    GstH264PPS * const pps = priv->pps;
+    GstVaapiPicture * const base_picture = &picture->base;
+    VAIQMatrixBufferH264 *iq_matrix;
+    guint i, j, n;
 
-    if (priv->pps != pps) {
-        memcpy(priv->scaling_list_4x4, pps->scaling_lists_4x4,
-               sizeof(priv->scaling_list_4x4));
-        memcpy(priv->scaling_list_8x8, pps->scaling_lists_8x8,
-               sizeof(priv->scaling_list_8x8));
+    base_picture->iq_matrix = GST_VAAPI_IQ_MATRIX_NEW(H264, decoder);
+    if (!base_picture->iq_matrix) {
+        GST_ERROR("failed to allocate IQ matrix");
+        return GST_VAAPI_DECODER_STATUS_ERROR_ALLOCATION_FAILED;
+    }
+    iq_matrix = base_picture->iq_matrix->param;
+
+    /* XXX: we can only support 4:2:0 or 4:2:2 since ScalingLists8x8[]
+       is not large enough to hold lists for 4:4:4 */
+    if (sps->chroma_format_idc == 3)
+        return GST_VAAPI_DECODER_STATUS_ERROR_UNSUPPORTED_CHROMA_FORMAT;
+
+    g_assert(G_N_ELEMENTS(iq_matrix->ScalingList4x4[0]) == 16);
+    g_assert(G_N_ELEMENTS(iq_matrix->ScalingList8x8[0]) == 64);
+
+    if (sizeof(iq_matrix->ScalingList4x4) == sizeof(pps->scaling_lists_4x4))
+        memcpy(iq_matrix->ScalingList4x4, pps->scaling_lists_4x4,
+               sizeof(iq_matrix->ScalingList4x4));
+    else {
+        n = MIN(G_N_ELEMENTS(iq_matrix->ScalingList4x4),
+                G_N_ELEMENTS(pps->scaling_lists_4x4));
+        for (i = 0; i < n; i++) {
+            for (j = 0; j < 16; j++)
+                iq_matrix->ScalingList4x4[i][j] = pps->scaling_lists_4x4[i][j];
+        }
+    }
+
+    if (sizeof(iq_matrix->ScalingList8x8) == sizeof(pps->scaling_lists_8x8))
+        memcpy(iq_matrix->ScalingList8x8, pps->scaling_lists_8x8,
+               sizeof(iq_matrix->ScalingList8x8));
+    else {
+        n = MIN(G_N_ELEMENTS(iq_matrix->ScalingList8x8),
+                G_N_ELEMENTS(pps->scaling_lists_8x8));
+        for (i = 0; i < n; i++) {
+            for (j = 0; j < 16; j++)
+                iq_matrix->ScalingList8x8[i][j] = pps->scaling_lists_8x8[i][j];
+        }
     }
     return GST_VAAPI_DECODER_STATUS_SUCCESS;
 }
@@ -1993,26 +2028,6 @@ fill_picture(
     return TRUE;
 }
 
-static gboolean
-fill_quant_matrix(GstVaapiDecoderH264 *decoder, GstVaapiPictureH264 *picture)
-{
-    GstVaapiDecoderH264Private * const priv = decoder->priv;
-    VAIQMatrixBufferH264 * const iq_matrix = picture->base.iq_matrix->param;
-
-    /* XXX: we can only support 4:2:0 or 4:2:2 since ScalingLists8x8[]
-       is not large enough to hold lists for 4:4:4 */
-    if (priv->sps->chroma_format_idc == 3 &&
-        sizeof(iq_matrix->ScalingList8x8) != sizeof(priv->scaling_list_8x8))
-        return FALSE;
-
-    /* Fill in VAIQMatrixBufferH264 */
-    memcpy(iq_matrix->ScalingList4x4, priv->scaling_list_4x4,
-           sizeof(iq_matrix->ScalingList4x4));
-    memcpy(iq_matrix->ScalingList8x8, priv->scaling_list_8x8,
-           sizeof(iq_matrix->ScalingList8x8));
-    return TRUE;
-}
-
 static GstVaapiDecoderStatus
 decode_picture(GstVaapiDecoderH264 *decoder, GstH264NalUnit *nalu, GstH264SliceHdr *slice_hdr)
 {
@@ -2038,21 +2053,15 @@ decode_picture(GstVaapiDecoderH264 *decoder, GstH264NalUnit *nalu, GstH264SliceH
     }
     priv->current_picture = picture;
 
-    picture->base.iq_matrix = GST_VAAPI_IQ_MATRIX_NEW(H264, decoder);
-    if (!picture->base.iq_matrix) {
-        GST_ERROR("failed to allocate IQ matrix");
-        return GST_VAAPI_DECODER_STATUS_ERROR_ALLOCATION_FAILED;
-    }
+    priv->sps = sps;
+    priv->pps = pps;
 
-    status = ensure_quant_matrix(decoder, pps);
+    status = ensure_quant_matrix(decoder, picture);
     if (status != GST_VAAPI_DECODER_STATUS_SUCCESS) {
         GST_ERROR("failed to reset quantizer matrix");
         return status;
     }
 
-    priv->sps = sps;
-    priv->pps = pps;
-
     if (!init_picture(decoder, picture, slice_hdr, nalu))
         return GST_VAAPI_DECODER_STATUS_ERROR_UNKNOWN;
     if (!fill_picture(decoder, picture, slice_hdr, nalu))
@@ -2063,8 +2072,6 @@ decode_picture(GstVaapiDecoderH264 *decoder, GstH264NalUnit *nalu, GstH264SliceH
 static gboolean
 decode_picture_end(GstVaapiDecoderH264 *decoder, GstVaapiPictureH264 *picture)
 {
-    if (!fill_quant_matrix(decoder, picture))
-        return FALSE;
     if (!exec_ref_pic_marking(decoder, picture))
         return FALSE;
     if (!dpb_add(decoder, picture))