From 8ae56d60a3e176b8b11a62526fcc479551250077 Mon Sep 17 00:00:00 2001 From: Mathieu Duponchelle Date: Fri, 22 Jan 2021 03:26:29 +0100 Subject: [PATCH] h264parse: fix timestamping of interlaced fields in output Instead of relying on GstBaseParse default behaviour of computing the duration of a parsed buffer based on the framerate passed to gst_base_parse_set_framerate(), we instead compute the duration ourselves, as we have more information available. In particular, this means we now output buffers with a duration that matches that of raw interlaced buffers when each field is output in a separate buffer. This fixes DTS interpolation performed by GstBaseParse, as the previous behaviour of outputting each field with the duration of a full frame was messing up the base class calculations. When not enough information is available, h264parse simply falls back to calculating the duration based on the framerate and hope for the best as was the case previously. Part-of: --- gst/videoparsers/gsth264parse.c | 103 +++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 3 deletions(-) diff --git a/gst/videoparsers/gsth264parse.c b/gst/videoparsers/gsth264parse.c index a7808b4de3..a1e6af89c0 100644 --- a/gst/videoparsers/gsth264parse.c +++ b/gst/videoparsers/gsth264parse.c @@ -2213,8 +2213,6 @@ gst_h264_parse_update_src_caps (GstH264Parse * h264parse, GstCaps * caps) s2 = gst_caps_get_structure (caps, 0); gst_structure_get_fraction (s2, "framerate", &h264parse->parsed_fps_n, &h264parse->parsed_fps_d); - gst_base_parse_set_frame_rate (GST_BASE_PARSE (h264parse), fps_num, - fps_den, 0, 0); /* If we know the frame duration, and if we are not in one of the zero * latency pattern, add one frame of latency */ @@ -2401,6 +2399,96 @@ gst_h264_parse_update_src_caps (GstH264Parse * h264parse, GstCaps * caps) gst_buffer_unref (buf); } +static GstClockTime +gst_h264_parse_get_duration (GstH264Parse * h264parse, gboolean frame) +{ + GstClockTime ret = GST_CLOCK_TIME_NONE; + GstH264SPS *sps = h264parse->nalparser->last_sps; + gint duration = 1; + + if (!frame) { + GST_LOG_OBJECT (h264parse, "no frame data -> 0 duration"); + ret = 0; + goto done; + } + + if (!sps) { + GST_DEBUG_OBJECT (h264parse, "referred SPS invalid"); + goto fps_duration; + } else if (!sps->vui_parameters_present_flag) { + GST_DEBUG_OBJECT (h264parse, "unable to compute duration: VUI not present"); + goto fps_duration; + } else if (!sps->vui_parameters.timing_info_present_flag) { + GST_DEBUG_OBJECT (h264parse, + "unable to compute duration: timing info not present"); + goto fps_duration; + } else if (sps->vui_parameters.time_scale == 0) { + GST_DEBUG_OBJECT (h264parse, + "unable to compute duration: time_scale = 0 " + "(this is forbidden in spec; bitstream probably contains error)"); + goto fps_duration; + } + + if (h264parse->sei_pic_struct_pres_flag && + h264parse->sei_pic_struct != (guint8) - 1) { + /* Note that when h264parse->sei_pic_struct == -1 (unspecified), there + * are ways to infer its value. This is related to computing the + * TopFieldOrderCnt and BottomFieldOrderCnt, which looks + * complicated and thus not implemented for the time being. Yet + * the value we have here is correct for many applications + */ + switch (h264parse->sei_pic_struct) { + case GST_H264_SEI_PIC_STRUCT_TOP_FIELD: + case GST_H264_SEI_PIC_STRUCT_BOTTOM_FIELD: + duration = 1; + break; + case GST_H264_SEI_PIC_STRUCT_FRAME: + case GST_H264_SEI_PIC_STRUCT_TOP_BOTTOM: + case GST_H264_SEI_PIC_STRUCT_BOTTOM_TOP: + duration = 2; + break; + case GST_H264_SEI_PIC_STRUCT_TOP_BOTTOM_TOP: + case GST_H264_SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM: + duration = 3; + break; + case GST_H264_SEI_PIC_STRUCT_FRAME_DOUBLING: + duration = 4; + break; + case GST_H264_SEI_PIC_STRUCT_FRAME_TRIPLING: + duration = 6; + break; + default: + GST_DEBUG_OBJECT (h264parse, + "h264parse->sei_pic_struct of unknown value %d. Not parsed", + h264parse->sei_pic_struct); + break; + } + } else { + duration = h264parse->field_pic_flag ? 1 : 2; + } + + GST_LOG_OBJECT (h264parse, "frame tick duration %d", duration); + + ret = gst_util_uint64_scale (duration * GST_SECOND, + sps->vui_parameters.num_units_in_tick, sps->vui_parameters.time_scale); + /* sanity check */ + if (ret < GST_MSECOND) { + GST_DEBUG_OBJECT (h264parse, "discarding dur %" GST_TIME_FORMAT, + GST_TIME_ARGS (ret)); + goto fps_duration; + } + +done: + return ret; + +fps_duration: + if (h264parse->parsed_fps_d > 0 && h264parse->parsed_fps_n > 0) + ret = + gst_util_uint64_scale (GST_SECOND, h264parse->parsed_fps_d, + h264parse->parsed_fps_n); + goto done; +} + static void gst_h264_parse_get_timestamp (GstH264Parse * h264parse, GstClockTime * out_ts, GstClockTime * out_dur, gboolean frame) @@ -2547,10 +2635,19 @@ gst_h264_parse_parse_frame (GstBaseParse * parse, GstBaseParseFrame * frame) /* don't mess with timestamps if provided by upstream, * particularly since our ts not that good they handle seeking etc */ - if (h264parse->do_ts) + if (h264parse->do_ts) { gst_h264_parse_get_timestamp (h264parse, &GST_BUFFER_DTS (buffer), &GST_BUFFER_DURATION (buffer), h264parse->frame_start); + } + + /* We don't want to let baseparse select a duration itself based + * solely on the framerate, as we have more per-frame information + * available */ + if (!GST_CLOCK_TIME_IS_VALID (GST_BUFFER_DURATION (buffer))) { + GST_BUFFER_DURATION (buffer) = + gst_h264_parse_get_duration (h264parse, h264parse->frame_start); + } if (h264parse->keyframe) GST_BUFFER_FLAG_UNSET (buffer, GST_BUFFER_FLAG_DELTA_UNIT); -- 2.34.1