X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=gst%2Ftypefind%2Fgsttypefindfunctions.c;h=5f3c227468cca83dcfc545ff2b43280714f13bd4;hb=9791f0aaf164828df7e112ca23ccd88ba0a7f45c;hp=0bffe5303e4210eef107d63bcb3aed452454e715;hpb=e44dd9db8f767f6dd2b157b285774d5938c3fc75;p=platform%2Fupstream%2Fgstreamer.git diff --git a/gst/typefind/gsttypefindfunctions.c b/gst/typefind/gsttypefindfunctions.c index 0bffe53..5f3c227 100644 --- a/gst/typefind/gsttypefindfunctions.c +++ b/gst/typefind/gsttypefindfunctions.c @@ -17,8 +17,8 @@ * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 02111-1307, USA. + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. */ #ifdef HAVE_CONFIG_H @@ -235,18 +235,18 @@ check_utf16 (const guint8 * data, gint len, gint endianness) while (len >= 2) { /* test first for a single 16 bit value in the BMP */ if (endianness == G_BIG_ENDIAN) - gst_byte_reader_get_uint16_be (&br, &high); + high = gst_byte_reader_get_uint16_be_unchecked (&br); else - gst_byte_reader_get_uint16_le (&br, &high); + high = gst_byte_reader_get_uint16_le_unchecked (&br); if (high >= 0xD800 && high <= 0xDBFF) { /* start of a surrogate pair */ if (len < 4) return FALSE; len -= 2; if (endianness == G_BIG_ENDIAN) - gst_byte_reader_get_uint16_be (&br, &low); + low = gst_byte_reader_get_uint16_be_unchecked (&br); else - gst_byte_reader_get_uint16_le (&br, &low); + low = gst_byte_reader_get_uint16_le_unchecked (&br); if (low >= 0xDC00 && low <= 0xDFFF) { /* second half of the surrogate pair */ } else @@ -845,6 +845,90 @@ flac_type_find (GstTypeFind * tf, gpointer unused) #endif } +/* TODO: we could probably make a generic function for this.. */ +static gint +aac_type_find_scan_loas_frames_ep (GstTypeFind * tf, DataScanCtx * scan_ctx, + gint max_frames) +{ + DataScanCtx c = *scan_ctx; + guint16 snc; + guint len; + gint count = 0; + + do { + if (!data_scan_ctx_ensure_data (tf, &c, 5)) + break; + + /* EPAudioSyncStream */ + len = ((c.data[2] & 0x0f) << 9) | (c.data[3] << 1) | + ((c.data[4] & 0x80) >> 7); + + if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, len + 2)) { + GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len); + break; + } + + /* check length of frame */ + snc = GST_READ_UINT16_BE (c.data + len); + if (snc != 0x4de1) { + GST_DEBUG ("No sync found at 0x%" G_GINT64_MODIFIER "x", c.offset + len); + break; + } + + ++count; + + GST_DEBUG ("Found LOAS syncword #%d at offset 0x%" G_GINT64_MODIFIER "x, " + "framelen %u", count, c.offset, len); + + data_scan_ctx_advance (tf, &c, len); + } while (count < max_frames && (c.offset - scan_ctx->offset) < 64 * 1024); + + GST_DEBUG ("found %d consecutive frames", count); + return count; +} + +static gint +aac_type_find_scan_loas_frames (GstTypeFind * tf, DataScanCtx * scan_ctx, + gint max_frames) +{ + DataScanCtx c = *scan_ctx; + guint16 snc; + guint len; + gint count = 0; + + do { + if (!data_scan_ctx_ensure_data (tf, &c, 3)) + break; + + /* AudioSyncStream */ + len = ((c.data[1] & 0x1f) << 8) | c.data[2]; + /* add size of sync stream header */ + len += 3; + + if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, len)) { + GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len); + break; + } + + /* check length of frame */ + snc = GST_READ_UINT16_BE (c.data + len); + if ((snc & 0xffe0) != 0x56e0) { + GST_DEBUG ("No sync found at 0x%" G_GINT64_MODIFIER "x", c.offset + len); + break; + } + + ++count; + + GST_DEBUG ("Found LOAS syncword #%d at offset 0x%" G_GINT64_MODIFIER "x, " + "framelen %u", count, c.offset, len); + + data_scan_ctx_advance (tf, &c, len); + } while (count < max_frames && (c.offset - scan_ctx->offset) < 64 * 1024); + + GST_DEBUG ("found %d consecutive frames", count); + return count; +} + /*** audio/mpeg version 2, 4 ***/ static GstStaticCaps aac_caps = GST_STATIC_CAPS ("audio/mpeg, " @@ -854,8 +938,10 @@ static GstStaticCaps aac_caps = GST_STATIC_CAPS ("audio/mpeg, " static void aac_type_find (GstTypeFind * tf, gpointer unused) { - /* LUT to convert the AudioObjectType from the ADTS header to a string */ DataScanCtx c = { 0, NULL, 0 }; + GstTypeFindProbability best_probability = GST_TYPE_FIND_NONE; + GstCaps *best_caps = NULL; + guint best_count = 0; while (c.offset < AAC_AMOUNT) { guint snc, len; @@ -941,45 +1027,29 @@ aac_type_find (GstTypeFind * tf, gpointer unused) } GST_DEBUG ("No next frame found... (should have been at 0x%x)", len); - } else if (G_UNLIKELY (((snc & 0xffe0) == 0x56e0) || (snc == 0x4de1))) { - /* LOAS frame */ - - GST_DEBUG ("Found one LOAS syncword at offset 0x%" G_GINT64_MODIFIER - "x, tracing next...", c.offset); + } else if (G_UNLIKELY ((snc & 0xffe0) == 0x56e0 || snc == 0x4de1)) { + gint count; - /* check length of frame for each type of detectable LOAS streams */ - if (snc == 0x4de1) { - /* EPAudioSyncStream */ - len = ((c.data[2] & 0x0f) << 9) | (c.data[3] << 1) | - ((c.data[4] & 0x80) >> 7); - /* add size of EP sync stream header */ - len += 7; - } else { - /* AudioSyncStream */ - len = ((c.data[1] & 0x1f) << 8) | c.data[2]; - /* add size of sync stream header */ - len += 3; - } - - if (len == 0 || !data_scan_ctx_ensure_data (tf, &c, len + 2)) { - GST_DEBUG ("Wrong sync or next frame not within reach, len=%u", len); - goto next; - } + /* LOAS frame */ + GST_INFO ("Possible LOAS syncword at offset 0x%" G_GINT64_MODIFIER + "x, scanning for more frames...", c.offset); - /* check if there's a second LOAS frame */ - snc = GST_READ_UINT16_BE (c.data + len); - if (((snc & 0xffe0) == 0x56e0) || (snc == 0x4de1)) { - GST_DEBUG ("Found second LOAS syncword at offset 0x%" - G_GINT64_MODIFIER "x, framelen %u", c.offset, len); + if (snc == 0x4de1) + count = aac_type_find_scan_loas_frames_ep (tf, &c, 20); + else + count = aac_type_find_scan_loas_frames (tf, &c, 20); - gst_type_find_suggest_simple (tf, GST_TYPE_FIND_LIKELY, "audio/mpeg", + if (count >= 3 && count > best_count) { + gst_caps_replace (&best_caps, NULL); + best_caps = gst_caps_new_simple ("audio/mpeg", "framed", G_TYPE_BOOLEAN, FALSE, "mpegversion", G_TYPE_INT, 4, "stream-format", G_TYPE_STRING, "loas", NULL); - break; + best_count = count; + best_probability = GST_TYPE_FIND_POSSIBLE - 10 + count * 3; + if (best_probability >= GST_TYPE_FIND_LIKELY) + break; } - - GST_DEBUG ("No next frame found... (should have been at 0x%x)", len); } else if (!memcmp (c.data, "ADIF", 4)) { /* ADIF header */ gst_type_find_suggest_simple (tf, GST_TYPE_FIND_LIKELY, "audio/mpeg", @@ -992,6 +1062,11 @@ aac_type_find (GstTypeFind * tf, gpointer unused) data_scan_ctx_advance (tf, &c, 1); } + + if (best_probability > GST_TYPE_FIND_NONE) { + gst_type_find_suggest (tf, best_probability, best_caps); + gst_caps_unref (best_caps); + } } /*** audio/mpeg version 1 ***/ @@ -1345,7 +1420,8 @@ suggest: g_return_if_fail (layer >= 1 && layer <= 3); gst_type_find_suggest_simple (tf, prob, "audio/mpeg", - "mpegversion", G_TYPE_INT, 1, "layer", G_TYPE_INT, layer, NULL); + "mpegversion", G_TYPE_INT, 1, "layer", G_TYPE_INT, layer, + "parsed", G_TYPE_BOOLEAN, FALSE, NULL); } } @@ -1678,9 +1754,11 @@ GST_STATIC_CAPS ("audio/x-wavpack-correction, framed = (boolean) false"); static void wavpack_type_find (GstTypeFind * tf, gpointer unused) { + GstTypeFindProbability base_prob = GST_TYPE_FIND_POSSIBLE; guint64 offset; guint32 blocksize; const guint8 *data; + guint count_wv, count_wvc; data = gst_type_find_peek (tf, 0, 32); if (!data) @@ -1695,8 +1773,10 @@ wavpack_type_find (GstTypeFind * tf, gpointer unused) * work in pull-mode */ blocksize = GST_READ_UINT32_LE (data + 4); GST_LOG ("wavpack header, blocksize=0x%04x", blocksize); + count_wv = 0; + count_wvc = 0; offset = 32; - while (offset < 32 + blocksize) { + while (offset < 8 + blocksize) { guint32 sublen; /* get chunk header */ @@ -1711,7 +1791,7 @@ wavpack_type_find (GstTypeFind * tf, gpointer unused) } else { sublen += 1 + 1; /* id + length */ } - if (sublen > blocksize - offset + 32) { + if (offset + sublen > 8 + blocksize) { GST_LOG ("chunk length too big (%u > %" G_GUINT64_FORMAT ")", sublen, blocksize - offset); break; @@ -1720,18 +1800,38 @@ wavpack_type_find (GstTypeFind * tf, gpointer unused) switch (data[0] & 0x0f) { case 0xa: /* ID_WV_BITSTREAM */ case 0xc: /* ID_WVX_BITSTREAM */ - gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, WAVPACK_CAPS); - return; + ++count_wv; + break; case 0xb: /* ID_WVC_BITSTREAM */ - gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, - WAVPACK_CORRECTION_CAPS); - return; + ++count_wvc; + break; default: break; } + if (count_wv >= 5 || count_wvc >= 5) + break; } offset += sublen; } + + /* check for second block header */ + data = gst_type_find_peek (tf, 8 + blocksize, 4); + if (data != NULL && memcmp (data, "wvpk", 4) == 0) { + GST_DEBUG ("found second block sync"); + base_prob = GST_TYPE_FIND_LIKELY; + } + + GST_DEBUG ("wvc=%d, wv=%d", count_wvc, count_wv); + + if (count_wvc > 0 && count_wvc > count_wv) { + gst_type_find_suggest (tf, + MIN (base_prob + 5 * count_wvc, GST_TYPE_FIND_NEARLY_CERTAIN), + WAVPACK_CORRECTION_CAPS); + } else if (count_wv > 0) { + gst_type_find_suggest (tf, + MIN (base_prob + 5 * count_wv, GST_TYPE_FIND_NEARLY_CERTAIN), + WAVPACK_CAPS); + } } /*** application/postscrip ***/ @@ -2405,6 +2505,7 @@ h264_video_type_find (GstTypeFind * tf, gpointer unused) gboolean seen_idr = FALSE; gboolean seen_sps = FALSE; gboolean seen_pps = FALSE; + gboolean seen_ssps = FALSE; int nut, ref; int good = 0; int bad = 0; @@ -2439,18 +2540,25 @@ h264_video_type_find (GstTypeFind * tf, gpointer unused) good++; } } else if (nut >= 14 && nut <= 33) { - /* reserved */ - /* Theoretically these are good, since if they exist in the - stream it merely means that a newer backwards-compatible - h.264 stream. But we should be identifying that separately. */ - bad++; + if (nut == 15) { + seen_ssps = TRUE; + good++; + } else if (seen_ssps && (nut == 14 || nut == 20)) { + good++; + } else { + /* reserved */ + /* Theoretically these are good, since if they exist in the + stream it merely means that a newer backwards-compatible + h.264 stream. But we should be identifying that separately. */ + bad++; + } } else { /* unspecified, application specific */ /* don't consider these bad */ } - GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, idr:%d", good, bad, seen_pps, - seen_sps, seen_idr); + GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, idr:%d ssps:%d", good, bad, + seen_pps, seen_sps, seen_idr, seen_ssps); if (seen_sps && seen_pps && seen_idr && good >= 10 && bad < 4) { gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, H264_VIDEO_CAPS); @@ -2462,8 +2570,8 @@ h264_video_type_find (GstTypeFind * tf, gpointer unused) data_scan_ctx_advance (tf, &c, 1); } - GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, idr:%d", good, bad, seen_pps, - seen_sps, seen_idr); + GST_LOG ("good:%d, bad:%d, pps:%d, sps:%d, idr:%d ssps=%d", good, bad, + seen_pps, seen_sps, seen_idr, seen_ssps); if (good >= 2 && bad == 0) { gst_type_find_suggest (tf, GST_TYPE_FIND_POSSIBLE, H264_VIDEO_CAPS); @@ -2574,7 +2682,7 @@ mpeg_video_stream_type_find (GstTypeFind * tf, gpointer unused) gst_type_find_suggest_simple (tf, probability, "video/mpeg", "systemstream", G_TYPE_BOOLEAN, FALSE, - "mpegversion", G_TYPE_INT, 1, NULL); + "mpegversion", G_TYPE_INT, 1, "parsed", G_TYPE_BOOLEAN, FALSE, NULL); } } @@ -2796,6 +2904,12 @@ qt_type_find (GstTypeFind * tf, gpointer unused) break; } + if (STRNCMP (&data[4], "ftypisml", 8) == 0) { + tip = GST_TYPE_FIND_MAXIMUM; + variant = "iso-fragmented"; + break; + } + /* box/atom types that are in common with ISO base media file format */ if (STRNCMP (&data[4], "moov", 4) == 0 || STRNCMP (&data[4], "mdat", 4) == 0 || @@ -3435,76 +3549,256 @@ ircam_type_find (GstTypeFind * tf, gpointer ununsed) } } -/* EBML typefind helper */ -static gboolean -ebml_check_header (GstTypeFind * tf, const gchar * doctype, int doctype_len) +/*** Matroska/WebM ***/ + +#define EBML_HEADER 0x1A45DFA3 +#define EBML_VERSION 0x4286 +#define EBML_DOCTYPE 0x4282 +#define EBML_DOCTYPE_VERSION 0x4287 +#define MATROSKA_SEGMENT 0x18538067 +#define MATROSKA_CLUSTER 0x1F43B675 +#define MATROSKA_TRACKS 0x1654AE6B +#define MATROSKA_TRACK_ENTRY 0xAE +#define MATROSKA_TRACK_TYPE 0x83 +#define MATROSKA_STEREO_MODE 0x53B8 + +#define EBML_MAX_LEN (2 * 1024 * 1024) + +typedef enum { - /* 4 bytes for EBML ID, 1 byte for header length identifier */ - const guint8 *data = gst_type_find_peek (tf, 0, 4 + 1); - gint len_mask = 0x80, size = 1, n = 1, total; + EBML_DOCTYPE_UNKNOWN = 0, + EBML_DOCTYPE_MATROSKA, + EBML_DOCTYPE_WEBM +} GstEbmlDocType; - if (!data) - return FALSE; +typedef struct +{ + GstEbmlDocType doctype; + guint audio; + guint video; + guint other; + guint video_stereo; + guint chunks; + guint tracks_ok; /* if we've seen and fully parsed the TRACKS element */ +} GstMatroskaInfo; - /* ebml header? */ - if (data[0] != 0x1A || data[1] != 0x45 || data[2] != 0xDF || data[3] != 0xA3) - return FALSE; +static inline guint +ebml_read_chunk_header (GstTypeFind * tf, DataScanCtx * c, guint max_size, + guint32 * id, guint64 * size) +{ + guint64 mask; + guint msbit_set, i, len, id_len; + + if (c->size < 12 || max_size < 1) + return 0; - /* length of header */ - total = data[4]; - while (size <= 8 && !(total & len_mask)) { - size++; - len_mask >>= 1; + /* element ID */ + *id = c->data[0]; + if ((c->data[0] & 0x80) == 0x80) { + id_len = 1; + } else if ((c->data[0] & 0xC0) == 0x40) { + id_len = 2; + } else if ((c->data[0] & 0xE0) == 0x20) { + id_len = 3; + } else if ((c->data[0] & 0xF0) == 0x10) { + id_len = 4; + } else { + return 0; } - if (size > 8) - return FALSE; - total &= (len_mask - 1); - while (n < size) - total = (total << 8) | data[4 + n++]; - /* get new data for full header, 4 bytes for EBML ID, - * EBML length tag and the actual header */ - data = gst_type_find_peek (tf, 0, 4 + size + total); - if (!data) + if (max_size < id_len) + return 0; + + for (i = 1; i < id_len; ++i) { + *id = (*id << 8) | c->data[i]; + } + + data_scan_ctx_advance (tf, c, id_len); + max_size -= id_len; + + /* size */ + if (max_size < 1 || c->data[0] == 0) + return 0; + + msbit_set = g_bit_nth_msf (c->data[0], 8); + mask = ((1 << msbit_set) - 1); + *size = c->data[0] & mask; + len = 7 - msbit_set; + + if (max_size < 1 + len) + return 0; + for (i = 0; i < len; ++i) { + mask = (mask << 8) | 0xff; + *size = (*size << 8) | c->data[1 + i]; + } + + data_scan_ctx_advance (tf, c, 1 + len); + + /* undefined/unknown size? (all bits 1) */ + if (*size == mask) { + /* allow unknown size for SEGMENT chunk, bail out otherwise */ + if (*id == MATROSKA_SEGMENT) + *size = G_MAXUINT64; + else + return 0; + } + + return id_len + (1 + len); +} + +static gboolean +ebml_parse_chunk (GstTypeFind * tf, DataScanCtx * ctx, guint32 chunk_id, + guint chunk_size, GstMatroskaInfo * info, guint depth) +{ /* FIXME: make sure input size is clipped to 32 bit */ + static const gchar SPACES[] = " "; + DataScanCtx c = *ctx; + guint64 element_size; + guint32 id, hdr_len; + + if (depth >= 8) /* keep SPACES large enough for depth */ return FALSE; - /* only check doctype if asked to do so */ - if (doctype == NULL || doctype_len == 0) - return TRUE; + while (chunk_size > 0) { + if (c.offset > EBML_MAX_LEN || !data_scan_ctx_ensure_data (tf, &c, 64)) + return FALSE; - /* the header must contain the doctype. For now, we don't parse the - * whole header but simply check for the availability of that array - * of characters inside the header. Not fully fool-proof, but good - * enough. */ - for (n = 4 + size; n <= 4 + size + total - doctype_len; n++) - if (!memcmp (&data[n], doctype, doctype_len)) - return TRUE; + hdr_len = ebml_read_chunk_header (tf, &c, chunk_size, &id, &element_size); + if (hdr_len == 0) + return FALSE; - return FALSE; + g_assert (hdr_len <= chunk_size); + chunk_size -= hdr_len; + + if (element_size > chunk_size) + return FALSE; + + GST_DEBUG ("%s %08x, size %" G_GUINT64_FORMAT " / %" G_GUINT64_FORMAT, + SPACES + sizeof (SPACES) - 1 - (2 * depth), id, element_size, + hdr_len + element_size); + + if (!data_scan_ctx_ensure_data (tf, &c, element_size)) { + GST_DEBUG ("not enough data"); + return FALSE; + } + + switch (id) { + case EBML_DOCTYPE: + if (element_size >= 8 && memcmp (c.data, "matroska", 8) == 0) + info->doctype = EBML_DOCTYPE_MATROSKA; + else if (element_size >= 4 && memcmp (c.data, "webm", 4) == 0) + info->doctype = EBML_DOCTYPE_WEBM; + break; + case MATROSKA_SEGMENT: + GST_LOG ("parsing segment"); + ebml_parse_chunk (tf, &c, id, element_size, info, depth + 1); + GST_LOG ("parsed segment, done"); + return FALSE; + case MATROSKA_TRACKS: + GST_LOG ("parsing tracks"); + info->tracks_ok = + ebml_parse_chunk (tf, &c, id, element_size, info, depth + 1); + GST_LOG ("parsed tracks: %s, done (after %" G_GUINT64_FORMAT " bytes)", + info->tracks_ok ? "ok" : "FAIL", c.offset + element_size); + return FALSE; + case MATROSKA_TRACK_ENTRY: + GST_LOG ("parsing track entry"); + if (!ebml_parse_chunk (tf, &c, id, element_size, info, depth + 1)) + return FALSE; + break; + case MATROSKA_TRACK_TYPE:{ + guint type = 0, i; + + /* is supposed to always be 1-byte, but not everyone's following that */ + for (i = 0; i < element_size; ++i) + type = (type << 8) | c.data[i]; + + GST_DEBUG ("%s track type %u", + SPACES + sizeof (SPACES) - 1 - (2 * depth), type); + + if (type == 1) + ++info->video; + else if (c.data[0] == 2) + ++info->audio; + else + ++info->other; + break; + } + case MATROSKA_STEREO_MODE: + ++info->video_stereo; + break; + case MATROSKA_CLUSTER: + GST_WARNING ("cluster, bailing out (should've found tracks by now)"); + return FALSE; + default: + break; + } + data_scan_ctx_advance (tf, &c, element_size); + chunk_size -= element_size; + ++info->chunks; + } + + return TRUE; } -/*** video/x-matroska ***/ static GstStaticCaps matroska_caps = GST_STATIC_CAPS ("video/x-matroska"); #define MATROSKA_CAPS (gst_static_caps_get(&matroska_caps)) static void matroska_type_find (GstTypeFind * tf, gpointer ununsed) { - if (ebml_check_header (tf, "matroska", 8)) - gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, MATROSKA_CAPS); - else if (ebml_check_header (tf, NULL, 0)) - gst_type_find_suggest (tf, GST_TYPE_FIND_LIKELY, MATROSKA_CAPS); -} + GstTypeFindProbability prob; + GstMatroskaInfo info = { 0, }; + const gchar *type_name; + DataScanCtx c = { 0, NULL, 0 }; + gboolean is_audio; + guint64 size; + guint32 id, hdr_len; -/*** video/webm ***/ -static GstStaticCaps webm_caps = GST_STATIC_CAPS ("video/webm"); + if (!data_scan_ctx_ensure_data (tf, &c, 64)) + return; -#define WEBM_CAPS (gst_static_caps_get(&webm_caps)) -static void -webm_type_find (GstTypeFind * tf, gpointer ununsed) -{ - if (ebml_check_header (tf, "webm", 4)) - gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, WEBM_CAPS); + if (GST_READ_UINT32_BE (c.data) != EBML_HEADER) + return; + + while (c.offset < EBML_MAX_LEN && data_scan_ctx_ensure_data (tf, &c, 64)) { + hdr_len = ebml_read_chunk_header (tf, &c, c.size, &id, &size); + if (hdr_len == 0) + return; + + GST_INFO ("=== top-level chunk %08x, size %" G_GUINT64_FORMAT + " / %" G_GUINT64_FORMAT, id, size, size + hdr_len); + + if (!ebml_parse_chunk (tf, &c, id, size, &info, 0)) + break; + data_scan_ctx_advance (tf, &c, size); + GST_INFO ("=== done with chunk %08x", id); + if (id == MATROSKA_SEGMENT) + break; + } + + GST_INFO ("audio=%u video=%u other=%u chunks=%u doctype=%d all_tracks=%d", + info.audio, info.video, info.other, info.chunks, info.doctype, + info.tracks_ok); + + /* perhaps we should bail out if tracks_ok is FALSE and wait for more data? + * (we would need new API to signal this properly and prevent other + * typefinders from taking over the decision then) */ + is_audio = (info.audio > 0 && info.video == 0 && info.other == 0); + + if (info.doctype == EBML_DOCTYPE_WEBM) { + type_name = (is_audio) ? "audio/webm" : "video/webm"; + } else if (info.video > 0 && info.video_stereo) { + type_name = "video/x-matroska-3d"; + } else { + type_name = (is_audio) ? "audio/x-matroska" : "video/x-matroska"; + } + + if (info.doctype == EBML_DOCTYPE_UNKNOWN) + prob = GST_TYPE_FIND_LIKELY; + else + prob = GST_TYPE_FIND_MAXIMUM; + + gst_type_find_suggest_simple (tf, prob, type_name, NULL); } /*** application/mxf ***/ @@ -3589,30 +3883,116 @@ dv_type_find (GstTypeFind * tf, gpointer private) } -/*** application/ogg and application/x-annodex ***/ -static GstStaticCaps ogg_caps = GST_STATIC_CAPS ("application/ogg"); -static GstStaticCaps annodex_caps = GST_STATIC_CAPS ("application/x-annodex"); -static GstStaticCaps ogg_annodex_caps = - GST_STATIC_CAPS ("application/ogg;application/x-annodex"); +/*** Ogg variants ***/ +static GstStaticCaps ogg_caps = + GST_STATIC_CAPS ("application/ogg;video/ogg;audio/ogg;application/kate"); + +#define OGG_CAPS (gst_static_caps_get(&ogg_caps)) -#define OGGANX_CAPS (gst_static_caps_get(&ogg_annodex_caps)) +typedef enum +{ + OGG_AUDIO = 0, + OGG_VIDEO, + OGG_KATE, + OGG_OTHER, + OGG_SKELETON, + OGG_ANNODEX, + OGG_NUM +} GstOggStreamType; static void ogganx_type_find (GstTypeFind * tf, gpointer private) { - const guint8 *data = gst_type_find_peek (tf, 0, 4); + const gchar *media_type; + DataScanCtx c = { 0, NULL, 0 }; + guint ogg_syncs = 0; + guint hdr_count[OGG_NUM] = { 0, }; + static const struct + { + const gchar marker[10]; + guint8 marker_size; + GstOggStreamType stream_type; + } markers[] = { + { + "\001vorbis", 7, OGG_AUDIO}, { + "\200theora", 7, OGG_VIDEO}, { + "fLaC", 4, OGG_AUDIO}, { + "\177FLAC", 5, OGG_AUDIO}, { + "Speex", 5, OGG_AUDIO}, { + "CMML\0\0\0\0", 8, OGG_OTHER}, { + "PCM ", 8, OGG_AUDIO}, { + "Annodex", 7, OGG_ANNODEX}, { + "fishead", 7, OGG_SKELETON}, { + "AnxData", 7, OGG_ANNODEX}, { + "CELT ", 8, OGG_AUDIO}, { + "\200kate\0\0\0", 8, OGG_KATE}, { + "BBCD\0", 5, OGG_VIDEO}, { + "OVP80\1\1", 7, OGG_VIDEO}, { + "OpusHead", 8, OGG_AUDIO}, { + "\001audio\0\0\0", 9, OGG_AUDIO}, { + "\001video\0\0\0", 9, OGG_VIDEO}, { + "\001text\0\0\0", 9, OGG_OTHER} + }; + + while (c.offset < 4096 && data_scan_ctx_ensure_data (tf, &c, 64)) { + guint size, i; + + if (memcmp (c.data, "OggS", 5) != 0) + break; - if ((data != NULL) && (memcmp (data, "OggS", 4) == 0)) { + ++ogg_syncs; - /* Check for an annodex fishbone header */ - data = gst_type_find_peek (tf, 28, 8); - if (data && memcmp (data, "fishead\0", 8) == 0) - gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, - gst_static_caps_get (&annodex_caps)); + /* check if BOS */ + if (c.data[5] != 0x02) + break; + + /* headers should only have one segment */ + if (c.data[26] != 1) + break; + + size = c.data[27]; + if (size < 8) + break; + + data_scan_ctx_advance (tf, &c, 28); + + if (!data_scan_ctx_ensure_data (tf, &c, MAX (size, 8))) + break; - gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, - gst_static_caps_get (&ogg_caps)); + for (i = 0; i < G_N_ELEMENTS (markers); ++i) { + if (memcmp (c.data, markers[i].marker, markers[i].marker_size) == 0) { + ++hdr_count[markers[i].stream_type]; + break; + } + } + + if (i == G_N_ELEMENTS (markers)) { + GST_MEMDUMP ("unknown Ogg stream marker", c.data, size); + ++hdr_count[OGG_OTHER]; + } + + data_scan_ctx_advance (tf, &c, size); } + + if (ogg_syncs == 0) + return; + + /* We don't bother with annodex types. FIXME: what about XSPF? */ + if (hdr_count[OGG_VIDEO] > 0) { + media_type = "video/ogg"; + } else if (hdr_count[OGG_AUDIO] > 0) { + media_type = "audio/ogg"; + } else if (hdr_count[OGG_KATE] > 0 && hdr_count[OGG_OTHER] == 0) { + media_type = "application/kate"; + } else { + media_type = "application/ogg"; + } + + GST_INFO ("found %s (audio:%u, video:%u, annodex:%u, skeleton:%u, other:%u)", + media_type, hdr_count[OGG_AUDIO], hdr_count[OGG_VIDEO], + hdr_count[OGG_ANNODEX], hdr_count[OGG_SKELETON], hdr_count[OGG_OTHER]); + + gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM, media_type, NULL); } /*** audio/x-vorbis ***/ @@ -3970,6 +4350,66 @@ paris_type_find (GstTypeFind * tf, gpointer unused) } } +/*** audio/x-sbc ***/ +static GstStaticCaps sbc_caps = GST_STATIC_CAPS ("audio/x-sbc"); +#define SBC_CAPS (gst_static_caps_get(&sbc_caps)) + +static gsize +sbc_check_header (const guint8 * data, gsize len, guint * rate, + guint * channels) +{ + static const guint16 sbc_rates[4] = { 16000, 32000, 44100, 48000 }; + static const guint8 sbc_blocks[4] = { 4, 8, 12, 16 }; + guint n_blocks, ch_mode, n_subbands, bitpool; + + if (data[0] != 0x9C || len < 4) + return 0; + + n_blocks = sbc_blocks[(data[1] >> 4) & 0x03]; + ch_mode = (data[1] >> 2) & 0x03; + n_subbands = (data[1] & 0x01) ? 8 : 4; + bitpool = data[2]; + if (bitpool < 2) + return 0; + + *rate = sbc_rates[(data[1] >> 6) & 0x03]; + *channels = (ch_mode == 0) ? 1 : 2; + + if (ch_mode == 0) + return 4 + (n_subbands * 1) / 2 + (n_blocks * 1 * bitpool) / 8; + else if (ch_mode == 1) + return 4 + (n_subbands * 2) / 2 + (n_blocks * 2 * bitpool) / 8; + else if (ch_mode == 2) + return 4 + (n_subbands * 2) / 2 + (n_blocks * bitpool) / 8; + else if (ch_mode == 3) + return 4 + (n_subbands * 2) / 2 + (n_subbands + n_blocks * bitpool) / 8; + + return 0; +} + +static void +sbc_type_find (GstTypeFind * tf, gpointer unused) +{ + const guint8 *data; + gsize frame_len; + guint i, rate, channels, offset = 0; + + for (i = 0; i < 10; ++i) { + data = gst_type_find_peek (tf, offset, 8); + if (data == NULL) + return; + + frame_len = sbc_check_header (data, 8, &rate, &channels); + if (frame_len == 0) + return; + + offset += frame_len; + } + gst_type_find_suggest_simple (tf, GST_TYPE_FIND_POSSIBLE, "audio/x-sbc", + "rate", G_TYPE_INT, rate, "channels", G_TYPE_INT, channels, + "parsed", G_TYPE_BOOLEAN, FALSE, NULL); +} + /*** audio/iLBC-sh ***/ /* NOTE: do not replace this function with two TYPE_FIND_REGISTER_START_WITH */ static GstStaticCaps ilbc_caps = GST_STATIC_CAPS ("audio/iLBC-sh"); @@ -4302,6 +4742,119 @@ degas_type_find (GstTypeFind * tf, gpointer private) } } +/*** DVD ISO images (looks like H.264, see #674069) ***/ +static void +dvdiso_type_find (GstTypeFind * tf, gpointer private) +{ + /* 0x8000 bytes of zeros, then "\001CD001" */ + gint64 len; + const guint8 *data; + + len = gst_type_find_get_length (tf); + if (len < 0x8006) + return; + data = gst_type_find_peek (tf, 0, 0x8006); + if (G_UNLIKELY (data == NULL)) + return; + for (len = 0; len < 0x8000; len++) + if (data[len]) + return; + /* Can the '1' be anything else ? My three samples all have '1'. */ + if (memcmp (data + 0x8000, "\001CD001", 6)) + return; + + /* May need more inspection, we may be able to demux some of them */ + gst_type_find_suggest_simple (tf, GST_TYPE_FIND_LIKELY, + "application/octet-stream", NULL); +} + +/* SSA/ASS subtitles + * + * http://en.wikipedia.org/wiki/SubStation_Alpha + * http://matroska.org/technical/specs/subtitles/ssa.html + */ +static void +ssa_type_find (GstTypeFind * tf, gpointer private) +{ + const gchar *start, *end, *ver_str, *media_type = NULL; + const guint8 *data; + gchar *str, *script_type, *p = NULL; + gint64 len; + + data = gst_type_find_peek (tf, 0, 32); + + if (data == NULL) + return; + + /* there might be a BOM at the beginning */ + if (memcmp (data, "[Script Info]", 13) != 0 && + memcmp (data + 2, "[Script Info]", 13) != 0 && + memcmp (data + 3, "[Script Info]", 13) != 0 && + memcmp (data + 4, "[Script Info]", 13) != 0) { + return; + } + + /* now check if we have SSA or ASS */ + len = gst_type_find_get_length (tf); + if (len > 8192) + len = 8192; + + data = gst_type_find_peek (tf, 0, len); + if (data == NULL) + return; + + /* skip BOM */ + start = (gchar *) memchr (data, '[', 5); + g_assert (start); + len -= (start - (gchar *) data); + + /* ignore anything non-UTF8 for now, in future we might at least allow + * other UTF variants that are clearly prefixed with the appropriate BOM */ + if (!g_utf8_validate (start, len, &end) && (len - (end - start)) > 6) { + GST_FIXME ("non-UTF8 SSA/ASS file"); + return; + } + + /* something at start, but not a UTF-8 BOM? */ + if (data[0] != '[' && (data[0] != 0xEF || data[1] != 0xBB || data[2] != 0xBF)) + return; + + /* ignore any partial UTF-8 characters at the end */ + len = end - start; + + /* create a NUL-terminated string so it's easier to process it safely */ + str = g_strndup (start, len - 1); + script_type = strstr (str, "ScriptType:"); + if (script_type != NULL) { + gdouble version; + + ver_str = script_type + 11; + while (*ver_str == ' ' || *ver_str == 'v' || *ver_str == 'V') + ++ver_str; + version = g_ascii_strtod (ver_str, &p); + if (version == 4.0 && p != NULL && *p == '+') + media_type = "application/x-ass"; + else if (version >= 1.0 && version <= 4.0) + media_type = "application/x-ssa"; + } + + if (media_type == NULL) { + if (strstr (str, "[v4+ Styles]") || strstr (str, "[V4+ Styles]")) + media_type = "application/x-ass"; + else if (strstr (str, "[v4 Styles]") || strstr (str, "[V4 Styles]")) + media_type = "application/x-ssa"; + } + + if (media_type != NULL) { + gst_type_find_suggest_simple (tf, GST_TYPE_FIND_MAXIMUM, + media_type, "parsed", G_TYPE_BOOLEAN, FALSE, NULL); + } else { + GST_WARNING ("could not detect SSA/ASS variant"); + } + + g_free (str); +} + /*** generic typefind for streams that have some data at a specific position***/ typedef struct { @@ -4455,7 +5008,8 @@ plugin_init (GstPlugin * plugin) TYPE_FIND_REGISTER (plugin, "video/mpegts", GST_RANK_PRIMARY, mpeg_ts_type_find, "ts,mts", MPEGTS_CAPS, NULL, NULL); TYPE_FIND_REGISTER (plugin, "application/ogg", GST_RANK_PRIMARY, - ogganx_type_find, "anx,ogg,ogm", OGGANX_CAPS, NULL, NULL); + ogganx_type_find, "ogg,oga,ogv,ogm,ogx,spx,anx,axa,axv", OGG_CAPS, + NULL, NULL); TYPE_FIND_REGISTER (plugin, "video/mpeg-elementary", GST_RANK_MARGINAL, mpeg_video_stream_type_find, "mpv,mpeg,mpg", MPEG_VIDEO_CAPS, NULL, NULL); TYPE_FIND_REGISTER (plugin, "video/mpeg4", GST_RANK_PRIMARY, @@ -4543,9 +5097,7 @@ plugin_init (GstPlugin * plugin) TYPE_FIND_REGISTER (plugin, "image/x-portable-pixmap", GST_RANK_SECONDARY, pnm_type_find, "pnm,ppm,pgm,pbm", PNM_CAPS, NULL, NULL); TYPE_FIND_REGISTER (plugin, "video/x-matroska", GST_RANK_PRIMARY, - matroska_type_find, "mkv,mka", MATROSKA_CAPS, NULL, NULL); - TYPE_FIND_REGISTER (plugin, "video/webm", GST_RANK_PRIMARY, webm_type_find, - "webm", WEBM_CAPS, NULL, NULL); + matroska_type_find, "mkv,mka,mk3d,webm", MATROSKA_CAPS, NULL, NULL); TYPE_FIND_REGISTER (plugin, "application/mxf", GST_RANK_PRIMARY, mxf_type_find, "mxf", MXF_CAPS, NULL, NULL); TYPE_FIND_REGISTER_START_WITH (plugin, "video/x-mve", GST_RANK_SECONDARY, @@ -4559,6 +5111,8 @@ plugin_init (GstPlugin * plugin) "amr", "#!AMR-WB", 7, GST_TYPE_FIND_MAXIMUM); TYPE_FIND_REGISTER (plugin, "audio/iLBC-sh", GST_RANK_PRIMARY, ilbc_type_find, "ilbc", ILBC_CAPS, NULL, NULL); + TYPE_FIND_REGISTER (plugin, "audio/x-sbc", GST_RANK_MARGINAL, sbc_type_find, + "sbc", SBC_CAPS, NULL, NULL); TYPE_FIND_REGISTER_START_WITH (plugin, "audio/x-sid", GST_RANK_MARGINAL, "sid", "PSID", 4, GST_TYPE_FIND_MAXIMUM); TYPE_FIND_REGISTER_START_WITH (plugin, "image/x-xcf", GST_RANK_SECONDARY, @@ -4674,12 +5228,17 @@ plugin_init (GstPlugin * plugin) TYPE_FIND_REGISTER (plugin, "image/x-degas", GST_RANK_MARGINAL, degas_type_find, NULL, NULL, NULL, NULL); + TYPE_FIND_REGISTER (plugin, "application/octet-stream", GST_RANK_MARGINAL, + dvdiso_type_find, NULL, NULL, NULL, NULL); + + TYPE_FIND_REGISTER (plugin, "application/x-ssa", GST_RANK_SECONDARY, + ssa_type_find, "ssa,ass", NULL, NULL, NULL); return TRUE; } GST_PLUGIN_DEFINE (GST_VERSION_MAJOR, GST_VERSION_MINOR, - "typefindfunctions", + typefindfunctions, "default typefind functions", plugin_init, VERSION, GST_LICENSE, GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)