2 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
4 * ebml-read.c: read EBML data from file/stream
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
28 #include "ebml-read.h"
/* Debug category for this file; GST_CAT_DEFAULT makes the GST_DEBUG /
 * GST_WARNING style macros used below log to it. */
33 GST_DEBUG_CATEGORY_STATIC (ebmlread_debug);
34 #define GST_CAT_DEFAULT ebmlread_debug
/* Forward declarations for the class/instance initializers and the
 * state-change handler that class_init installs. */
36 static void gst_ebml_read_class_init (GstEbmlReadClass * klass);
38 static void gst_ebml_read_init (GstEbmlRead * ebml);
40 static GstStateChangeReturn gst_ebml_read_change_state (GstElement * element,
41 GstStateChange transition);
43 /* convenience functions */
44 static GstFlowReturn gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size,
45 GstBuffer ** p_buf, guint8 ** bytes);
46 static GstFlowReturn gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size,
47 GstBuffer ** p_buf, guint8 ** bytes);
/* Parent class pointer; assigned in class_init and used to chain up in
 * finalize and change_state. */
50 static GstElementClass *parent_class; /* NULL */
/* Returns the GType of GstEbmlRead. The type id is kept in a function-local
 * static; while it is still zero the type is registered as a static subtype
 * of GST_TYPE_ELEMENT using the class and instance initializers below. */
53 gst_ebml_read_get_type (void)
55 static GType gst_ebml_read_type; /* 0 */
/* A zero id means registration has not happened yet. */
57 if (!gst_ebml_read_type) {
58 static const GTypeInfo gst_ebml_read_info = {
59 sizeof (GstEbmlReadClass),
62 (GClassInitFunc) gst_ebml_read_class_init,
67 (GInstanceInitFunc) gst_ebml_read_init,
71 g_type_register_static (GST_TYPE_ELEMENT, "GstEbmlRead",
72 &gst_ebml_read_info, 0);
75 return gst_ebml_read_type;
/* Returns one GstEbmlLevel to the slice allocator. Also passed as the GFunc
 * to g_list_foreach() when a whole level list is torn down. */
79 gst_ebml_level_free (GstEbmlLevel * level)
81 g_slice_free (GstEbmlLevel, level);
/* GObject finalize handler: frees every entry of ebml->level and the list
 * itself, releases the cached buffer if one is held, then chains up to the
 * parent class finalize. */
85 gst_ebml_finalize (GObject * obj)
87 GstEbmlRead *ebml = GST_EBML_READ (obj);
/* The list nodes and the level structs they point to are freed separately. */
89 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
90 g_list_free (ebml->level);
92 if (ebml->cached_buffer) {
93 gst_buffer_unref (ebml->cached_buffer);
94 ebml->cached_buffer = NULL;
97 G_OBJECT_CLASS (parent_class)->finalize (obj);
/* Class initializer: remembers the parent class, initializes the ebmlread
 * debug category and installs the finalize and change_state overrides. */
101 gst_ebml_read_class_init (GstEbmlReadClass * klass)
103 GstElementClass *gstelement_class = (GstElementClass *) klass;
105 GObjectClass *gobject_class = (GObjectClass *) klass;
/* Needed later to chain up from finalize and change_state. */
107 parent_class = g_type_class_peek_parent (klass);
109 GST_DEBUG_CATEGORY_INIT (ebmlread_debug, "ebmlread",
110 0, "EBML stream helper class");
112 gobject_class->finalize = gst_ebml_finalize;
114 gstelement_class->change_state =
115 GST_DEBUG_FUNCPTR (gst_ebml_read_change_state);
/* Instance initializer: the reader starts without a sink pad. The
 * state-change handler refuses READY to PAUSED while sinkpad is still NULL,
 * so something outside this function has to assign it first. */
119 gst_ebml_read_init (GstEbmlRead * ebml)
121 ebml->sinkpad = NULL;
/* GstElement state-change handler.
 *
 * Before chaining up: going READY to PAUSED without a sink pad is treated as
 * a programming error and answered with GST_STATE_CHANGE_FAILURE.
 * After chaining up: going PAUSED to READY frees the level list and drops
 * the cached buffer. */
125 static GstStateChangeReturn
126 gst_ebml_read_change_state (GstElement * element, GstStateChange transition)
128 GstStateChangeReturn ret;
130 GstEbmlRead *ebml = GST_EBML_READ (element);
/* Checks that must pass before the parent class sees the transition. */
132 switch (transition) {
133 case GST_STATE_CHANGE_READY_TO_PAUSED:
134 if (!ebml->sinkpad) {
135 g_return_val_if_reached (GST_STATE_CHANGE_FAILURE);
/* Let the parent class perform the actual transition. */
142 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
/* Cleanup that runs once the parent class has handled the transition. */
144 switch (transition) {
145 case GST_STATE_CHANGE_PAUSED_TO_READY:
147 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
148 g_list_free (ebml->level);
150 if (ebml->cached_buffer) {
151 gst_buffer_unref (ebml->cached_buffer);
152 ebml->cached_buffer = NULL;
165 * Return: the number of levels in the hierarchy that the
166 * current element lies higher than the previous one.
167 * The opposite isn't done - going down a level happens automatically when a master element is read.
/* Removes levels from the head of ebml->level for as long as the current
 * read offset is at or beyond the end (start + length) of the level at the
 * head. gst_ebml_read_master() prepends to this list, so the head is the
 * most recently entered master element. */
172 gst_ebml_read_element_level_up (GstEbmlRead * ebml)
176 guint64 pos = ebml->offset;
178 while (ebml->level != NULL) {
179 GstEbmlLevel *level = ebml->level->data;
/* NOTE(review): start + length is evaluated in guint64. If length can be
 * G_MAXUINT64 (gst_ebml_read_element_length stores that value on one of its
 * paths) the sum wraps around and the level would be removed right away;
 * confirm whether such a level can reach this point. */
181 if (pos >= level->start + level->length) {
182 ebml->level = g_list_delete_link (ebml->level, ebml->level);
183 gst_ebml_level_free (level);
194 * Calls pull_range for (offset,size) without advancing our offset
197 gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
202 /* Caching here actually makes much less difference than one would expect.
203 * We do it mainly to avoid pulling buffers of 1 byte all the time */
204 if (ebml->cached_buffer) {
205 guint64 cache_offset = GST_BUFFER_OFFSET (ebml->cached_buffer);
207 guint cache_size = GST_BUFFER_SIZE (ebml->cached_buffer);
209 if (cache_offset <= ebml->offset &&
210 (ebml->offset + size) < (cache_offset + cache_size)) {
212 *p_buf = gst_buffer_create_sub (ebml->cached_buffer,
213 ebml->offset - cache_offset, size);
216 GST_BUFFER_DATA (ebml->cached_buffer) + ebml->offset - cache_offset;
219 /* not enough data in the cache, free cache and get a new one */
220 gst_buffer_unref (ebml->cached_buffer);
221 ebml->cached_buffer = NULL;
224 /* refill the cache */
225 ret = gst_pad_pull_range (ebml->sinkpad, ebml->offset, MAX (size, 64 * 1024),
226 &ebml->cached_buffer);
227 if (ret != GST_FLOW_OK) {
228 ebml->cached_buffer = NULL;
232 if (GST_BUFFER_SIZE (ebml->cached_buffer) >= size) {
234 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
236 *bytes = GST_BUFFER_DATA (ebml->cached_buffer);
240 /* Not possible to get enough data, try a last time with
241 * requesting exactly the size we need */
242 gst_buffer_unref (ebml->cached_buffer);
243 ebml->cached_buffer = NULL;
246 gst_pad_pull_range (ebml->sinkpad, ebml->offset, size,
247 &ebml->cached_buffer);
248 if (ret != GST_FLOW_OK) {
249 GST_DEBUG ("pull_range returned %d", ret);
257 if (GST_BUFFER_SIZE (ebml->cached_buffer) < size) {
258 GST_WARNING_OBJECT (ebml, "Dropping short buffer at offset %"
259 G_GUINT64_FORMAT ": wanted %u bytes, got %u bytes", ebml->offset,
260 size, GST_BUFFER_SIZE (ebml->cached_buffer));
262 gst_buffer_unref (ebml->cached_buffer);
263 ebml->cached_buffer = NULL;
268 return GST_FLOW_ERROR;
272 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
274 *bytes = GST_BUFFER_DATA (*p_buf);
280 * Calls pull_range for (offset,size) and advances our offset by size
/* Same contract as gst_ebml_read_peek_bytes(), to which all arguments are
 * forwarded, except that ebml->offset is moved forward by size. The lines
 * handling a failed peek are not visible here; presumably the peek result is
 * returned without touching the offset - confirm. */
283 gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
288 ret = gst_ebml_read_peek_bytes (ebml, size, p_buf, bytes);
289 if (ret != GST_FLOW_OK)
292 ebml->offset += size;
297 * Read: the element content data ID.
298 * Return: GST_FLOW_OK on success, otherwise the flow return describing the failure.
/* Reads the variable-width ID of the element at the current offset and
 * advances ebml->offset past it.
 *
 * The first byte is peeked to find the width marker bit (scanning starts at
 * 0x80, at most 4 bytes); then that many bytes are peeked and folded, most
 * significant first, into a running total. The store into *id is not visible
 * in this listing - presumably the total is written there; confirm.
 *
 * level_up receives the result of gst_ebml_read_element_level_up(); callers
 * in this file pass NULL when they do not need it.
 *
 * Returns a GstFlowReturn; an invalid width marker posts an element error
 * and yields GST_FLOW_ERROR. */
302 gst_ebml_read_element_id (GstEbmlRead * ebml, guint32 * id, guint * level_up)
306 gint len_mask = 0x80, read = 1, n = 1;
/* Peek a single byte first: it carries the width marker. */
314 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
315 if (ret != GST_FLOW_OK)
318 b = GST_READ_UINT8 (buf);
/* Scan for the marker bit; an ID may span at most 4 bytes. */
322 while (read <= 4 && !(total & len_mask)) {
327 guint64 pos = ebml->offset;
329 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
330 ("Invalid EBML ID size tag (0x%x) at position %" G_GUINT64_FORMAT
331 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, pos, pos));
332 return GST_FLOW_ERROR;
/* Now peek the complete ID; the offset has not moved yet. */
335 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
336 if (ret != GST_FLOW_OK)
340 b = GST_READ_UINT8 (buf + n);
341 total = (total << 8) | b;
349 *level_up = gst_ebml_read_element_level_up (ebml);
/* Consume the ID bytes that were only peeked so far. */
351 ebml->offset += read;
356 * Read: element content length.
357 * Return: GST_FLOW_OK on success, otherwise the flow return describing the failure.
/* Reads the variable-width length field at the current offset into *length
 * and advances ebml->offset past it.
 *
 * The first byte is peeked to find the width marker bit (scanning starts at
 * 0x80, at most 8 bytes). The marker bit is masked out of the first byte and
 * the remaining bytes are folded in, most significant first. On one path
 * *length is set to G_MAXUINT64; the condition selecting that path is not
 * visible in this listing - presumably it is the all-ones (unknown size)
 * encoding tracked through num_ffs; confirm.
 *
 * Returns a GstFlowReturn; an invalid width marker posts an element error
 * and yields GST_FLOW_ERROR. */
361 gst_ebml_read_element_length (GstEbmlRead * ebml, guint64 * length,
368 gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
/* Peek a single byte first: it carries the width marker. */
374 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
375 if (ret != GST_FLOW_OK)
378 b = GST_READ_UINT8 (buf);
/* Scan for the marker bit; a length field may span at most 8 bytes. */
382 while (read <= 8 && !(total & len_mask)) {
387 guint64 pos = ebml->offset;
389 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
390 ("Invalid EBML length size tag (0x%x) at position %" G_GUINT64_FORMAT
391 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, pos, pos));
392 return GST_FLOW_ERROR;
/* Strip the marker bit and test whether all remaining bits of the first
 * byte are set. */
395 if ((total &= (len_mask - 1)) == len_mask - 1)
398 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
399 if (ret != GST_FLOW_OK)
403 guint8 b = GST_READ_UINT8 (buf + n);
407 total = (total << 8) | b;
412 *length = G_MAXUINT64;
/* Consume the length bytes that were only peeked so far. */
419 ebml->offset += read;
425 * Return: the ID of the next element.
426 * Level_up contains the number of levels that this
427 * next element lies higher than the previous one.
/* Looks at the ID of the next element without consuming it: the read offset
 * is saved, gst_ebml_read_element_id() is called, and the offset is put
 * back. The level change reported by that call is added to *level_up.
 *
 * Void and CRC32 elements are consumed with gst_ebml_read_skip(). The
 * surrounding control flow is not visible in this listing - presumably the
 * lookup is then repeated for the element that follows; confirm. */
431 gst_ebml_peek_id (GstEbmlRead * ebml, guint * level_up, guint32 * id)
435 guint level_up_tmp = 0;
445 off = ebml->offset; /* save offset */
447 if ((ret = gst_ebml_read_element_id (ebml, id, &level_up_tmp)) != GST_FLOW_OK)
450 ebml->offset = off; /* restore offset */
/* Added to, not assigned: *level_up keeps whatever it already held. */
452 *level_up += level_up_tmp;
456 case GST_EBML_ID_VOID:
457 GST_DEBUG_OBJECT (ebml, "Skipping EBML Void element");
458 if ((ret = gst_ebml_read_skip (ebml)) != GST_FLOW_OK)
462 case GST_EBML_ID_CRC32:
463 GST_DEBUG_OBJECT (ebml, "Skipping EBML CRC32 element");
464 if ((ret = gst_ebml_read_skip (ebml)) != GST_FLOW_OK)
474 * Return the length of the stream in bytes
/* Asks the peer of the sink pad for the stream duration in GST_FORMAT_BYTES.
 * If the query fails, is answered in a different format, or yields a
 * negative value, g_return_val_if_reached() makes the function return 0.
 * The success return is not visible in this listing - presumably the queried
 * value is returned; confirm. */
478 gst_ebml_read_get_length (GstEbmlRead * ebml)
480 GstFormat fmt = GST_FORMAT_BYTES;
484 /* FIXME: what to do if we don't get the upstream length */
485 if (!gst_pad_query_peer_duration (ebml->sinkpad, &fmt, &end) ||
486 fmt != GST_FORMAT_BYTES || end < 0)
487 g_return_val_if_reached (0);
493 * Seek to a given offset.
/* Moves the read offset to `offset`. Offsets at or beyond the length
 * reported by gst_ebml_read_get_length() are rejected with
 * GST_FLOW_UNEXPECTED and leave ebml->offset unchanged. Only the offset
 * field is updated here; no data is pulled. */
497 gst_ebml_read_seek (GstEbmlRead * ebml, guint64 offset)
499 if (offset >= gst_ebml_read_get_length (ebml))
500 return GST_FLOW_UNEXPECTED;
502 ebml->offset = offset;
508 * Skip the next element.
512 gst_ebml_read_skip (GstEbmlRead * ebml)
520 ret = gst_ebml_read_element_id (ebml, &id, NULL);
521 if (ret != GST_FLOW_OK)
524 ret = gst_ebml_read_element_length (ebml, &length, NULL);
525 if (ret != GST_FLOW_OK)
528 ebml->offset += length;
533 * Read the next element as a GstBuffer (binary).
537 gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
543 ret = gst_ebml_read_element_id (ebml, id, NULL);
544 if (ret != GST_FLOW_OK)
547 ret = gst_ebml_read_element_length (ebml, &length, NULL);
548 if (ret != GST_FLOW_OK)
552 *buf = gst_buffer_new ();
557 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, buf, NULL);
563 * Read the next element, return a pointer to it and its size.
/* Reads the header (ID and length) of the next element, then pulls its
 * payload and hands back a pointer to it in *data together with its size in
 * *size. No copy is made in the visible lines, so the pointer comes from
 * gst_ebml_read_pull_bytes() as is.
 *
 * NOTE(review): length is a guint64 but is cast to guint both for the pull
 * and for *size. Unless a range check exists on a line not visible in this
 * listing, a length above G_MAXUINT is truncated and only part of the
 * element is consumed - confirm. */
567 gst_ebml_read_bytes (GstEbmlRead * ebml, guint32 * id, guint8 ** data,
576 ret = gst_ebml_read_element_id (ebml, id, NULL);
577 if (ret != GST_FLOW_OK)
580 ret = gst_ebml_read_element_length (ebml, &length, NULL);
581 if (ret != GST_FLOW_OK)
590 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, NULL, data);
591 if (ret != GST_FLOW_OK)
594 *size = (guint) length;
600 * Read the next element as an unsigned int.
/* Reads the next element through gst_ebml_read_bytes() and decodes its
 * payload as an unsigned integer into *num.
 *
 * Payloads shorter than 1 or longer than 8 bytes post an element error and
 * return GST_FLOW_ERROR. By the time the size is checked the payload has
 * already been consumed, which is why the reported position is
 * ebml->offset - size. */
604 gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
612 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
613 if (ret != GST_FLOW_OK)
616 if (size < 1 || size > 8) {
617 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
618 ("Invalid integer element size %d at position %" G_GUINT64_FORMAT
619 " (0x%" G_GINT64_MODIFIER "x)",
620 size, ebml->offset - size, ebml->offset - size));
621 return GST_FLOW_ERROR;
/* Shift the value up by one byte and append the next payload byte; the loop
 * around this statement is not visible in this listing. */
625 *num = (*num << 8) | *data;
634 * Read the next element as a signed int.
638 gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
644 gboolean negative = 0;
648 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
649 if (ret != GST_FLOW_OK)
652 if (size < 1 || size > 8) {
653 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
654 ("Invalid integer element size %d at position %" G_GUINT64_FORMAT
655 " (0x%" G_GINT64_MODIFIER "x)", size, ebml->offset - size,
656 ebml->offset - size));
657 return GST_FLOW_ERROR;
663 *num = *data & ~0x80;
669 *num = (*num << 8) | *data;
682 /* Convert 80 bit extended precision float in big endian format to double.
683 * Code taken from libavutil/intfloat_readwrite.c from ffmpeg,
684 * licensed under LGPL */
/* Converts the 10 bytes at data, viewed through struct _ext_float, to a
 * double. The 8 mantissa bytes are combined most significant first, and the
 * exponent is taken from the low 7 bits of exponent[0] followed by
 * exponent[1]. The statements that follow the two conditionals below and the
 * final return are not visible in this listing - presumably the first one
 * handles NaN and the second one applies the sign; confirm. */
693 _ext2dbl (guint8 * data)
695 struct _ext_float *ext = (struct _ext_float *) data;
/* Assemble the 64-bit mantissa from its 8 big-endian bytes. */
701 for (i = 0; i < 8; i++)
702 m = (m << 8) + ext->mantissa[i];
/* 15-bit exponent; the top bit of exponent[0] is tested separately below. */
703 e = (((gint) ext->exponent[0] & 0x7f) << 8) | ext->exponent[1];
704 if (e == 0x7fff && m)
706 e -= 16383 + 63; /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
707 * mantissa bit is written as opposed to the
708 * single and double precision formats */
709 if (ext->exponent[0] & 0x80)
715 * Read the next element as a float.
/* Reads the next element through gst_ebml_read_bytes() and decodes its
 * payload as a floating point number into *num.
 *
 * Accepted payload sizes are 4, 8 and 10 bytes; any other size posts an
 * element error and returns GST_FLOW_ERROR. The 10-byte form is converted
 * with _ext2dbl(). For 4 and 8 bytes the payload is loaded directly on
 * big-endian hosts and copied byte by byte in reverse order otherwise (the
 * loop around the byte copies is not visible in this listing). */
719 gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
727 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
728 if (ret != GST_FLOW_OK)
731 if (size != 4 && size != 8 && size != 10) {
732 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
733 ("Invalid float element size %d at position %" G_GUINT64_FORMAT
734 " (0x%" G_GINT64_MODIFIER "x)", size, ebml->offset - size,
735 ebml->offset - size));
736 return GST_FLOW_ERROR;
/* Single precision payload. */
742 #if (G_BYTE_ORDER == G_BIG_ENDIAN)
743 f = *(gfloat *) data;
/* Reverse copy: destination index size - 1 pairs with source index
 * 4 - size. */
746 ((guint8 *) & f)[size - 1] = data[4 - size];
752 } else if (size == 8) {
755 #if (G_BYTE_ORDER == G_BIG_ENDIAN)
756 d = *(gdouble *) data;
/* Same reverse copy for the 8-byte case. */
759 ((guint8 *) & d)[size - 1] = data[8 - size];
/* Remaining accepted size: 10-byte extended precision. */
766 *num = _ext2dbl (data);
773 * Read the next element as an ASCII string.
/* Reads the next element through gst_ebml_read_bytes() and copies its
 * payload into a newly g_malloc()ed block stored in *str. The block is one
 * byte larger than the payload - presumably for a terminating NUL written on
 * a line not visible in this listing; confirm. The allocation is handed to
 * the caller through *str and is not released here. */
777 gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str)
785 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
786 if (ret != GST_FLOW_OK)
789 *str = g_malloc (size + 1);
790 memcpy (*str, data, size);
797 * Read the next element as a UTF-8 string.
/* Reads the next element with gst_ebml_read_ascii() and then checks a
 * non-empty result with g_utf8_validate(). An invalid string only produces a
 * warning in the visible lines. The offset before the read is remembered,
 * in debug-enabled builds only, so the warning can report where the element
 * started. */
801 gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
805 #ifndef GST_DISABLE_GST_DEBUG
806 guint64 oldoff = ebml->offset;
809 ret = gst_ebml_read_ascii (ebml, id, str);
810 if (ret != GST_FLOW_OK)
813 if (str != NULL && *str != NULL && **str != '\0' &&
814 !g_utf8_validate (*str, -1, NULL)) {
815 GST_WARNING ("Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
822 * Read the next element as a date.
823 * Returns the seconds since the unix epoch.
/* Reads the next element as a signed integer with gst_ebml_read_sint() and
 * converts it for the caller: the raw value is divided by GST_SECOND and
 * GST_EBML_DATE_OFFSET is added, and the result is stored in *date. The
 * division is an integer division, so any sub-second part is dropped. */
827 gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
833 ret = gst_ebml_read_sint (ebml, id, &ebml_date);
834 if (ret != GST_FLOW_OK)
837 *date = (ebml_date / GST_SECOND) + GST_EBML_DATE_OFFSET;
843 * Read the next element, but only the header. The contents
844 * are supposed to be sub-elements which can be read separately.
/* Reads only the header (ID and length) of the next element and records it
 * as a new level: a GstEbmlLevel holding the offset right after the header
 * and the announced length is prepended to ebml->level. The payload is left
 * unread so that the sub-elements can be read individually afterwards. */
848 gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
856 ret = gst_ebml_read_element_id (ebml, id, NULL);
857 if (ret != GST_FLOW_OK)
860 ret = gst_ebml_read_element_length (ebml, &length, NULL);
861 if (ret != GST_FLOW_OK)
865 level = g_slice_new (GstEbmlLevel);
/* The offset now points at the first byte of the payload. */
866 level->start = ebml->offset;
867 level->length = length;
868 ebml->level = g_list_prepend (ebml->level, level);
874 * Read the next element as binary data.
/* Reads the next element through gst_ebml_read_bytes() and stores a
 * g_memdup() copy of its payload in *binary, so the caller receives its own
 * allocation rather than a pointer into the reader. The store into *length
 * is not visible in this listing - presumably it receives the payload size;
 * confirm. */
878 gst_ebml_read_binary (GstEbmlRead * ebml,
879 guint32 * id, guint8 ** binary, guint64 * length)
887 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
888 if (ret != GST_FLOW_OK)
892 *binary = g_memdup (data, size);
898 * Read an EBML header.
902 gst_ebml_read_header (GstEbmlRead * ebml, gchar ** doctype, guint * version)
904 /* this function is the first to be called */
917 ret = gst_ebml_peek_id (ebml, &level_up, &id);
918 if (ret != GST_FLOW_OK)
921 GST_DEBUG_OBJECT (ebml, "id: %08x", GST_READ_UINT32_BE (&id));
923 if (level_up != 0 || id != GST_EBML_ID_HEADER) {
924 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
925 return GST_FLOW_ERROR;
927 ret = gst_ebml_read_master (ebml, &id);
928 if (ret != GST_FLOW_OK)
932 ret = gst_ebml_peek_id (ebml, &level_up, &id);
933 if (ret != GST_FLOW_OK)
941 /* is our read version uptodate? */
942 case GST_EBML_ID_EBMLREADVERSION:{
945 ret = gst_ebml_read_uint (ebml, &id, &num);
946 if (ret != GST_FLOW_OK)
948 g_assert (id == GST_EBML_ID_EBMLREADVERSION);
949 if (num != GST_EBML_VERSION) {
950 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
951 return GST_FLOW_ERROR;
956 /* we only handle 8 byte lengths at max */
957 case GST_EBML_ID_EBMLMAXSIZELENGTH:{
960 ret = gst_ebml_read_uint (ebml, &id, &num);
961 if (ret != GST_FLOW_OK)
963 g_assert (id == GST_EBML_ID_EBMLMAXSIZELENGTH);
964 if (num > sizeof (guint64)) {
965 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
966 return GST_FLOW_ERROR;
971 /* we handle 4 byte IDs at max */
972 case GST_EBML_ID_EBMLMAXIDLENGTH:{
975 ret = gst_ebml_read_uint (ebml, &id, &num);
976 if (ret != GST_FLOW_OK)
978 g_assert (id == GST_EBML_ID_EBMLMAXIDLENGTH);
979 if (num > sizeof (guint32)) {
980 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
981 return GST_FLOW_ERROR;
986 case GST_EBML_ID_DOCTYPE:{
989 ret = gst_ebml_read_ascii (ebml, &id, &text);
990 if (ret != GST_FLOW_OK)
992 g_assert (id == GST_EBML_ID_DOCTYPE);
1001 case GST_EBML_ID_DOCTYPEREADVERSION:{
1004 ret = gst_ebml_read_uint (ebml, &id, &num);
1005 if (ret != GST_FLOW_OK)
1007 g_assert (id == GST_EBML_ID_DOCTYPEREADVERSION);
1014 GST_WARNING ("Unknown data type 0x%x in EBML header (ignored)", id);
1017 /* we ignore these two, as they don't tell us anything we care about */
1018 case GST_EBML_ID_EBMLVERSION:
1019 case GST_EBML_ID_DOCTYPEVERSION:
1020 ret = gst_ebml_read_skip (ebml);
1021 if (ret != GST_FLOW_OK)