2 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
4 * ebml-read.c: read EBML data from file/stream
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
28 #include "ebml-read.h"
33 GST_DEBUG_CATEGORY_STATIC (ebmlread_debug);
34 #define GST_CAT_DEFAULT ebmlread_debug
36 static void gst_ebml_read_class_init (GstEbmlReadClass * klass);
37 static void gst_ebml_read_init (GstEbmlRead * ebml);
38 static GstStateChangeReturn gst_ebml_read_change_state (GstElement * element,
39 GstStateChange transition);
41 /* convenience functions */
42 static gboolean gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size,
43 GstBuffer ** p_buf, guint8 ** bytes);
44 static gboolean gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size,
45 GstBuffer ** p_buf, guint8 ** bytes);
48 static GstElementClass *parent_class; /* NULL */
51 gst_ebml_read_get_type (void)
53 static GType gst_ebml_read_type; /* 0 */
55 if (!gst_ebml_read_type) {
56 static const GTypeInfo gst_ebml_read_info = {
57 sizeof (GstEbmlReadClass),
60 (GClassInitFunc) gst_ebml_read_class_init,
65 (GInstanceInitFunc) gst_ebml_read_init,
69 g_type_register_static (GST_TYPE_ELEMENT, "GstEbmlRead",
70 &gst_ebml_read_info, 0);
73 return gst_ebml_read_type;
77 gst_ebml_finalize (GObject * obj)
79 GstEbmlRead *ebml = GST_EBML_READ (obj);
81 g_list_foreach (ebml->level, (GFunc) g_free, NULL);
82 g_list_free (ebml->level);
84 if (ebml->cached_buffer) {
85 gst_buffer_unref (ebml->cached_buffer);
86 ebml->cached_buffer = NULL;
89 G_OBJECT_CLASS (parent_class)->finalize (obj);
93 gst_ebml_read_class_init (GstEbmlReadClass * klass)
95 GstElementClass *gstelement_class = (GstElementClass *) klass;
96 GObjectClass *gobject_class = (GObjectClass *) klass;
98 parent_class = g_type_class_peek_parent (klass);
100 GST_DEBUG_CATEGORY_INIT (ebmlread_debug, "ebmlread",
101 0, "EBML stream helper class");
103 gobject_class->finalize = gst_ebml_finalize;
105 gstelement_class->change_state =
106 GST_DEBUG_FUNCPTR (gst_ebml_read_change_state);
110 gst_ebml_read_init (GstEbmlRead * ebml)
112 ebml->sinkpad = NULL;
116 static GstStateChangeReturn
117 gst_ebml_read_change_state (GstElement * element, GstStateChange transition)
119 GstStateChangeReturn ret;
120 GstEbmlRead *ebml = GST_EBML_READ (element);
122 switch (transition) {
123 case GST_STATE_CHANGE_READY_TO_PAUSED:
124 if (!ebml->sinkpad) {
125 g_return_val_if_reached (GST_STATE_CHANGE_FAILURE);
132 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
134 switch (transition) {
135 case GST_STATE_CHANGE_PAUSED_TO_READY:
137 g_list_foreach (ebml->level, (GFunc) g_free, NULL);
138 g_list_free (ebml->level);
140 if (ebml->cached_buffer) {
141 gst_buffer_unref (ebml->cached_buffer);
142 ebml->cached_buffer = NULL;
155 * Return: the amount of levels in the hierarchy that the
156 * current element lies higher than the previous one.
157 * The opposite isn't done - that's auto-done using master
162 gst_ebml_read_element_level_up (GstEbmlRead * ebml)
165 guint64 pos = ebml->offset;
167 while (ebml->level != NULL) {
168 GList *last = g_list_last (ebml->level);
169 GstEbmlLevel *level = last->data;
171 if (pos >= level->start + level->length) {
172 ebml->level = g_list_remove (ebml->level, level);
184 * Calls pull_range for (offset,size) without advancing our offset
187 gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
192 /* Caching here actually makes much less difference than one would expect.
193 * We do it mainly to avoid pulling buffers of 1 byte all the time */
194 if (ebml->cached_buffer) {
195 guint64 cache_offset = GST_BUFFER_OFFSET (ebml->cached_buffer);
196 guint cache_size = GST_BUFFER_SIZE (ebml->cached_buffer);
198 if (cache_offset <= ebml->offset &&
199 (ebml->offset + size) < (cache_offset + cache_size)) {
201 *p_buf = gst_buffer_create_sub (ebml->cached_buffer,
202 ebml->offset - cache_offset, size);
205 GST_BUFFER_DATA (ebml->cached_buffer) + ebml->offset - cache_offset;
208 /* not enough data in the cache, free cache and get a new one */
209 gst_buffer_unref (ebml->cached_buffer);
210 ebml->cached_buffer = NULL;
213 /* refill the cache */
214 ret = gst_pad_pull_range (ebml->sinkpad, ebml->offset, MAX (size, 64 * 1024),
215 &ebml->cached_buffer);
216 if (ret != GST_FLOW_OK) {
217 ebml->cached_buffer = NULL;
221 if (GST_BUFFER_SIZE (ebml->cached_buffer) >= size) {
223 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
225 *bytes = GST_BUFFER_DATA (ebml->cached_buffer);
229 /* Not possible to get enough data, try a last time with
230 * requesting exactly the size we need */
231 gst_buffer_unref (ebml->cached_buffer);
232 ebml->cached_buffer = NULL;
235 gst_pad_pull_range (ebml->sinkpad, ebml->offset, size,
236 &ebml->cached_buffer);
237 if (ret != GST_FLOW_OK) {
238 GST_DEBUG ("pull_range returned %d", ret);
246 if (GST_BUFFER_SIZE (ebml->cached_buffer) < size) {
247 GST_WARNING_OBJECT (ebml, "Dropping short buffer at offset %"
248 G_GUINT64_FORMAT ": wanted %u bytes, got %u bytes", ebml->offset,
249 size, GST_BUFFER_SIZE (ebml->cached_buffer));
251 gst_buffer_unref (ebml->cached_buffer);
252 ebml->cached_buffer = NULL;
257 return GST_FLOW_ERROR;
261 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
263 *bytes = GST_BUFFER_DATA (*p_buf);
269 * Calls pull_range for (offset,size) and advances our offset by size
272 gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
277 ret = gst_ebml_read_peek_bytes (ebml, size, p_buf, bytes);
278 if (ret != GST_FLOW_OK)
281 ebml->offset += size;
286 * Read: the element content data ID.
287 * Return: FALSE on error.
291 gst_ebml_read_element_id (GstEbmlRead * ebml, guint32 * id, guint * level_up)
294 gint len_mask = 0x80, read = 1, n = 1;
299 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
300 if (ret != GST_FLOW_OK)
303 b = GST_READ_UINT8 (buf);
307 while (read <= 4 && !(total & len_mask)) {
312 guint64 pos = ebml->offset;
314 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
315 ("Invalid EBML ID size tag (0x%x) at position %" G_GUINT64_FORMAT
316 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, pos, pos));
317 return GST_FLOW_ERROR;
320 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
321 if (ret != GST_FLOW_OK)
325 b = GST_READ_UINT8 (buf + n);
326 total = (total << 8) | b;
334 *level_up = gst_ebml_read_element_level_up (ebml);
336 ebml->offset += read;
341 * Read: element content length.
342 * Return: the number of bytes read or -1 on error.
346 gst_ebml_read_element_length (GstEbmlRead * ebml, guint64 * length,
351 gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
355 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
356 if (ret != GST_FLOW_OK)
359 b = GST_READ_UINT8 (buf);
363 while (read <= 8 && !(total & len_mask)) {
368 guint64 pos = ebml->offset;
370 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
371 ("Invalid EBML length size tag (0x%x) at position %" G_GUINT64_FORMAT
372 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, pos, pos));
373 return GST_FLOW_ERROR;
376 if ((total &= (len_mask - 1)) == len_mask - 1)
379 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
380 if (ret != GST_FLOW_OK)
384 guint8 b = GST_READ_UINT8 (buf + n);
388 total = (total << 8) | b;
393 *length = G_MAXUINT64;
400 ebml->offset += read;
406 * Return: the ID of the next element.
407 * Level_up contains the amount of levels that this
408 * next element lies higher than the previous one.
412 gst_ebml_peek_id (GstEbmlRead * ebml, guint * level_up, guint32 * id)
419 off = ebml->offset; /* save offset */
421 if ((ret = gst_ebml_read_element_id (ebml, id, level_up)) != GST_FLOW_OK)
424 ebml->offset = off; /* restore offset */
429 * Return the length of the stream in bytes
433 gst_ebml_read_get_length (GstEbmlRead * ebml)
435 GstFormat fmt = GST_FORMAT_BYTES;
438 /* FIXME: what to do if we don't get the upstream length */
439 if (!gst_pad_query_peer_duration (ebml->sinkpad, &fmt, &end) ||
440 fmt != GST_FORMAT_BYTES || end < 0)
441 g_return_val_if_reached (0);
447 * Seek to a given offset.
451 gst_ebml_read_seek (GstEbmlRead * ebml, guint64 offset)
453 if (offset >= gst_ebml_read_get_length (ebml))
456 ebml->offset = offset;
462 * Skip the next element.
466 gst_ebml_read_skip (GstEbmlRead * ebml)
472 ret = gst_ebml_read_element_id (ebml, &id, NULL);
473 if (ret != GST_FLOW_OK)
476 ret = gst_ebml_read_element_length (ebml, &length, NULL);
477 if (ret != GST_FLOW_OK)
480 ebml->offset += length;
485 * Read the next element as a GstBuffer (binary).
489 gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
494 ret = gst_ebml_read_element_id (ebml, id, NULL);
495 if (ret != GST_FLOW_OK)
498 ret = gst_ebml_read_element_length (ebml, &length, NULL);
499 if (ret != GST_FLOW_OK)
503 *buf = gst_buffer_new ();
508 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, buf, NULL);
514 * Read the next element, return a pointer to it and its size.
518 gst_ebml_read_bytes (GstEbmlRead * ebml, guint32 * id, guint8 ** data,
526 ret = gst_ebml_read_element_id (ebml, id, NULL);
527 if (ret != GST_FLOW_OK)
530 ret = gst_ebml_read_element_length (ebml, &length, NULL);
531 if (ret != GST_FLOW_OK)
540 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, NULL, data);
541 if (ret != GST_FLOW_OK)
544 *size = (guint) length;
550 * Read the next element as an unsigned int.
554 gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
560 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
561 if (ret != GST_FLOW_OK)
564 if (size < 1 || size > 8) {
565 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
566 ("Invalid integer element size %d at position %" G_GUINT64_FORMAT
567 " (0x%" G_GINT64_MODIFIER "x)",
568 size, ebml->offset - size, ebml->offset - size));
569 return GST_FLOW_ERROR;
573 *num = (*num << 8) | *data;
582 * Read the next element as a signed int.
586 gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
590 gboolean negative = 0;
593 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
594 if (ret != GST_FLOW_OK)
597 if (size < 1 || size > 8) {
598 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
599 ("Invalid integer element size %d at position %" G_GUINT64_FORMAT
600 " (0x%" G_GINT64_MODIFIER "x)", size, ebml->offset - size,
601 ebml->offset - size));
602 return GST_FLOW_ERROR;
608 *num = *data & ~0x80;
614 *num = (*num << 8) | *data;
627 /* Convert 80 bit extended precision float in big endian format to double.
628 * Code taken from libavutil/intfloat_readwrite.c from ffmpeg,
629 * licensed under LGPL */
638 _ext2dbl (guint8 * data)
640 struct _ext_float *ext = (struct _ext_float *) data;
644 for (i = 0; i < 8; i++)
645 m = (m << 8) + ext->mantissa[i];
646 e = (((gint) ext->exponent[0] & 0x7f) << 8) | ext->exponent[1];
647 if (e == 0x7fff && m)
649 e -= 16383 + 63; /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
650 * mantissa bit is written as opposed to the
651 * single and double precision formats */
652 if (ext->exponent[0] & 0x80)
658 * Read the next element as a float.
662 gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
668 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
669 if (ret != GST_FLOW_OK)
672 if (size != 4 && size != 8 && size != 10) {
673 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
674 ("Invalid float element size %d at position %" G_GUINT64_FORMAT
675 " (0x%" G_GINT64_MODIFIER "x)", size, ebml->offset - size,
676 ebml->offset - size));
677 return GST_FLOW_ERROR;
683 #if (G_BYTE_ORDER == G_BIG_ENDIAN)
684 f = *(gfloat *) data;
687 ((guint8 *) & f)[size - 1] = data[4 - size];
693 } else if (size == 8) {
696 #if (G_BYTE_ORDER == G_BIG_ENDIAN)
697 d = *(gdouble *) data;
700 ((guint8 *) & d)[size - 1] = data[8 - size];
707 *num = _ext2dbl (data);
714 * Read the next element as an ASCII string.
718 gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str)
724 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
725 if (ret != GST_FLOW_OK)
728 *str = g_malloc (size + 1);
729 memcpy (*str, data, size);
736 * Read the next element as a UTF-8 string.
740 gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
744 #ifndef GST_DISABLE_GST_DEBUG
745 guint64 oldoff = ebml->offset;
748 ret = gst_ebml_read_ascii (ebml, id, str);
749 if (ret != GST_FLOW_OK)
752 if (str != NULL && *str != NULL && **str != '\0' &&
753 !g_utf8_validate (*str, -1, NULL)) {
754 GST_WARNING ("Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
761 * Read the next element as a date.
762 * Returns the seconds since the unix epoch.
766 gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
771 ret = gst_ebml_read_sint (ebml, id, &ebml_date);
772 if (ret != GST_FLOW_OK)
775 *date = (ebml_date / GST_SECOND) + GST_EBML_DATE_OFFSET;
781 * Read the next element, but only the header. The contents
782 * are supposed to be sub-elements which can be read separately.
786 gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
792 ret = gst_ebml_read_element_id (ebml, id, NULL);
793 if (ret != GST_FLOW_OK)
796 ret = gst_ebml_read_element_length (ebml, &length, NULL);
797 if (ret != GST_FLOW_OK)
801 level = g_new (GstEbmlLevel, 1);
802 level->start = ebml->offset;
803 level->length = length;
804 ebml->level = g_list_append (ebml->level, level);
810 * Read the next element as binary data.
814 gst_ebml_read_binary (GstEbmlRead * ebml,
815 guint32 * id, guint8 ** binary, guint64 * length)
821 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
822 if (ret != GST_FLOW_OK)
826 *binary = g_memdup (data, size);
832 * Read an EBML header.
836 gst_ebml_read_header (GstEbmlRead * ebml, gchar ** doctype, guint * version)
838 /* this function is the first to be called */
849 ret = gst_ebml_peek_id (ebml, &level_up, &id);
850 if (ret != GST_FLOW_OK)
853 GST_DEBUG_OBJECT (ebml, "id: %08x", GST_READ_UINT32_BE (&id));
855 if (level_up != 0 || id != GST_EBML_ID_HEADER) {
856 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
857 return GST_FLOW_ERROR;
859 ret = gst_ebml_read_master (ebml, &id);
860 if (ret != GST_FLOW_OK)
864 ret = gst_ebml_peek_id (ebml, &level_up, &id);
865 if (ret != GST_FLOW_OK)
873 /* is our read version uptodate? */
874 case GST_EBML_ID_EBMLREADVERSION:{
877 ret = gst_ebml_read_uint (ebml, &id, &num);
878 if (ret != GST_FLOW_OK)
880 g_assert (id == GST_EBML_ID_EBMLREADVERSION);
881 if (num != GST_EBML_VERSION) {
882 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
883 return GST_FLOW_ERROR;
888 /* we only handle 8 byte lengths at max */
889 case GST_EBML_ID_EBMLMAXSIZELENGTH:{
892 ret = gst_ebml_read_uint (ebml, &id, &num);
893 if (ret != GST_FLOW_OK)
895 g_assert (id == GST_EBML_ID_EBMLMAXSIZELENGTH);
896 if (num > sizeof (guint64)) {
897 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
898 return GST_FLOW_ERROR;
903 /* we handle 4 byte IDs at max */
904 case GST_EBML_ID_EBMLMAXIDLENGTH:{
907 ret = gst_ebml_read_uint (ebml, &id, &num);
908 if (ret != GST_FLOW_OK)
910 g_assert (id == GST_EBML_ID_EBMLMAXIDLENGTH);
911 if (num > sizeof (guint32)) {
912 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
913 return GST_FLOW_ERROR;
918 case GST_EBML_ID_DOCTYPE:{
921 ret = gst_ebml_read_ascii (ebml, &id, &text);
922 if (ret != GST_FLOW_OK)
924 g_assert (id == GST_EBML_ID_DOCTYPE);
933 case GST_EBML_ID_DOCTYPEREADVERSION:{
936 ret = gst_ebml_read_uint (ebml, &id, &num);
937 if (ret != GST_FLOW_OK)
939 g_assert (id == GST_EBML_ID_DOCTYPEREADVERSION);
946 GST_WARNING ("Unknown data type 0x%x in EBML header (ignored)", id);
949 /* we ignore these two, as they don't tell us anything we care about */
950 case GST_EBML_ID_VOID:
951 case GST_EBML_ID_EBMLVERSION:
952 case GST_EBML_ID_DOCTYPEVERSION:
953 ret = gst_ebml_read_skip (ebml);
954 if (ret != GST_FLOW_OK)