2 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
4 * ebml-read.c: read EBML data from file/stream
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
27 #include <gst/floatcast/floatcast.h>
29 #include "ebml-read.h"
/* Debug category for this file; GST_CAT_DEFAULT routes the GST_DEBUG and
 * GST_WARNING calls below to it. It is initialised in
 * gst_ebml_read_class_init(). */
34 GST_DEBUG_CATEGORY_STATIC (ebmlread_debug);
35 #define GST_CAT_DEFAULT ebmlread_debug
/* Class and instance initialisers referenced from the GTypeInfo in
 * gst_ebml_read_get_type(). */
37 static void gst_ebml_read_class_init (GstEbmlReadClass * klass);
39 static void gst_ebml_read_init (GstEbmlRead * ebml);
/* State-change handler installed on the element class by class_init. */
41 static GstStateChangeReturn gst_ebml_read_change_state (GstElement * element,
42 GstStateChange transition);
44 /* convenience functions */
/* peek_bytes reads at ebml->offset without moving it; pull_bytes does the
 * same and then advances ebml->offset by size. Both take optional outputs:
 * callers in this file pass NULL for either p_buf or bytes. */
45 static GstFlowReturn gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size,
46 GstBuffer ** p_buf, guint8 ** bytes);
47 static GstFlowReturn gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size,
48 GstBuffer ** p_buf, guint8 ** bytes);
/* Parent class pointer, assigned in gst_ebml_read_class_init() and used to
 * chain up in finalize and change_state. */
51 static GstElementClass *parent_class; /* NULL */
/* Returns the GType for GstEbmlRead. While the cached static id is still
 * zero, the type is registered through g_type_register_static(). */
54 gst_ebml_read_get_type (void)
56 static GType gst_ebml_read_type; /* 0 */
/* Registration path: only entered while no type id has been cached. */
58 if (!gst_ebml_read_type) {
59 static const GTypeInfo gst_ebml_read_info = {
60 sizeof (GstEbmlReadClass),
63 (GClassInitFunc) gst_ebml_read_class_init,
68 (GInstanceInitFunc) gst_ebml_read_init,
/* The type derives from GST_TYPE_ELEMENT, is named "GstEbmlRead" and is
 * registered with no type flags (0). */
72 g_type_register_static (GST_TYPE_ELEMENT, "GstEbmlRead",
73 &gst_ebml_read_info, 0);
76 return gst_ebml_read_type;
/* Returns one GstEbmlLevel to the slice allocator. Levels are created with
 * g_slice_new() in gst_ebml_read_master(). Also used as a GFunc callback for
 * g_list_foreach(). */
80 gst_ebml_level_free (GstEbmlLevel * level)
82 g_slice_free (GstEbmlLevel, level);
/* GObject finalize handler: frees the level list, drops the cached read
 * buffer and chains up to the parent class. */
86 gst_ebml_finalize (GObject * obj)
88 GstEbmlRead *ebml = GST_EBML_READ (obj);
/* Free each GstEbmlLevel payload first, then the list nodes themselves. */
90 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
91 g_list_free (ebml->level);
/* Release the buffer cached by gst_ebml_read_peek_bytes(), if any. */
93 if (ebml->cached_buffer) {
94 gst_buffer_unref (ebml->cached_buffer);
95 ebml->cached_buffer = NULL;
/* Chain up last so the parent can finish tearing down the object. */
98 G_OBJECT_CLASS (parent_class)->finalize (obj);
/* Class initialiser: records the parent class, sets up the debug category
 * and installs the finalize and change_state overrides. */
102 gst_ebml_read_class_init (GstEbmlReadClass * klass)
104 GstElementClass *gstelement_class = (GstElementClass *) klass;
106 GObjectClass *gobject_class = (GObjectClass *) klass;
/* Kept in a file-scope static so the handlers below can chain up. */
108 parent_class = g_type_class_peek_parent (klass);
/* Category name "ebmlread", no colour flags. */
110 GST_DEBUG_CATEGORY_INIT (ebmlread_debug, "ebmlread",
111 0, "EBML stream helper class");
113 gobject_class->finalize = gst_ebml_finalize;
115 gstelement_class->change_state =
116 GST_DEBUG_FUNCPTR (gst_ebml_read_change_state);
/* Instance initialiser. The sink pad starts out unset;
 * gst_ebml_read_change_state() refuses READY->PAUSED while it is still
 * NULL, so it has to be assigned elsewhere (not shown here). */
120 gst_ebml_read_init (GstEbmlRead * ebml)
122 ebml->sinkpad = NULL;
/* Element state-change handler. Before chaining up it rejects
 * READY->PAUSED when no sink pad has been set. After chaining up it
 * releases the level list and the cached buffer on PAUSED->READY. */
126 static GstStateChangeReturn
127 gst_ebml_read_change_state (GstElement * element, GstStateChange transition)
129 GstStateChangeReturn ret;
131 GstEbmlRead *ebml = GST_EBML_READ (element);
/* Checks that must pass before the parent class runs the transition. */
133 switch (transition) {
134 case GST_STATE_CHANGE_READY_TO_PAUSED:
/* Without a sink pad nothing can be pulled, so this is treated as a
 * programming error and the transition fails. */
135 if (!ebml->sinkpad) {
136 g_return_val_if_reached (GST_STATE_CHANGE_FAILURE);
/* Let the parent class perform the actual transition. */
143 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
/* Cleanup that runs after the parent class has handled the transition. */
145 switch (transition) {
146 case GST_STATE_CHANGE_PAUSED_TO_READY:
/* Drop the per-stream parser state: level payloads, list nodes and the
 * read cache. */
148 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
149 g_list_free (ebml->level);
151 if (ebml->cached_buffer) {
152 gst_buffer_unref (ebml->cached_buffer);
153 ebml->cached_buffer = NULL;
/* Pops every level whose byte range ends at or before the current read
 * offset, freeing each popped GstEbmlLevel. */
166 * Return: the amount of levels in the hierarchy that the
167 * current element lies higher than the previous one.
168 * The opposite isn't done - that's auto-done using master
173 gst_ebml_read_element_level_up (GstEbmlRead * ebml)
177 guint64 pos = ebml->offset;
/* gst_ebml_read_master() prepends new levels, so the head of the list is
 * the most recently opened one. */
179 while (ebml->level != NULL) {
180 GstEbmlLevel *level = ebml->level->data;
/* NOTE(review): level->length comes from gst_ebml_read_element_length(),
 * which can report G_MAXUINT64; start + length would then wrap around.
 * Confirm how unknown-size master elements are meant to be handled. */
182 if (pos >= level->start + level->length) {
/* Unlink the head node, then free the payload it carried. */
183 ebml->level = g_list_delete_link (ebml->level, ebml->level);
184 gst_ebml_level_free (level);
195 * Calls pull_range for (offset,size) without advancing our offset
198 gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
203 /* Caching here actually makes much less difference than one would expect.
204 * We do it mainly to avoid pulling buffers of 1 byte all the time */
205 if (ebml->cached_buffer) {
206 guint64 cache_offset = GST_BUFFER_OFFSET (ebml->cached_buffer);
208 guint cache_size = GST_BUFFER_SIZE (ebml->cached_buffer);
210 if (cache_offset <= ebml->offset &&
211 (ebml->offset + size) < (cache_offset + cache_size)) {
213 *p_buf = gst_buffer_create_sub (ebml->cached_buffer,
214 ebml->offset - cache_offset, size);
217 GST_BUFFER_DATA (ebml->cached_buffer) + ebml->offset - cache_offset;
220 /* not enough data in the cache, free cache and get a new one */
221 gst_buffer_unref (ebml->cached_buffer);
222 ebml->cached_buffer = NULL;
225 /* refill the cache */
226 ret = gst_pad_pull_range (ebml->sinkpad, ebml->offset, MAX (size, 64 * 1024),
227 &ebml->cached_buffer);
228 if (ret != GST_FLOW_OK) {
229 ebml->cached_buffer = NULL;
233 if (GST_BUFFER_SIZE (ebml->cached_buffer) >= size) {
235 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
237 *bytes = GST_BUFFER_DATA (ebml->cached_buffer);
241 /* Not possible to get enough data, try a last time with
242 * requesting exactly the size we need */
243 gst_buffer_unref (ebml->cached_buffer);
244 ebml->cached_buffer = NULL;
247 gst_pad_pull_range (ebml->sinkpad, ebml->offset, size,
248 &ebml->cached_buffer);
249 if (ret != GST_FLOW_OK) {
250 GST_DEBUG ("pull_range returned %d", ret);
258 if (GST_BUFFER_SIZE (ebml->cached_buffer) < size) {
259 GST_WARNING_OBJECT (ebml, "Dropping short buffer at offset %"
260 G_GUINT64_FORMAT ": wanted %u bytes, got %u bytes", ebml->offset,
261 size, GST_BUFFER_SIZE (ebml->cached_buffer));
263 gst_buffer_unref (ebml->cached_buffer);
264 ebml->cached_buffer = NULL;
269 return GST_FLOW_ERROR;
273 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
275 *bytes = GST_BUFFER_DATA (*p_buf);
/* Same contract as gst_ebml_read_peek_bytes(), except that after a
 * successful read ebml->offset is moved forward by size. */
281 * Calls pull_range for (offset,size) and advances our offset by size
284 gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
/* All fetching and caching is delegated to the peek variant. */
289 ret = gst_ebml_read_peek_bytes (ebml, size, p_buf, bytes);
290 if (ret != GST_FLOW_OK)
/* Consume the bytes that were just peeked. */
293 ebml->offset += size;
/* Parses the variable-length element ID at ebml->offset and advances
 * ebml->offset past it.
 * NOTE(review): the header text below says "FALSE on error", but the visible
 * error path returns GST_FLOW_ERROR and every caller in this file compares
 * the result against GST_FLOW_OK. */
298 * Read: the element content data ID.
299 * Return: FALSE on error.
303 gst_ebml_read_element_id (GstEbmlRead * ebml, guint32 * id, guint * level_up)
307 gint len_mask = 0x80, read = 1, n = 1;
/* Peek the first byte only; it carries the marker bit that tells how many
 * bytes the ID occupies. */
315 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
316 if (ret != GST_FLOW_OK)
319 b = GST_READ_UINT8 (buf);
/* Search for the marker bit; IDs wider than 4 bytes are not accepted. */
323 while (read <= 4 && !(total & len_mask)) {
/* Error path: report the offending byte and its stream position. */
328 guint64 pos = ebml->offset;
330 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
331 ("Invalid EBML ID size tag (0x%x) at position %" G_GUINT64_FORMAT
332 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, pos, pos));
333 return GST_FLOW_ERROR;
/* Width is known now: peek the whole ID and append the remaining bytes,
 * most significant first. */
336 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
337 if (ret != GST_FLOW_OK)
341 b = GST_READ_UINT8 (buf + n);
342 total = (total << 8) | b;
/* level_up is optional for callers (several in this file pass NULL);
 * presumably guarded by a NULL check on a line not shown here. */
350 *level_up = gst_ebml_read_element_level_up (ebml);
/* Consume the ID bytes. */
352 ebml->offset += read;
/* Parses the variable-length size field at ebml->offset, stores the decoded
 * value in *length and advances ebml->offset past the field.
 * NOTE(review): the header text below promises a byte count or -1, but the
 * visible error path returns GST_FLOW_ERROR and every caller in this file
 * compares the result against GST_FLOW_OK. */
357 * Read: element content length.
358 * Return: the number of bytes read or -1 on error.
362 gst_ebml_read_element_length (GstEbmlRead * ebml, guint64 * length,
369 gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
/* Peek one byte to discover how wide the size field is. */
375 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
376 if (ret != GST_FLOW_OK)
379 b = GST_READ_UINT8 (buf);
/* Search for the width marker bit; size fields may span up to 8 bytes. */
383 while (read <= 8 && !(total & len_mask)) {
/* Error path: report the offending byte and its stream position. */
388 guint64 pos = ebml->offset;
390 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
391 ("Invalid EBML length size tag (0x%x) at position %" G_GUINT64_FORMAT
392 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, pos, pos));
393 return GST_FLOW_ERROR;
/* Strip the marker bit from the first byte and test whether all of its
 * remaining value bits are set. */
396 if ((total &= (len_mask - 1)) == len_mask - 1)
399 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
400 if (ret != GST_FLOW_OK)
/* Remaining bytes are appended most significant first. */
404 guint8 b = GST_READ_UINT8 (buf + n);
408 total = (total << 8) | b;
/* G_MAXUINT64 is handed back as a sentinel; presumably for the all-ones
 * "unknown size" encoding counted in num_ffs (the condition is not shown). */
413 *length = G_MAXUINT64;
/* Consume the size field. */
420 ebml->offset += read;
/* Reads the ID of the next element without consuming it. Void and CRC32
 * elements are stepped over with gst_ebml_read_skip(). */
426 * Return: the ID of the next element.
427 * Level_up contains the amount of levels that this
428 * next element lies higher than the previous one.
432 gst_ebml_peek_id (GstEbmlRead * ebml, guint * level_up, guint32 * id)
436 guint level_up_tmp = 0;
/* gst_ebml_read_element_id() advances ebml->offset, so remember the
 * position and put it back afterwards to make this a pure peek. */
446 off = ebml->offset; /* save offset */
448 if ((ret = gst_ebml_read_element_id (ebml, id, &level_up_tmp)) != GST_FLOW_OK)
451 ebml->offset = off; /* restore offset */
/* Added to, not assigned: presumably so that levels closed while stepping
 * over Void/CRC32 elements are counted too (the retry is not shown). */
453 *level_up += level_up_tmp;
/* Elements that carry no payload of interest are skipped entirely. */
457 case GST_EBML_ID_VOID:
458 GST_DEBUG_OBJECT (ebml, "Skipping EBML Void element");
459 if ((ret = gst_ebml_read_skip (ebml)) != GST_FLOW_OK)
463 case GST_EBML_ID_CRC32:
464 GST_DEBUG_OBJECT (ebml, "Skipping EBML CRC32 element");
465 if ((ret = gst_ebml_read_skip (ebml)) != GST_FLOW_OK)
/* Asks the peer of the sink pad for the stream duration in
 * GST_FORMAT_BYTES. */
475 * Return the length of the stream in bytes
479 gst_ebml_read_get_length (GstEbmlRead * ebml)
481 GstFormat fmt = GST_FORMAT_BYTES;
485 /* FIXME: what to do if we don't get the upstream length */
/* A failed query, a reply in another format or a negative value all end in
 * g_return_val_if_reached (0), so the caller sees a length of 0.
 * NOTE(review): g_return_val_if_reached() is meant for programming errors
 * and logs a critical; an upstream without a known length is a runtime
 * condition. Confirm this is intended. */
486 if (!gst_pad_query_peer_duration (ebml->sinkpad, &fmt, &end) ||
487 fmt != GST_FORMAT_BYTES || end < 0)
488 g_return_val_if_reached (0);
/* Moves the read position to an absolute byte offset. No data is pulled;
 * only ebml->offset changes. */
494 * Seek to a given offset.
498 gst_ebml_read_seek (GstEbmlRead * ebml, guint64 offset)
/* Offsets at or beyond the reported stream length are rejected. Because
 * gst_ebml_read_get_length() yields 0 on failure, every seek is rejected
 * when the length is unknown. */
500 if (offset >= gst_ebml_read_get_length (ebml))
501 return GST_FLOW_UNEXPECTED;
503 ebml->offset = offset;
/* Steps over the next element: parses its ID and size headers, then jumps
 * past the payload by adjusting ebml->offset. The payload is not pulled. */
509 * Skip the next element.
513 gst_ebml_read_skip (GstEbmlRead * ebml)
/* The ID value itself is not needed, only the bytes it occupies. */
521 ret = gst_ebml_read_element_id (ebml, &id, NULL);
522 if (ret != GST_FLOW_OK)
525 ret = gst_ebml_read_element_length (ebml, &length, NULL);
526 if (ret != GST_FLOW_OK)
/* NOTE(review): length can be the G_MAXUINT64 sentinel set by
 * gst_ebml_read_element_length(); adding it would wrap the offset.
 * Confirm whether unknown-size elements can reach this point. */
529 ebml->offset += length;
/* Reads the next element and hands its payload back as a GstBuffer in
 * *buf; the element ID is stored in *id. */
534 * Read the next element as a GstBuffer (binary).
538 gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
544 ret = gst_ebml_read_element_id (ebml, id, NULL);
545 if (ret != GST_FLOW_OK)
548 ret = gst_ebml_read_element_length (ebml, &length, NULL);
549 if (ret != GST_FLOW_OK)
/* An empty buffer is returned in one case; presumably for zero-length
 * payloads (the condition is on a line not shown here). */
553 *buf = gst_buffer_new ();
/* Only the buffer output is requested, so bytes is NULL.
 * NOTE(review): length is 64-bit and the cast to guint would truncate
 * payloads of 4 GiB or more. Confirm a range check exists upstream of
 * this call. */
558 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, buf, NULL);
/* Reads the next element and returns a pointer to its payload in *data and
 * the payload size in *size; the element ID is stored in *id.
 * The pointer comes from gst_ebml_read_pull_bytes() with no buffer output,
 * so it refers to the reader's cached buffer rather than to a copy. */
564 * Read the next element, return a pointer to it and its size.
568 gst_ebml_read_bytes (GstEbmlRead * ebml, guint32 * id, guint8 ** data,
577 ret = gst_ebml_read_element_id (ebml, id, NULL);
578 if (ret != GST_FLOW_OK)
581 ret = gst_ebml_read_element_length (ebml, &length, NULL);
582 if (ret != GST_FLOW_OK)
/* NOTE(review): the 64-bit length is narrowed to guint here and again for
 * *size. Confirm oversized lengths are rejected on the lines not shown. */
591 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, NULL, data);
592 if (ret != GST_FLOW_OK)
595 *size = (guint) length;
/* Reads the next element as a big-endian unsigned integer of 1 to 8 bytes
 * and stores the value in *num. */
601 * Read the next element as an unsigned int.
605 gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
613 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
614 if (ret != GST_FLOW_OK)
/* The payload has already been consumed, so ebml->offset - size is the
 * position where it started; that is what the message reports. */
617 if (size < 1 || size > 8) {
618 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
619 ("Invalid integer element size %d at position %" G_GUINT64_FORMAT
620 " (0x%" G_GINT64_MODIFIER "x)",
621 size, ebml->offset - size, ebml->offset - size));
622 return GST_FLOW_ERROR;
/* Shift in one byte at a time, most significant byte first. */
626 *num = (*num << 8) | *data;
/* Reads the next element as a big-endian signed integer of 1 to 8 bytes
 * and stores the value in *num. */
635 * Read the next element as a signed int.
639 gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
645 gboolean negative = 0;
649 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
650 if (ret != GST_FLOW_OK)
/* The payload has already been consumed, so ebml->offset - size is the
 * position where it started. */
653 if (size < 1 || size > 8) {
654 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
655 ("Invalid integer element size %d at position %" G_GUINT64_FORMAT
656 " (0x%" G_GINT64_MODIFIER "x)", size, ebml->offset - size,
657 ebml->offset - size));
658 return GST_FLOW_ERROR;
/* The top bit of the first byte is masked off before accumulation.
 * NOTE(review): together with the negative flag this looks like
 * sign-magnitude decoding, while EBML signed integers are two's complement
 * (0xFF should decode to -1). The final sign step is not visible here;
 * verify it against the EBML specification. */
664 *num = *data & ~0x80;
/* Remaining bytes are shifted in most significant first. */
670 *num = (*num << 8) | *data;
683 /* Convert 80 bit extended precision float in big endian format to double.
684 * Code taken from libavutil/intfloat_readwrite.c from ffmpeg,
685 * licensed under LGPL */
/* data must point at 10 readable bytes: 2 bytes of sign and exponent
 * followed by 8 bytes of mantissa. */
694 _ext2dbl (guint8 * data)
696 struct _ext_float ext;
/* Split the raw bytes into the exponent and mantissa fields. */
702 memcpy (&ext.exponent, data, 2);
703 memcpy (&ext.mantissa, data + 2, 8);
/* Assemble the 64-bit mantissa, most significant byte first. */
705 for (i = 0; i < 8; i++)
706 m = (m << 8) + ext.mantissa[i];
/* 15-bit exponent: the low 7 bits of the first byte, then the second byte. */
707 e = (((gint) ext.exponent[0] & 0x7f) << 8) | ext.exponent[1];
/* All-ones exponent with a non-zero mantissa is special-cased; presumably
 * NaN (the statement it guards is not shown). */
708 if (e == 0x7fff && m)
710 e -= 16383 + 63; /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
711 * mantissa bit is written as opposed to the
712 * single and double precision formats */
/* The top bit of the first exponent byte is the sign bit. */
713 if (ext.exponent[0] & 0x80)
/* Reads the next element as a floating point value and stores it in *num
 * as a gdouble. Payloads of 4, 8 or 10 bytes are accepted. */
719 * Read the next element as a float.
723 gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
731 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
732 if (ret != GST_FLOW_OK)
/* Any other payload size is a stream error; the reported position is the
 * start of the payload, which has already been consumed. */
735 if (size != 4 && size != 8 && size != 10) {
736 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
737 ("Invalid float element size %d at position %" G_GUINT64_FORMAT
738 " (0x%" G_GINT64_MODIFIER "x)", size, ebml->offset - size,
739 ebml->offset - size));
740 return GST_FLOW_ERROR;
/* 4 bytes: big-endian single precision. memcpy avoids an unaligned or
 * type-punned read from the byte buffer. */
746 memcpy (&f, data, 4);
747 f = GFLOAT_FROM_BE (f);
/* 8 bytes: big-endian double precision. */
750 } else if (size == 8) {
753 memcpy (&d, data, 8);
754 d = GDOUBLE_FROM_BE (d);
/* Remaining case (10 bytes): 80-bit extended precision. */
758 *num = _ext2dbl (data);
/* Reads the next element and returns its payload in *str as a newly
 * allocated string. The memory comes from g_malloc(), so the caller owns
 * it. */
765 * Read the next element as an ASCII string.
769 gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str)
777 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
778 if (ret != GST_FLOW_OK)
/* One extra byte is reserved beyond the payload; presumably for a
 * terminating NUL written on a line not shown here. */
781 *str = g_malloc (size + 1);
782 memcpy (*str, data, size);
/* Reads the next element as a string via gst_ebml_read_ascii() and checks
 * that it is valid UTF-8. In the visible code an invalid string only
 * produces a warning. */
789 * Read the next element as a UTF-8 string.
793 gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
/* The starting offset is only needed for the warning text, so it is only
 * captured when debug logging is compiled in. */
797 #ifndef GST_DISABLE_GST_DEBUG
798 guint64 oldoff = ebml->offset;
801 ret = gst_ebml_read_ascii (ebml, id, str);
802 if (ret != GST_FLOW_OK)
/* Empty and missing strings are not passed to the validator. */
805 if (str != NULL && *str != NULL && **str != '\0' &&
806 !g_utf8_validate (*str, -1, NULL)) {
807 GST_WARNING ("Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
/* Reads the next element as a signed integer and converts it to a date
 * value stored in *date. */
814 * Read the next element as a date.
815 * Returns the seconds since the unix epoch.
819 gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
825 ret = gst_ebml_read_sint (ebml, id, &ebml_date);
826 if (ret != GST_FLOW_OK)
/* Divide by GST_SECOND to scale the raw value down (presumably from
 * nanoseconds to seconds), then add GST_EBML_DATE_OFFSET to move from the
 * EBML date origin to the unix epoch. */
829 *date = (ebml_date / GST_SECOND) + GST_EBML_DATE_OFFSET;
/* Consumes the ID and size headers of a master element and records its
 * extent as a new entry on the level list. The payload is left unread. */
835 * Read the next element, but only the header. The contents
836 * are supposed to be sub-elements which can be read separately.
840 gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
848 ret = gst_ebml_read_element_id (ebml, id, NULL);
849 if (ret != GST_FLOW_OK)
852 ret = gst_ebml_read_element_length (ebml, &length, NULL);
853 if (ret != GST_FLOW_OK)
/* start is the offset just past the headers, i.e. the first payload byte.
 * gst_ebml_read_element_level_up() later pops this entry once the read
 * offset reaches start + length. */
857 level = g_slice_new (GstEbmlLevel);
858 level->start = ebml->offset;
859 level->length = length;
/* Newest level goes to the head of the list. */
860 ebml->level = g_list_prepend (ebml->level, level);
/* Reads the next element and returns a private copy of its payload in
 * *binary. The copy is made with g_memdup(), so the caller owns it. */
866 * Read the next element as binary data.
870 gst_ebml_read_binary (GstEbmlRead * ebml,
871 guint32 * id, guint8 ** binary, guint64 * length)
879 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
880 if (ret != GST_FLOW_OK)
/* data points into the reader's cache, so it is duplicated here to outlive
 * later reads. */
884 *binary = g_memdup (data, size);
890 * Read an EBML header.
894 gst_ebml_read_header (GstEbmlRead * ebml, gchar ** doctype, guint * version)
896 /* this function is the first to be called */
909 ret = gst_ebml_peek_id (ebml, &level_up, &id);
910 if (ret != GST_FLOW_OK)
913 GST_DEBUG_OBJECT (ebml, "id: %08x", GST_READ_UINT32_BE (&id));
915 if (level_up != 0 || id != GST_EBML_ID_HEADER) {
916 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
917 return GST_FLOW_ERROR;
919 ret = gst_ebml_read_master (ebml, &id);
920 if (ret != GST_FLOW_OK)
924 ret = gst_ebml_peek_id (ebml, &level_up, &id);
925 if (ret != GST_FLOW_OK)
933 /* is our read version uptodate? */
934 case GST_EBML_ID_EBMLREADVERSION:{
937 ret = gst_ebml_read_uint (ebml, &id, &num);
938 if (ret != GST_FLOW_OK)
940 g_assert (id == GST_EBML_ID_EBMLREADVERSION);
941 if (num != GST_EBML_VERSION) {
942 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
943 return GST_FLOW_ERROR;
948 /* we only handle 8 byte lengths at max */
949 case GST_EBML_ID_EBMLMAXSIZELENGTH:{
952 ret = gst_ebml_read_uint (ebml, &id, &num);
953 if (ret != GST_FLOW_OK)
955 g_assert (id == GST_EBML_ID_EBMLMAXSIZELENGTH);
956 if (num > sizeof (guint64)) {
957 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
958 return GST_FLOW_ERROR;
963 /* we handle 4 byte IDs at max */
964 case GST_EBML_ID_EBMLMAXIDLENGTH:{
967 ret = gst_ebml_read_uint (ebml, &id, &num);
968 if (ret != GST_FLOW_OK)
970 g_assert (id == GST_EBML_ID_EBMLMAXIDLENGTH);
971 if (num > sizeof (guint32)) {
972 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
973 return GST_FLOW_ERROR;
978 case GST_EBML_ID_DOCTYPE:{
981 ret = gst_ebml_read_ascii (ebml, &id, &text);
982 if (ret != GST_FLOW_OK)
984 g_assert (id == GST_EBML_ID_DOCTYPE);
993 case GST_EBML_ID_DOCTYPEREADVERSION:{
996 ret = gst_ebml_read_uint (ebml, &id, &num);
997 if (ret != GST_FLOW_OK)
999 g_assert (id == GST_EBML_ID_DOCTYPEREADVERSION);
1006 GST_WARNING ("Unknown data type 0x%x in EBML header (ignored)", id);
1009 /* we ignore these two, as they don't tell us anything we care about */
1010 case GST_EBML_ID_EBMLVERSION:
1011 case GST_EBML_ID_DOCTYPEVERSION:
1012 ret = gst_ebml_read_skip (ebml);
1013 if (ret != GST_FLOW_OK)