2 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
4 * ebml-read.c: read EBML data from file/stream
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
28 #include "ebml-read.h"
33 /* NAN is supposed to be in math.h, Microsoft defines it in xmath.h */
38 /* If everything goes wrong try 0.0/0.0 which should be NAN */
40 #define NAN (0.0 / 0.0)
/* Debug category for this file. It is initialised under the name "ebmlread"
 * in gst_ebml_read_class_init() and made the default category for the
 * GST_*_OBJECT logging macros used below. */
43 GST_DEBUG_CATEGORY_STATIC (ebmlread_debug);
44 #define GST_CAT_DEFAULT ebmlread_debug
/* Forward declarations: GType class/instance init hooks and the
 * state-change handler installed by class_init. */
46 static void gst_ebml_read_class_init (GstEbmlReadClass * klass);
48 static void gst_ebml_read_init (GstEbmlRead * ebml);
50 static GstStateChangeReturn gst_ebml_read_change_state (GstElement * element,
51 GstStateChange transition);
53 /* convenience functions */
54 static GstFlowReturn gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size,
55 GstBuffer ** p_buf, guint8 ** bytes);
56 static GstFlowReturn gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size,
57 GstBuffer ** p_buf, guint8 ** bytes);
/* Parent class pointer; assigned in class_init from
 * g_type_class_peek_parent() and used to chain up finalize/change_state. */
60 static GstElementClass *parent_class; /* NULL */
/* Returns the GType id for GstEbmlRead. The id is kept in a static
 * variable and the registration branch is only entered while that variable
 * is still zero. The type is registered as a child of GST_TYPE_ELEMENT
 * under the name "GstEbmlRead" with no type flags. */
63 gst_ebml_read_get_type (void)
65 static GType gst_ebml_read_type; /* 0 */
67 if (!gst_ebml_read_type) {
/* type description: class struct size plus class-init and instance-init
 * hooks */
68 static const GTypeInfo gst_ebml_read_info = {
69 sizeof (GstEbmlReadClass),
72 (GClassInitFunc) gst_ebml_read_class_init,
77 (GInstanceInitFunc) gst_ebml_read_init,
81 g_type_register_static (GST_TYPE_ELEMENT, "GstEbmlRead",
82 &gst_ebml_read_info, 0);
85 return gst_ebml_read_type;
/* Releases a single GstEbmlLevel with g_slice_free(). It is also passed,
 * cast to GFunc, to g_list_foreach() wherever the whole level list is torn
 * down. */
89 gst_ebml_level_free (GstEbmlLevel * level)
91 g_slice_free (GstEbmlLevel, level);
/* GObject finalize handler: frees every GstEbmlLevel in the level list and
 * then the list itself, drops the cached buffer if one is held, and
 * finally chains up to the parent class' finalize. */
95 gst_ebml_finalize (GObject * obj)
97 GstEbmlRead *ebml = GST_EBML_READ (obj);
/* free the level structs first, then the list nodes */
99 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
100 g_list_free (ebml->level);
/* release our reference on the read cache */
102 if (ebml->cached_buffer) {
103 gst_buffer_unref (ebml->cached_buffer);
104 ebml->cached_buffer = NULL;
/* chain up */
107 G_OBJECT_CLASS (parent_class)->finalize (obj);
/* Class initialiser: remembers the parent class, sets up the "ebmlread"
 * debug category and installs the finalize and change_state handlers. */
111 gst_ebml_read_class_init (GstEbmlReadClass * klass)
113 GstElementClass *gstelement_class = (GstElementClass *) klass;
114 GObjectClass *gobject_class = (GObjectClass *) klass;
/* needed later for chaining up */
116 parent_class = g_type_class_peek_parent (klass);
118 GST_DEBUG_CATEGORY_INIT (ebmlread_debug, "ebmlread",
119 0, "EBML stream helper class");
/* virtual method overrides */
121 gobject_class->finalize = gst_ebml_finalize;
123 gstelement_class->change_state =
124 GST_DEBUG_FUNCPTR (gst_ebml_read_change_state);
/* Instance initialiser: the instance starts without a sink pad.
 * gst_ebml_read_change_state() fails the READY->PAUSED transition while
 * sinkpad is still NULL, so something else has to assign it first. */
128 gst_ebml_read_init (GstEbmlRead * ebml)
130 ebml->sinkpad = NULL;
/* GstElement change_state handler.
 * Before chaining up: READY->PAUSED requires ebml->sinkpad to be set,
 * otherwise g_return_val_if_reached() bails out with
 * GST_STATE_CHANGE_FAILURE.
 * After chaining up: PAUSED->READY frees the level list and releases the
 * cached buffer. */
134 static GstStateChangeReturn
135 gst_ebml_read_change_state (GstElement * element, GstStateChange transition)
137 GstStateChangeReturn ret;
138 GstEbmlRead *ebml = GST_EBML_READ (element);
/* checks done before the parent class sees the transition */
140 switch (transition) {
141 case GST_STATE_CHANGE_READY_TO_PAUSED:
/* cannot read anything without a sink pad */
142 if (!ebml->sinkpad) {
143 g_return_val_if_reached (GST_STATE_CHANGE_FAILURE);
/* chain up to the parent class */
150 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
/* cleanup done after the parent class handled the transition */
152 switch (transition) {
153 case GST_STATE_CHANGE_PAUSED_TO_READY:
/* forget all open levels */
155 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
156 g_list_free (ebml->level);
/* and drop the read cache */
158 if (ebml->cached_buffer) {
159 gst_buffer_unref (ebml->cached_buffer);
160 ebml->cached_buffer = NULL;
174 * Provided buffer is used as cache, based on offset 0, and no further reads
/* Installs @buffer as the read cache for stream position @offset: a
 * previously cached buffer is unreffed, push_cache is switched on, the
 * buffer offset and ebml->offset are both set to @offset, and the level
 * list is freed. */
179 gst_ebml_read_reset_cache (GstEbmlRead * ebml, GstBuffer * buffer,
/* drop the old cache, if any */
182 if (ebml->cached_buffer)
183 gst_buffer_unref (ebml->cached_buffer);
/* NOTE(review): the pointer is stored in ebml->cached_buffer *before*
 * gst_buffer_make_metadata_writable() runs. If that call hands back a
 * different buffer, the offset below is written to the new buffer while
 * cached_buffer keeps pointing at the one that was passed in. Doing the
 * make_metadata_writable step first and caching its result would avoid
 * this - TODO confirm against the GstBuffer API documentation. */
185 ebml->cached_buffer = buffer;
/* mark the cache as pushed; gst_ebml_read_peek_bytes() never drops a
 * pushed cache */
186 ebml->push_cache = TRUE;
187 buffer = gst_buffer_make_metadata_writable (buffer);
188 GST_BUFFER_OFFSET (buffer) = offset;
189 ebml->offset = offset;
/* forget all open levels */
190 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
191 g_list_free (ebml->level);
196 * Return: the amount of levels in the hierarchy that the
197 * current element lies higher than the previous one.
198 * The opposite isn't done - that's auto-done using master
/* Closes every level the reader has already run past: while the head of
 * ebml->level ends (start + length) at or before the current read offset,
 * that head entry is unlinked from the list and freed. */
203 gst_ebml_read_element_level_up (GstEbmlRead * ebml)
/* position to test the levels against */
206 guint64 pos = ebml->offset;
208 while (ebml->level != NULL) {
/* the innermost level sits at the head of the list */
209 GstEbmlLevel *level = ebml->level->data;
/* at or past the end of this level: pop it */
211 if (pos >= level->start + level->length) {
212 ebml->level = g_list_delete_link (ebml->level, ebml->level);
213 gst_ebml_level_free (level);
224 * Calls pull_range for (offset,size) without advancing our offset
/* Makes @size bytes starting at ebml->offset available without advancing
 * the offset. The data is handed out as a sub-buffer (*p_buf) and/or as a
 * raw pointer (*bytes); callers in this file pass NULL for the one they do
 * not need.
 * Order of attempts: serve the request from the cached buffer; otherwise
 * refill the cache with a pull_range of at least 64 KiB; if that comes
 * back shorter than @size, retry once asking for exactly @size bytes.
 * A pushed cache (push_cache set) is never dropped or refilled. */
227 gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
232 /* Caching here actually makes much less difference than one would expect.
233 * We do it mainly to avoid pulling buffers of 1 byte all the time */
234 if (ebml->cached_buffer) {
235 guint64 cache_offset = GST_BUFFER_OFFSET (ebml->cached_buffer);
236 guint cache_size = GST_BUFFER_SIZE (ebml->cached_buffer);
/* cache hit: the requested range lies completely inside the cache */
238 if (cache_offset <= ebml->offset &&
239 (ebml->offset + size) <= (cache_offset + cache_size)) {
241 *p_buf = gst_buffer_create_sub (ebml->cached_buffer,
242 ebml->offset - cache_offset, size);
245 GST_BUFFER_DATA (ebml->cached_buffer) + ebml->offset - cache_offset;
248 /* not enough data in the cache, free cache and get a new one */
249 /* never drop pushed cache */
250 if (ebml->push_cache) {
/* sitting exactly at the end of the pushed data is told apart from a
 * request that merely does not fit */
251 if (ebml->offset == cache_offset + cache_size)
254 return GST_FLOW_UNEXPECTED;
256 gst_buffer_unref (ebml->cached_buffer);
257 ebml->cached_buffer = NULL;
260 /* refill the cache */
261 ret = gst_pad_pull_range (ebml->sinkpad, ebml->offset, MAX (size, 64 * 1024),
262 &ebml->cached_buffer);
/* on failure make sure no stale cache pointer is left behind */
263 if (ret != GST_FLOW_OK) {
264 ebml->cached_buffer = NULL;
/* the refilled cache starts at ebml->offset, hence sub-offset 0 */
268 if (GST_BUFFER_SIZE (ebml->cached_buffer) >= size) {
270 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
272 *bytes = GST_BUFFER_DATA (ebml->cached_buffer);
276 /* Not possible to get enough data, try a last time with
277 * requesting exactly the size we need */
278 gst_buffer_unref (ebml->cached_buffer);
279 ebml->cached_buffer = NULL;
282 gst_pad_pull_range (ebml->sinkpad, ebml->offset, size,
283 &ebml->cached_buffer);
284 if (ret != GST_FLOW_OK) {
285 GST_DEBUG_OBJECT (ebml, "pull_range returned %d", ret);
/* still short after the exact-size request: give up on this read */
293 if (GST_BUFFER_SIZE (ebml->cached_buffer) < size) {
294 GST_WARNING_OBJECT (ebml, "Dropping short buffer at offset %"
295 G_GUINT64_FORMAT ": wanted %u bytes, got %u bytes", ebml->offset,
296 size, GST_BUFFER_SIZE (ebml->cached_buffer));
298 gst_buffer_unref (ebml->cached_buffer);
299 ebml->cached_buffer = NULL;
304 return GST_FLOW_UNEXPECTED;
308 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
310 *bytes = GST_BUFFER_DATA (*p_buf);
316 * Calls pull_range for (offset,size) and advances our offset by size
/* Consuming variant of gst_ebml_read_peek_bytes(): performs the peek with
 * the same arguments, checks its result against GST_FLOW_OK, and moves
 * ebml->offset forward by @size. */
319 gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
324 ret = gst_ebml_read_peek_bytes (ebml, size, p_buf, bytes);
325 if (ret != GST_FLOW_OK)
/* consume the bytes that were just peeked */
328 ebml->offset += size;
333 * Read: the element content data ID.
334 * Return: a flow return; GST_FLOW_ERROR on an invalid ID size tag.
/* Reads the variable-length element ID at the current offset.
 * The first byte is peeked to find the length-marker bit (IDs of up to
 * 4 bytes are accepted), then the whole ID is peeked and folded together
 * most-significant byte first. On success the offset is advanced past the
 * ID. An invalid size tag is logged and yields GST_FLOW_ERROR; failures of
 * the underlying peek are checked via their flow return. */
338 gst_ebml_read_element_id (GstEbmlRead * ebml, guint32 * id, guint * level_up)
/* len_mask: marker bit being tested; read: ID length in bytes;
 * n: index of the next byte to fold in */
341 gint len_mask = 0x80, read = 1, n = 1;
/* look at the first byte only */
346 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
347 if (ret != GST_FLOW_OK)
350 b = GST_READ_UINT8 (buf);
/* search for the length marker within the first 4 bit positions */
354 while (read <= 4 && !(total & len_mask)) {
359 GST_ERROR_OBJECT (ebml,
360 "Invalid EBML ID size tag (0x%x) at position %" G_GUINT64_FORMAT " (0x%"
361 G_GINT64_MODIFIER "x)", (guint) b, ebml->offset, ebml->offset);
362 return GST_FLOW_ERROR;
/* now peek the complete ID */
365 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
366 if (ret != GST_FLOW_OK)
/* append the following bytes, big-endian */
370 b = GST_READ_UINT8 (buf + n);
371 total = (total << 8) | b;
/* close the levels that ended before this element */
379 *level_up = gst_ebml_read_element_level_up (ebml);
/* consume the ID */
381 ebml->offset += read;
386 * Read: element content length.
387 * Return: a flow return; GST_FLOW_ERROR on an invalid length size tag.
/* Reads the variable-length element size at the current offset.
 * The first byte is peeked to find the length-marker bit (sizes of up to
 * 8 bytes are accepted), the marker bit is masked out, and the remaining
 * bytes are folded in most-significant byte first. On success the offset
 * is advanced past the size field. An invalid size tag is logged and
 * yields GST_FLOW_ERROR. */
391 gst_ebml_read_element_length (GstEbmlRead * ebml, guint64 * length,
/* len_mask: marker bit being tested; read: field length in bytes;
 * n: index of the next byte to fold in */
396 gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
/* look at the first byte only */
400 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
401 if (ret != GST_FLOW_OK)
404 b = GST_READ_UINT8 (buf);
/* search for the length marker within the 8 bit positions */
408 while (read <= 8 && !(total & len_mask)) {
413 GST_ERROR_OBJECT (ebml,
414 "Invalid EBML length size tag (0x%x) at position %" G_GUINT64_FORMAT
415 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, ebml->offset, ebml->offset);
416 return GST_FLOW_ERROR;
/* strip the marker bit and test whether all remaining bits of the first
 * byte are set */
419 if ((total &= (len_mask - 1)) == len_mask - 1)
/* now peek the complete size field */
422 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
423 if (ret != GST_FLOW_OK)
427 guint8 b = GST_READ_UINT8 (buf + n);
/* append the following bytes, big-endian */
431 total = (total << 8) | b;
/* sentinel: the length is reported as the largest 64-bit value.
 * NOTE(review): presumably the EBML "unknown size" encoding - confirm */
436 *length = G_MAXUINT64;
/* consume the size field */
443 ebml->offset += read;
449 * Return: the ID of the next element.
450 * Level_up contains the amount of levels that this
451 * next element lies higher than the previous one.
/* Looks at the ID of the next element: the current offset is saved, the
 * ID is read with gst_ebml_read_element_id(), and the offset is restored.
 * The number of levels closed by that read is added to *level_up.
 * A GST_FLOW_END result from the read is turned into a fake Void element
 * with a very large *level_up so that callers unwind. Void and CRC32
 * elements are not reported as-is but skipped with gst_ebml_read_skip(). */
455 gst_ebml_peek_id (GstEbmlRead * ebml, guint * level_up, guint32 * id)
458 guint level_up_tmp = 0;
467 off = ebml->offset; /* save offset */
469 if ((ret = gst_ebml_read_element_id (ebml, id, &level_up_tmp)) != GST_FLOW_OK) {
/* GST_FLOW_END gets the special treatment below; other results take the
 * branch guarded by this test */
470 if (ret != GST_FLOW_END)
473 /* simulate dummy VOID element,
474 * and have the call stack bail out all the way */
475 *id = GST_EBML_ID_VOID;
476 *level_up = G_MAXUINT32 >> 2;
481 ebml->offset = off; /* restore offset */
/* accumulate the levels closed while reading the ID */
483 *level_up += level_up_tmp;
487 case GST_EBML_ID_VOID:
488 GST_DEBUG_OBJECT (ebml, "Skipping EBML Void element");
489 if ((ret = gst_ebml_read_skip (ebml)) != GST_FLOW_OK)
493 case GST_EBML_ID_CRC32:
494 GST_DEBUG_OBJECT (ebml, "Skipping EBML CRC32 element");
495 if ((ret = gst_ebml_read_skip (ebml)) != GST_FLOW_OK)
505 * Return the length of the stream in bytes
/* Asks the peer of the sink pad for the stream duration in
 * GST_FORMAT_BYTES. If the query fails, comes back in another format or
 * reports a negative value, g_return_val_if_reached() bails out with 0. */
509 gst_ebml_read_get_length (GstEbmlRead * ebml)
/* in/out: requested format, may be changed by the query */
511 GstFormat fmt = GST_FORMAT_BYTES;
514 /* FIXME: what to do if we don't get the upstream length */
515 if (!gst_pad_query_peer_duration (ebml->sinkpad, &fmt, &end) ||
516 fmt != GST_FORMAT_BYTES || end < 0)
517 g_return_val_if_reached (0);
523 * Seek to a given offset.
/* Moves the read position to @offset. Offsets at or beyond the stream
 * length reported by gst_ebml_read_get_length() are rejected with
 * GST_FLOW_UNEXPECTED. Only ebml->offset is touched in the lines shown;
 * no data is pulled here. */
527 gst_ebml_read_seek (GstEbmlRead * ebml, guint64 offset)
529 if (offset >= gst_ebml_read_get_length (ebml))
530 return GST_FLOW_UNEXPECTED;
532 ebml->offset = offset;
538 * Skip the next element.
/* Skips one element: reads its ID and size field (both advance the
 * offset) and then jumps over the payload by adding the size to
 * ebml->offset. No payload data is pulled. */
542 gst_ebml_read_skip (GstEbmlRead * ebml)
548 ret = gst_ebml_read_element_id (ebml, &id, NULL);
549 if (ret != GST_FLOW_OK)
552 ret = gst_ebml_read_element_length (ebml, &length, NULL);
553 if (ret != GST_FLOW_OK)
/* NOTE(review): gst_ebml_read_element_length() can report G_MAXUINT64 as
 * length; adding that to the offset would wrap around - TODO confirm
 * whether callers can reach this with such an element */
556 ebml->offset += length;
561 * Read the next element as a GstBuffer (binary).
/* Reads one element and returns its payload as a GstBuffer in *buf.
 * The ID is stored in *id; the payload is pulled (offset advanced) with
 * gst_ebml_read_pull_bytes(). One path hands out a fresh empty buffer from
 * gst_buffer_new() instead of pulling. */
565 gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
570 ret = gst_ebml_read_element_id (ebml, id, NULL);
571 if (ret != GST_FLOW_OK)
574 ret = gst_ebml_read_element_length (ebml, &length, NULL);
575 if (ret != GST_FLOW_OK)
579 *buf = gst_buffer_new ();
/* NOTE(review): length is a 64-bit value narrowed to guint here; no range
 * check is visible, so larger sizes would be truncated - TODO confirm */
584 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, buf, NULL);
590 * Read the next element, return a pointer to it and its size.
/* Reads one element and exposes its payload as a raw pointer: the ID goes
 * to *id, *data points at the payload bytes obtained from
 * gst_ebml_read_pull_bytes(), and *size receives the payload length.
 * The pointer comes from the reader's buffers, not from a fresh
 * allocation made here. */
594 gst_ebml_read_bytes (GstEbmlRead * ebml, guint32 * id, guint8 ** data,
602 ret = gst_ebml_read_element_id (ebml, id, NULL);
603 if (ret != GST_FLOW_OK)
606 ret = gst_ebml_read_element_length (ebml, &length, NULL);
607 if (ret != GST_FLOW_OK)
/* NOTE(review): length is a 64-bit value narrowed to guint here and below;
 * no range check is visible, so larger sizes would be truncated - TODO
 * confirm */
616 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, NULL, data);
617 if (ret != GST_FLOW_OK)
620 *size = (guint) length;
626 * Read the next element as an unsigned int.
/* Reads one element as an unsigned integer into *num. The payload must be
 * 1 to 8 bytes long, otherwise an error is logged and GST_FLOW_ERROR is
 * returned. Bytes are combined most-significant first. */
630 gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
636 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
637 if (ret != GST_FLOW_OK)
/* the offset has already moved past the payload, hence "offset - size" in
 * the message */
640 if (size < 1 || size > 8) {
641 GST_ERROR_OBJECT (ebml,
642 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
643 G_GINT64_MODIFIER "x)", size, ebml->offset - size, ebml->offset - size);
644 return GST_FLOW_ERROR;
/* shift in one byte, big-endian */
648 *num = (*num << 8) | *data;
657 * Read the next element as a signed int.
/* Reads one element as a signed integer into *num. The payload must be
 * 1 to 8 bytes long, otherwise an error is logged and GST_FLOW_ERROR is
 * returned. Bytes are combined most-significant first, with the top bit
 * of one byte masked off before it is used. */
661 gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
/* NOTE(review): presumably records the sign bit that is masked off
 * below - confirm */
665 gboolean negative = 0;
668 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
669 if (ret != GST_FLOW_OK)
/* the offset has already moved past the payload, hence "offset - size" in
 * the message */
672 if (size < 1 || size > 8) {
673 GST_ERROR_OBJECT (ebml,
674 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
675 G_GINT64_MODIFIER "x)", size, ebml->offset - size, ebml->offset - size);
676 return GST_FLOW_ERROR;
/* keep only the low 7 bits of this byte */
682 *num = *data & ~0x80;
/* shift in one byte, big-endian */
688 *num = (*num << 8) | *data;
701 /* Convert 80 bit extended precision float in big endian format to double.
702 * Code taken from libavutil/intfloat_readwrite.c from ffmpeg,
703 * licensed under LGPL */
/* Decodes the 10 bytes at @data: the first 2 bytes are taken as
 * sign+exponent, the following 8 as the mantissa, both big-endian. */
712 _ext2dbl (guint8 * data)
714 struct _ext_float ext;
/* split the input into its two fields */
718 memcpy (&ext.exponent, data, 2);
719 memcpy (&ext.mantissa, data + 2, 8);
/* assemble the mantissa, most-significant byte first */
721 for (i = 0; i < 8; i++)
722 m = (m << 8) + ext.mantissa[i];
/* 15-bit exponent: the top bit of the first byte is excluded here and
 * tested separately further down */
723 e = (((gint) ext.exponent[0] & 0x7f) << 8) | ext.exponent[1];
/* all exponent bits set together with a non-zero mantissa */
724 if (e == 0x7fff && m)
726 e -= 16383 + 63; /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
727 * mantissa bit is written as opposed to the
728 * single and double precision formats */
/* top bit of the first byte set */
729 if (ext.exponent[0] & 0x80)
735 * Read the next element as a float.
/* Reads one element as a floating point number into *num. Accepted
 * payload sizes are 4, 8 and 10 bytes; anything else is logged and
 * returns GST_FLOW_ERROR. 4- and 8-byte payloads are copied out and
 * converted from big-endian, 10-byte payloads go through _ext2dbl(). */
739 gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
745 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
746 if (ret != GST_FLOW_OK)
/* the offset has already moved past the payload, hence "offset - size" in
 * the message */
749 if (size != 4 && size != 8 && size != 10) {
750 GST_ERROR_OBJECT (ebml,
751 "Invalid float element size %d at position %" G_GUINT64_FORMAT " (0x%"
752 G_GINT64_MODIFIER "x)", size, ebml->offset - size, ebml->offset - size);
753 return GST_FLOW_ERROR;
/* 4 bytes: copy into a local before byte-swapping */
759 memcpy (&f, data, 4);
760 f = GFLOAT_FROM_BE (f);
763 } else if (size == 8) {
/* 8 bytes: same approach */
766 memcpy (&d, data, 8);
767 d = GDOUBLE_FROM_BE (d);
/* remaining accepted size (10 bytes) */
771 *num = _ext2dbl (data);
778 * Read the next element as a C string.
/* Reads one element as a string: the payload is fetched with
 * gst_ebml_read_bytes() and copied into a block of size + 1 bytes
 * allocated with g_malloc(); *str is set to that block. */
782 gst_ebml_read_string (GstEbmlRead * ebml, guint32 * id, gchar ** str)
788 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
789 if (ret != GST_FLOW_OK)
/* one extra byte beyond the payload */
792 *str = g_malloc (size + 1);
793 memcpy (*str, data, size);
800 * Read the next element as an ASCII string.
/* Reads one element as a string via gst_ebml_read_string() and checks
 * that it is plain ASCII: any character with the high bit set is logged
 * and makes the function return GST_FLOW_ERROR. */
804 gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str_out)
/* start offset of the element, kept only for the error message and
 * therefore only when debug logging is compiled in */
810 #ifndef GST_DISABLE_GST_DEBUG
811 guint64 oldoff = ebml->offset;
814 ret = gst_ebml_read_string (ebml, id, &str);
815 if (ret != GST_FLOW_OK)
/* walk the string up to its terminator */
818 for (iter = str; *iter != '\0'; iter++) {
819 if (G_UNLIKELY (*iter & 0x80)) {
820 GST_ERROR_OBJECT (ebml,
821 "Invalid ASCII string at offset %" G_GUINT64_FORMAT, oldoff);
823 return GST_FLOW_ERROR;
832 * Read the next element as a UTF-8 string.
/* Reads one element as a string via gst_ebml_read_string() and runs
 * g_utf8_validate() over it when it is non-empty. A validation failure is
 * reported with a warning log message. */
836 gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
/* start offset of the element, kept only for the warning message and
 * therefore only when debug logging is compiled in */
840 #ifndef GST_DISABLE_GST_DEBUG
841 guint64 oldoff = ebml->offset;
844 ret = gst_ebml_read_string (ebml, id, str);
845 if (ret != GST_FLOW_OK)
/* empty or missing strings are not validated */
848 if (str != NULL && *str != NULL && **str != '\0' &&
849 !g_utf8_validate (*str, -1, NULL)) {
850 GST_WARNING_OBJECT (ebml,
851 "Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
858 * Read the next element as a date.
859 * Returns the seconds since the unix epoch.
/* Reads one element as a signed integer and converts it to *date by
 * dividing by GST_SECOND and adding GST_EBML_DATE_OFFSET. */
863 gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
868 ret = gst_ebml_read_sint (ebml, id, &ebml_date);
869 if (ret != GST_FLOW_OK)
/* scale down by GST_SECOND, then shift by the EBML date offset */
872 *date = (ebml_date / GST_SECOND) + GST_EBML_DATE_OFFSET;
878 * Read the next element, but only the header. The contents
879 * are supposed to be sub-elements which can be read separately.
/* Reads only the header (ID and size) of a master element and opens a new
 * level for it: a GstEbmlLevel recording where the content starts and how
 * long it is gets pushed onto the front of ebml->level. The content
 * itself is left unread. */
883 gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
889 ret = gst_ebml_read_element_id (ebml, id, NULL);
890 if (ret != GST_FLOW_OK)
893 ret = gst_ebml_read_element_length (ebml, &length, NULL);
894 if (ret != GST_FLOW_OK)
/* the offset now points at the first byte after the header */
898 level = g_slice_new (GstEbmlLevel);
899 level->start = ebml->offset;
900 level->length = length;
/* innermost level goes to the head of the list */
901 ebml->level = g_list_prepend (ebml->level, level);
907 * Read the next element as binary data.
/* Reads one element as binary data: the payload is fetched with
 * gst_ebml_read_bytes() and *binary is set to a copy of it made with
 * g_memdup(). */
911 gst_ebml_read_binary (GstEbmlRead * ebml,
912 guint32 * id, guint8 ** binary, guint64 * length)
918 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
919 if (ret != GST_FLOW_OK)
/* hand out a private copy rather than the pointer returned above */
923 *binary = g_memdup (data, size);
929 * Read an EBML header.
933 gst_ebml_read_header (GstEbmlRead * ebml, gchar ** doctype, guint * version)
935 /* this function is the first to be called */
946 ret = gst_ebml_peek_id (ebml, &level_up, &id);
947 if (ret != GST_FLOW_OK)
950 GST_DEBUG_OBJECT (ebml, "id: %08x", GST_READ_UINT32_BE (&id));
952 if (level_up != 0 || id != GST_EBML_ID_HEADER) {
953 GST_ERROR_OBJECT (ebml, "Failed to read header");
954 return GST_FLOW_ERROR;
956 ret = gst_ebml_read_master (ebml, &id);
957 if (ret != GST_FLOW_OK)
961 ret = gst_ebml_peek_id (ebml, &level_up, &id);
962 if (ret != GST_FLOW_OK)
970 /* is our read version uptodate? */
971 case GST_EBML_ID_EBMLREADVERSION:{
974 ret = gst_ebml_read_uint (ebml, &id, &num);
975 if (ret != GST_FLOW_OK)
977 g_assert (id == GST_EBML_ID_EBMLREADVERSION);
978 if (num != GST_EBML_VERSION) {
979 GST_ERROR_OBJECT (ebml, "Unsupported EBML version %" G_GUINT64_FORMAT,
981 return GST_FLOW_ERROR;
984 GST_DEBUG_OBJECT (ebml, "EbmlReadVersion: %" G_GUINT64_FORMAT, num);
988 /* we only handle 8 byte lengths at max */
989 case GST_EBML_ID_EBMLMAXSIZELENGTH:{
992 ret = gst_ebml_read_uint (ebml, &id, &num);
993 if (ret != GST_FLOW_OK)
995 g_assert (id == GST_EBML_ID_EBMLMAXSIZELENGTH);
996 if (num > sizeof (guint64)) {
997 GST_ERROR_OBJECT (ebml,
998 "Unsupported EBML maximum size %" G_GUINT64_FORMAT, num);
999 return GST_FLOW_ERROR;
1001 GST_DEBUG_OBJECT (ebml, "EbmlMaxSizeLength: %" G_GUINT64_FORMAT, num);
1005 /* we handle 4 byte IDs at max */
1006 case GST_EBML_ID_EBMLMAXIDLENGTH:{
1009 ret = gst_ebml_read_uint (ebml, &id, &num);
1010 if (ret != GST_FLOW_OK)
1012 g_assert (id == GST_EBML_ID_EBMLMAXIDLENGTH);
1013 if (num > sizeof (guint32)) {
1014 GST_ERROR_OBJECT (ebml,
1015 "Unsupported EBML maximum ID %" G_GUINT64_FORMAT, num);
1016 return GST_FLOW_ERROR;
1018 GST_DEBUG_OBJECT (ebml, "EbmlMaxIdLength: %" G_GUINT64_FORMAT, num);
1022 case GST_EBML_ID_DOCTYPE:{
1025 ret = gst_ebml_read_ascii (ebml, &id, &text);
1026 if (ret != GST_FLOW_OK)
1028 g_assert (id == GST_EBML_ID_DOCTYPE);
1030 GST_DEBUG_OBJECT (ebml, "EbmlDocType: %s", GST_STR_NULL (text));
1040 case GST_EBML_ID_DOCTYPEREADVERSION:{
1043 ret = gst_ebml_read_uint (ebml, &id, &num);
1044 if (ret != GST_FLOW_OK)
1046 g_assert (id == GST_EBML_ID_DOCTYPEREADVERSION);
1049 GST_DEBUG_OBJECT (ebml, "EbmlReadVersion: %" G_GUINT64_FORMAT, num);
1054 GST_WARNING_OBJECT (ebml,
1055 "Unknown data type 0x%x in EBML header (ignored)", id);
1058 /* we ignore these two, as they don't tell us anything we care about */
1059 case GST_EBML_ID_EBMLVERSION:
1060 case GST_EBML_ID_DOCTYPEVERSION:
1061 ret = gst_ebml_read_skip (ebml);
1062 if (ret != GST_FLOW_OK)