2 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
4 * ebml-read.c: read EBML data from file/stream
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
27 #include <gst/floatcast/floatcast.h>
29 #include "ebml-read.h"
34 /* NAN is supposed to be in math.h, Microsoft defines it in xmath.h */
39 /* If everything goes wrong try 0.0/0.0 which should be NAN */
41 #define NAN (0.0 / 0.0)
/* Debug category for this file; it is initialised in
 * gst_ebml_read_class_init() and selected as GST_CAT_DEFAULT. */
44 GST_DEBUG_CATEGORY_STATIC (ebmlread_debug);
45 #define GST_CAT_DEFAULT ebmlread_debug
/* Forward declarations of the class/instance initialisers and of the
 * state-change handler that class_init installs. */
47 static void gst_ebml_read_class_init (GstEbmlReadClass * klass);
49 static void gst_ebml_read_init (GstEbmlRead * ebml);
51 static GstStateChangeReturn gst_ebml_read_change_state (GstElement * element,
52 GstStateChange transition);
54 /* convenience functions */
55 static GstFlowReturn gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size,
56 GstBuffer ** p_buf, guint8 ** bytes);
57 static GstFlowReturn gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size,
58 GstBuffer ** p_buf, guint8 ** bytes);
/* Parent class, looked up in gst_ebml_read_class_init() and used to chain up
 * from the finalize and change_state handlers. */
61 static GstElementClass *parent_class; /* NULL */
/* Returns the GType of GstEbmlRead. Registration (with GST_TYPE_ELEMENT as
 * the parent type and the class/instance initialisers listed in the
 * GTypeInfo) only runs while the function-local static is still 0. */
64 gst_ebml_read_get_type (void)
66 static GType gst_ebml_read_type; /* 0 */
68 if (!gst_ebml_read_type) {
69 static const GTypeInfo gst_ebml_read_info = {
70 sizeof (GstEbmlReadClass),
73 (GClassInitFunc) gst_ebml_read_class_init,
78 (GInstanceInitFunc) gst_ebml_read_init,
82 g_type_register_static (GST_TYPE_ELEMENT, "GstEbmlRead",
83 &gst_ebml_read_info, 0);
86 return gst_ebml_read_type;
/* Returns one GstEbmlLevel to the slice allocator. Also used as the per-item
 * callback for g_list_foreach() when the whole level list is cleared. */
90 gst_ebml_level_free (GstEbmlLevel * level)
92 g_slice_free (GstEbmlLevel, level);
/* GObject finalize handler: frees every level entry and the list holding
 * them, drops the cached buffer if there is one, then chains up to the
 * parent class' finalize. */
96 gst_ebml_finalize (GObject * obj)
98 GstEbmlRead *ebml = GST_EBML_READ (obj);
100 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
101 g_list_free (ebml->level);
103 if (ebml->cached_buffer) {
104 gst_buffer_unref (ebml->cached_buffer);
105 ebml->cached_buffer = NULL;
108 G_OBJECT_CLASS (parent_class)->finalize (obj);
/* Class initialiser: remembers the parent class for chaining up, sets up the
 * "ebmlread" debug category and installs the finalize and change_state
 * handlers defined in this file. */
112 gst_ebml_read_class_init (GstEbmlReadClass * klass)
114 GstElementClass *gstelement_class = (GstElementClass *) klass;
115 GObjectClass *gobject_class = (GObjectClass *) klass;
117 parent_class = g_type_class_peek_parent (klass);
119 GST_DEBUG_CATEGORY_INIT (ebmlread_debug, "ebmlread",
120 0, "EBML stream helper class");
122 gobject_class->finalize = gst_ebml_finalize;
124 gstelement_class->change_state =
125 GST_DEBUG_FUNCPTR (gst_ebml_read_change_state);
/* Instance initialiser: a new reader starts without a sink pad. The
 * READY->PAUSED transition refuses to proceed until one has been set. */
129 gst_ebml_read_init (GstEbmlRead * ebml)
131 ebml->sinkpad = NULL;
/* State-change handler. READY->PAUSED requires a sink pad to have been set.
 * The parent handler is then invoked, and on PAUSED->READY the level list and
 * the cached buffer are released. */
135 static GstStateChangeReturn
136 gst_ebml_read_change_state (GstElement * element, GstStateChange transition)
138 GstStateChangeReturn ret;
139 GstEbmlRead *ebml = GST_EBML_READ (element);
141 switch (transition) {
142 case GST_STATE_CHANGE_READY_TO_PAUSED:
143 if (!ebml->sinkpad) {
/* reaching this point without a sink pad fails the state change */
144 g_return_val_if_reached (GST_STATE_CHANGE_FAILURE);
/* chain up first; our own teardown happens afterwards */
151 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
153 switch (transition) {
154 case GST_STATE_CHANGE_PAUSED_TO_READY:
156 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
157 g_list_free (ebml->level);
159 if (ebml->cached_buffer) {
160 gst_buffer_unref (ebml->cached_buffer);
161 ebml->cached_buffer = NULL;
175 * Provided buffer is used as cache, based on offset 0, and no further reads
/* Replaces the read cache with the supplied buffer and restarts reading at
 * the given offset.
 *
 * Any previously cached buffer is unreffed, the cache is flagged as a pushed
 * cache (gst_ebml_read_peek_bytes() never drops such a cache), the buffer's
 * offset and the reader's offset are both set to the given offset, and all
 * level entries are freed.
 *
 * The buffer pointer is stored in ebml->cached_buffer and unreffed later by
 * this file, so the reader keeps the reference passed in. */
180 gst_ebml_read_reset_cache (GstEbmlRead * ebml, GstBuffer * buffer,
183 if (ebml->cached_buffer)
184 gst_buffer_unref (ebml->cached_buffer);
/* gst_buffer_make_metadata_writable() may return a different buffer, and the
 * one passed in must not be referenced afterwards. Only the returned buffer
 * is therefore stored as the cache, so that the cache is the buffer that
 * carries the offset set below. */
186 buffer = gst_buffer_make_metadata_writable (buffer);
187 ebml->push_cache = TRUE;
188 ebml->cached_buffer = buffer;
189 GST_BUFFER_OFFSET (buffer) = offset;
190 ebml->offset = offset;
191 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
192 g_list_free (ebml->level);
197 * Return: the amount of levels in the hierarchy that the
198 * current element lies higher than the previous one.
199 * The opposite isn't done - that's auto-done using master
/* Walks the level list from its head. Every level whose end
 * (start + length) is at or before the current read offset is unlinked from
 * the list and freed. */
204 gst_ebml_read_element_level_up (GstEbmlRead * ebml)
207 guint64 pos = ebml->offset;
209 while (ebml->level != NULL) {
210 GstEbmlLevel *level = ebml->level->data;
/* the read position has left this level: pop it */
212 if (pos >= level->start + level->length) {
213 ebml->level = g_list_delete_link (ebml->level, ebml->level);
214 gst_ebml_level_free (level);
225 * Calls pull_range for (offset,size) without advancing our offset
/* Makes `size` bytes at the current read offset available without advancing
 * the offset.
 *
 * ebml:  the reader; its cached buffer may be replaced by this call
 * size:  number of bytes wanted
 * p_buf: receives a sub-buffer covering the requested range; callers in this
 *        file pass NULL when they only need the data pointer
 * bytes: receives a pointer into the cached buffer's data; callers in this
 *        file pass NULL when they only need the buffer
 *
 * The request is served from the cached buffer when it covers the range.
 * Otherwise a pushed cache yields a flow return instead of being dropped,
 * and a pulled cache is refilled with pull_range, first for
 * MAX (size, 64 * 1024) bytes and then, if that came back short, for exactly
 * `size` bytes. A buffer that is still short is dropped and
 * GST_FLOW_UNEXPECTED is returned. */
228 gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
233 /* Caching here actually makes much less difference than one would expect.
234 * We do it mainly to avoid pulling buffers of 1 byte all the time */
235 if (ebml->cached_buffer) {
236 guint64 cache_offset = GST_BUFFER_OFFSET (ebml->cached_buffer);
237 guint cache_size = GST_BUFFER_SIZE (ebml->cached_buffer);
239 if (cache_offset <= ebml->offset &&
240 (ebml->offset + size) <= (cache_offset + cache_size)) {
242 *p_buf = gst_buffer_create_sub (ebml->cached_buffer,
243 ebml->offset - cache_offset, size);
246 GST_BUFFER_DATA (ebml->cached_buffer) + ebml->offset - cache_offset;
249 /* not enough data in the cache, free cache and get a new one */
250 /* never drop pushed cache */
251 if (ebml->push_cache) {
252 if (ebml->offset == cache_offset + cache_size)
255 return GST_FLOW_UNEXPECTED;
257 gst_buffer_unref (ebml->cached_buffer);
258 ebml->cached_buffer = NULL;
261 /* refill the cache */
262 ret = gst_pad_pull_range (ebml->sinkpad, ebml->offset, MAX (size, 64 * 1024),
263 &ebml->cached_buffer);
264 if (ret != GST_FLOW_OK) {
265 ebml->cached_buffer = NULL;
269 if (GST_BUFFER_SIZE (ebml->cached_buffer) >= size) {
271 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
273 *bytes = GST_BUFFER_DATA (ebml->cached_buffer);
277 /* Not possible to get enough data, try a last time with
278 * requesting exactly the size we need */
279 gst_buffer_unref (ebml->cached_buffer);
280 ebml->cached_buffer = NULL;
283 gst_pad_pull_range (ebml->sinkpad, ebml->offset, size,
284 &ebml->cached_buffer);
285 if (ret != GST_FLOW_OK) {
286 GST_DEBUG_OBJECT (ebml, "pull_range returned %d", ret);
294 if (GST_BUFFER_SIZE (ebml->cached_buffer) < size) {
295 GST_WARNING_OBJECT (ebml, "Dropping short buffer at offset %"
296 G_GUINT64_FORMAT ": wanted %u bytes, got %u bytes", ebml->offset,
297 size, GST_BUFFER_SIZE (ebml->cached_buffer));
299 gst_buffer_unref (ebml->cached_buffer);
300 ebml->cached_buffer = NULL;
305 return GST_FLOW_UNEXPECTED;
309 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
/* Take the data pointer from the cached buffer, as the refill path above
 * does, and not from *p_buf: callers may pass p_buf == NULL while still
 * asking for bytes. The sub-buffer starts at offset 0 of the cache, so the
 * pointer is the same whenever p_buf is provided. */
311 *bytes = GST_BUFFER_DATA (ebml->cached_buffer);
317 * Calls pull_range for (offset,size) and advances our offset by size
/* Same as gst_ebml_read_peek_bytes(), but once the peek has succeeded the
 * read offset is advanced past the `size` bytes that were returned. */
320 gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
325 ret = gst_ebml_read_peek_bytes (ebml, size, p_buf, bytes);
326 if (ret != GST_FLOW_OK)
329 ebml->offset += size;
334 * Read: the element content data ID.
335 * Return: GST_FLOW_OK on success, another GstFlowReturn value on error.
/* Reads the ID of the element at the current offset and advances the offset
 * past it.
 *
 * The first byte is peeked to find the width of the ID: the loop looks for
 * the length-marker bit, starting at mask 0x80, and accepts at most 4 bytes.
 * A size tag outside that range is logged and rejected with GST_FLOW_ERROR.
 * The whole ID is then peeked again and its remaining bytes are shifted into
 * `total` most-significant byte first.
 *
 * level_up receives the result of gst_ebml_read_element_level_up(). Callers
 * in this file pass NULL for it, so that store is presumably guarded by a
 * check on a line not visible here - TODO confirm. */
339 gst_ebml_read_element_id (GstEbmlRead * ebml, guint32 * id, guint * level_up)
342 gint len_mask = 0x80, read = 1, n = 1;
347 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
348 if (ret != GST_FLOW_OK)
351 b = GST_READ_UINT8 (buf);
355 while (read <= 4 && !(total & len_mask)) {
360 GST_ERROR_OBJECT (ebml,
361 "Invalid EBML ID size tag (0x%x) at position %" G_GUINT64_FORMAT " (0x%"
362 G_GINT64_MODIFIER "x)", (guint) b, ebml->offset, ebml->offset);
363 return GST_FLOW_ERROR;
/* now that the width is known, peek the complete ID */
366 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
367 if (ret != GST_FLOW_OK)
371 b = GST_READ_UINT8 (buf + n);
372 total = (total << 8) | b;
380 *level_up = gst_ebml_read_element_level_up (ebml);
382 ebml->offset += read;
387 * Read: element content length.
388 * Return: GST_FLOW_OK on success, another GstFlowReturn value on error.
/* Reads the length field of the element at the current offset and advances
 * the offset past it.
 *
 * As for element IDs, the first byte is peeked to find the width of the
 * field, here up to 8 bytes. A size tag outside that range is logged and
 * rejected with GST_FLOW_ERROR. The marker bit is masked off the first byte,
 * which is also compared against the all-ones pattern, and the remaining
 * bytes are shifted into `total` most-significant byte first.
 *
 * NOTE(review): *length is set to G_MAXUINT64 in a branch whose condition is
 * not visible here; with the num_ffs counter this presumably covers the
 * all-ones "unknown length" encoding - confirm. */
392 gst_ebml_read_element_length (GstEbmlRead * ebml, guint64 * length,
397 gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
401 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
402 if (ret != GST_FLOW_OK)
405 b = GST_READ_UINT8 (buf);
409 while (read <= 8 && !(total & len_mask)) {
414 GST_ERROR_OBJECT (ebml,
415 "Invalid EBML length size tag (0x%x) at position %" G_GUINT64_FORMAT
416 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, ebml->offset, ebml->offset);
417 return GST_FLOW_ERROR;
/* strip the marker bit; also detect a first byte whose value bits are all 1 */
420 if ((total &= (len_mask - 1)) == len_mask - 1)
423 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
424 if (ret != GST_FLOW_OK)
428 guint8 b = GST_READ_UINT8 (buf + n);
432 total = (total << 8) | b;
437 *length = G_MAXUINT64;
444 ebml->offset += read;
450 * Return: the ID of the next element.
451 * Level_up contains the amount of levels that this
452 * next element lies higher than the previous one.
/* Looks at the ID of the next element without consuming it.
 *
 * The read offset is saved, the ID is read and the offset is restored. The
 * level change reported by the ID read is added to *level_up. If reading the
 * ID fails with GST_FLOW_END, a dummy Void element with a very large
 * *level_up is reported so that callers unwind; other failures take the
 * first branch of that check. Void and CRC32 elements are not reported to
 * the caller: each one is skipped with gst_ebml_read_skip(). */
456 gst_ebml_peek_id (GstEbmlRead * ebml, guint * level_up, guint32 * id)
459 guint level_up_tmp = 0;
468 off = ebml->offset; /* save offset */
470 if ((ret = gst_ebml_read_element_id (ebml, id, &level_up_tmp)) != GST_FLOW_OK) {
471 if (ret != GST_FLOW_END)
474 /* simulate dummy VOID element,
475 * and have the call stack bail out all the way */
476 *id = GST_EBML_ID_VOID;
477 *level_up = G_MAXUINT32 >> 2;
482 ebml->offset = off; /* restore offset */
484 *level_up += level_up_tmp;
488 case GST_EBML_ID_VOID:
489 GST_DEBUG_OBJECT (ebml, "Skipping EBML Void element");
490 if ((ret = gst_ebml_read_skip (ebml)) != GST_FLOW_OK)
494 case GST_EBML_ID_CRC32:
495 GST_DEBUG_OBJECT (ebml, "Skipping EBML CRC32 element");
496 if ((ret = gst_ebml_read_skip (ebml)) != GST_FLOW_OK)
506 * Return the length of the stream in bytes
/* Asks the peer of the sink pad for the stream duration in bytes. If the
 * query fails, answers in another format or reports a negative value, the
 * g_return_val_if_reached() path yields 0. */
510 gst_ebml_read_get_length (GstEbmlRead * ebml)
512 GstFormat fmt = GST_FORMAT_BYTES;
515 /* FIXME: what to do if we don't get the upstream length */
516 if (!gst_pad_query_peer_duration (ebml->sinkpad, &fmt, &end) ||
517 fmt != GST_FORMAT_BYTES || end < 0)
518 g_return_val_if_reached (0);
524 * Seek to a given offset.
/* Moves the read offset to `offset`. An offset at or beyond the stream
 * length reported by gst_ebml_read_get_length() is refused with
 * GST_FLOW_UNEXPECTED and leaves the current offset untouched. */
528 gst_ebml_read_seek (GstEbmlRead * ebml, guint64 offset)
530 if (offset >= gst_ebml_read_get_length (ebml))
531 return GST_FLOW_UNEXPECTED;
533 ebml->offset = offset;
539 * Skip the next element.
/* Skips one element: reads its ID and length, then advances the read offset
 * by the length without fetching the payload. */
543 gst_ebml_read_skip (GstEbmlRead * ebml)
549 ret = gst_ebml_read_element_id (ebml, &id, NULL);
550 if (ret != GST_FLOW_OK)
553 ret = gst_ebml_read_element_length (ebml, &length, NULL);
554 if (ret != GST_FLOW_OK)
557 ebml->offset += length;
562 * Read the next element as a GstBuffer (binary).
/* Reads one element and hands its payload back as a GstBuffer in *buf; the
 * element ID is stored in *id.
 *
 * NOTE(review): the gst_buffer_new() assignment presumably belongs to a
 * zero-length special case whose condition is not visible here - confirm.
 * The 64-bit element length is narrowed to guint for the pull. */
566 gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
571 ret = gst_ebml_read_element_id (ebml, id, NULL);
572 if (ret != GST_FLOW_OK)
575 ret = gst_ebml_read_element_length (ebml, &length, NULL);
576 if (ret != GST_FLOW_OK)
580 *buf = gst_buffer_new ();
585 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, buf, NULL);
591 * Read the next element, return a pointer to it and its size.
/* Reads one element and exposes its payload in place: *data is set by
 * gst_ebml_read_pull_bytes() to point into the reader's cached buffer (no
 * copy is made here), *size receives the payload length and *id the element
 * ID. The 64-bit element length is narrowed to guint. */
595 gst_ebml_read_bytes (GstEbmlRead * ebml, guint32 * id, guint8 ** data,
603 ret = gst_ebml_read_element_id (ebml, id, NULL);
604 if (ret != GST_FLOW_OK)
607 ret = gst_ebml_read_element_length (ebml, &length, NULL);
608 if (ret != GST_FLOW_OK)
617 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, NULL, data);
618 if (ret != GST_FLOW_OK)
621 *size = (guint) length;
627 * Read the next element as an unsigned int.
/* Reads one element as an unsigned integer. Payloads of 1 to 8 bytes are
 * accepted; any other size is logged and rejected with GST_FLOW_ERROR. The
 * bytes are folded into *num most-significant byte first. */
631 gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
637 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
638 if (ret != GST_FLOW_OK)
641 if (size < 1 || size > 8) {
642 GST_ERROR_OBJECT (ebml,
643 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
644 G_GINT64_MODIFIER "x)", size, ebml->offset - size, ebml->offset - size);
645 return GST_FLOW_ERROR;
649 *num = (*num << 8) | *data;
658 * Read the next element as a signed int.
/* Reads one element as a signed integer. Payloads of 1 to 8 bytes are
 * accepted; any other size is logged and rejected with GST_FLOW_ERROR.
 *
 * The top bit of the first byte is masked off when that byte is loaded, and
 * the following bytes are folded in most-significant byte first.
 * NOTE(review): how the `negative` flag is set and applied is on lines not
 * visible here - confirm before relying on the sign handling. */
662 gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
666 gboolean negative = 0;
669 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
670 if (ret != GST_FLOW_OK)
673 if (size < 1 || size > 8) {
674 GST_ERROR_OBJECT (ebml,
675 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
676 G_GINT64_MODIFIER "x)", size, ebml->offset - size, ebml->offset - size);
677 return GST_FLOW_ERROR;
683 *num = *data & ~0x80;
689 *num = (*num << 8) | *data;
702 /* Convert 80 bit extended precision float in big endian format to double.
703 * Code taken from libavutil/intfloat_readwrite.c from ffmpeg,
704 * licensed under LGPL */
/* Decodes a 10-byte big-endian extended-precision float: the first 2 bytes
 * are copied as the exponent field, the next 8 as the mantissa. The mantissa
 * bytes are combined into `m`, and the low 15 bits of the exponent field
 * into `e`. The top bit of the first exponent byte is tested separately
 * (the sign, going by the format described above). */
713 _ext2dbl (guint8 * data)
715 struct _ext_float ext;
719 memcpy (&ext.exponent, data, 2);
720 memcpy (&ext.mantissa, data + 2, 8);
722 for (i = 0; i < 8; i++)
723 m = (m << 8) + ext.mantissa[i];
724 e = (((gint) ext.exponent[0] & 0x7f) << 8) | ext.exponent[1];
/* all-ones exponent with a non-zero mantissa is handled as a special case */
725 if (e == 0x7fff && m)
727 e -= 16383 + 63; /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
728 * mantissa bit is written as opposed to the
729 * single and double precision formats */
730 if (ext.exponent[0] & 0x80)
736 * Read the next element as a float.
/* Reads one element as a floating point number. Only payloads of 4, 8 or 10
 * bytes are accepted; other sizes are logged and rejected with
 * GST_FLOW_ERROR. The 4- and 8-byte forms are copied out of the payload and
 * converted from big-endian; the 10-byte form is decoded by _ext2dbl(). */
740 gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
746 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
747 if (ret != GST_FLOW_OK)
750 if (size != 4 && size != 8 && size != 10) {
751 GST_ERROR_OBJECT (ebml,
752 "Invalid float element size %d at position %" G_GUINT64_FORMAT " (0x%"
753 G_GINT64_MODIFIER "x)", size, ebml->offset - size, ebml->offset - size);
754 return GST_FLOW_ERROR;
760 memcpy (&f, data, 4);
761 f = GFLOAT_FROM_BE (f);
764 } else if (size == 8) {
767 memcpy (&d, data, 8);
768 d = GDOUBLE_FROM_BE (d);
772 *num = _ext2dbl (data);
779 * Read the next element as a C string.
/* Reads one element and copies its payload into a newly g_malloc()ed block
 * of size + 1 bytes stored in *str (one byte more than the payload, leaving
 * room for a terminator). */
783 gst_ebml_read_string (GstEbmlRead * ebml, guint32 * id, gchar ** str)
789 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
790 if (ret != GST_FLOW_OK)
793 *str = g_malloc (size + 1);
794 memcpy (*str, data, size);
801 * Read the next element as an ASCII string.
/* Reads one element as a string and checks that it is plain ASCII: the
 * string is scanned up to its terminator, and the first byte with the top
 * bit set makes the call log an error and return GST_FLOW_ERROR. The offset
 * recorded before the read is only used in that log message, hence the
 * debug-only declaration. */
805 gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str_out)
811 #ifndef GST_DISABLE_GST_DEBUG
812 guint64 oldoff = ebml->offset;
815 ret = gst_ebml_read_string (ebml, id, &str);
816 if (ret != GST_FLOW_OK)
819 for (iter = str; *iter != '\0'; iter++) {
820 if (G_UNLIKELY (*iter & 0x80)) {
821 GST_ERROR_OBJECT (ebml,
822 "Invalid ASCII string at offset %" G_GUINT64_FORMAT, oldoff);
824 return GST_FLOW_ERROR;
833 * Read the next element as a UTF-8 string.
/* Reads one element as a string and validates it as UTF-8. A non-empty
 * string that fails g_utf8_validate() is reported with a warning naming the
 * offset at which the element started. */
837 gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
841 #ifndef GST_DISABLE_GST_DEBUG
842 guint64 oldoff = ebml->offset;
845 ret = gst_ebml_read_string (ebml, id, str);
846 if (ret != GST_FLOW_OK)
849 if (str != NULL && *str != NULL && **str != '\0' &&
850 !g_utf8_validate (*str, -1, NULL)) {
851 GST_WARNING_OBJECT (ebml,
852 "Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
859 * Read the next element as a date.
860 * Returns the seconds since the unix epoch.
/* Reads one element as a signed integer date and converts it: the raw value
 * is divided by GST_SECOND and GST_EBML_DATE_OFFSET is added before the
 * result is stored in *date. */
864 gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
869 ret = gst_ebml_read_sint (ebml, id, &ebml_date);
870 if (ret != GST_FLOW_OK)
873 *date = (ebml_date / GST_SECOND) + GST_EBML_DATE_OFFSET;
879 * Read the next element, but only the header. The contents
880 * are supposed to be sub-elements which can be read separately.
/* Enters a master element: reads its ID and length, then pushes a new level
 * onto the front of ebml->level. The level starts at the offset just past
 * the element header and spans the element's length. */
884 gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
890 ret = gst_ebml_read_element_id (ebml, id, NULL);
891 if (ret != GST_FLOW_OK)
894 ret = gst_ebml_read_element_length (ebml, &length, NULL);
895 if (ret != GST_FLOW_OK)
/* the offset now points at the first child, i.e. the start of the level */
899 level = g_slice_new (GstEbmlLevel);
900 level->start = ebml->offset;
901 level->length = length;
902 ebml->level = g_list_prepend (ebml->level, level);
908 * Read the next element as binary data.
/* Reads one element and stores a g_memdup() copy of its payload in *binary.
 * NOTE(review): the store to *length is not visible here; presumably it
 * receives the payload size - confirm. */
912 gst_ebml_read_binary (GstEbmlRead * ebml,
913 guint32 * id, guint8 ** binary, guint64 * length)
919 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
920 if (ret != GST_FLOW_OK)
924 *binary = g_memdup (data, size);
930 * Read an EBML header.
/* Parses the EBML header at the start of the stream.
 *
 * The first element must be GST_EBML_ID_HEADER at level_up 0, otherwise the
 * call logs an error and returns GST_FLOW_ERROR. The header is entered as a
 * master element and each child is handled by ID:
 *  - EBMLREADVERSION must equal GST_EBML_VERSION
 *  - EBMLMAXSIZELENGTH must not exceed sizeof (guint64)
 *  - EBMLMAXIDLENGTH must not exceed sizeof (guint32)
 *  - DOCTYPE is read as an ASCII string
 *  - DOCTYPEREADVERSION is read as an unsigned integer
 *  - EBMLVERSION and DOCTYPEVERSION are skipped
 *  - anything else is logged as unknown and ignored
 * A child that violates one of the limits above is logged and rejected with
 * GST_FLOW_ERROR. */
934 gst_ebml_read_header (GstEbmlRead * ebml, gchar ** doctype, guint * version)
936 /* this function is the first to be called */
947 ret = gst_ebml_peek_id (ebml, &level_up, &id);
948 if (ret != GST_FLOW_OK)
951 GST_DEBUG_OBJECT (ebml, "id: %08x", GST_READ_UINT32_BE (&id));
953 if (level_up != 0 || id != GST_EBML_ID_HEADER) {
954 GST_ERROR_OBJECT (ebml, "Failed to read header");
955 return GST_FLOW_ERROR;
957 ret = gst_ebml_read_master (ebml, &id);
958 if (ret != GST_FLOW_OK)
962 ret = gst_ebml_peek_id (ebml, &level_up, &id);
963 if (ret != GST_FLOW_OK)
971 /* is our read version uptodate? */
972 case GST_EBML_ID_EBMLREADVERSION:{
975 ret = gst_ebml_read_uint (ebml, &id, &num);
976 if (ret != GST_FLOW_OK)
978 g_assert (id == GST_EBML_ID_EBMLREADVERSION);
979 if (num != GST_EBML_VERSION) {
980 GST_ERROR_OBJECT (ebml, "Unsupported EBML version %" G_GUINT64_FORMAT,
982 return GST_FLOW_ERROR;
985 GST_DEBUG_OBJECT (ebml, "EbmlReadVersion: %" G_GUINT64_FORMAT, num);
989 /* we only handle 8 byte lengths at max */
990 case GST_EBML_ID_EBMLMAXSIZELENGTH:{
993 ret = gst_ebml_read_uint (ebml, &id, &num);
994 if (ret != GST_FLOW_OK)
996 g_assert (id == GST_EBML_ID_EBMLMAXSIZELENGTH);
997 if (num > sizeof (guint64)) {
998 GST_ERROR_OBJECT (ebml,
999 "Unsupported EBML maximum size %" G_GUINT64_FORMAT, num);
1000 return GST_FLOW_ERROR;
1002 GST_DEBUG_OBJECT (ebml, "EbmlMaxSizeLength: %" G_GUINT64_FORMAT, num);
1006 /* we handle 4 byte IDs at max */
1007 case GST_EBML_ID_EBMLMAXIDLENGTH:{
1010 ret = gst_ebml_read_uint (ebml, &id, &num);
1011 if (ret != GST_FLOW_OK)
1013 g_assert (id == GST_EBML_ID_EBMLMAXIDLENGTH);
1014 if (num > sizeof (guint32)) {
1015 GST_ERROR_OBJECT (ebml,
1016 "Unsupported EBML maximum ID %" G_GUINT64_FORMAT, num);
1017 return GST_FLOW_ERROR;
1019 GST_DEBUG_OBJECT (ebml, "EbmlMaxIdLength: %" G_GUINT64_FORMAT, num);
1023 case GST_EBML_ID_DOCTYPE:{
1026 ret = gst_ebml_read_ascii (ebml, &id, &text);
1027 if (ret != GST_FLOW_OK)
1029 g_assert (id == GST_EBML_ID_DOCTYPE);
1031 GST_DEBUG_OBJECT (ebml, "EbmlDocType: %s", GST_STR_NULL (text));
1041 case GST_EBML_ID_DOCTYPEREADVERSION:{
1044 ret = gst_ebml_read_uint (ebml, &id, &num);
1045 if (ret != GST_FLOW_OK)
1047 g_assert (id == GST_EBML_ID_DOCTYPEREADVERSION);
/* NOTE(review): this debug message reuses the "EbmlReadVersion" label of the
 * EBMLREADVERSION case although it logs the doctype read version. */
1050 GST_DEBUG_OBJECT (ebml, "EbmlReadVersion: %" G_GUINT64_FORMAT, num);
1055 GST_WARNING_OBJECT (ebml,
1056 "Unknown data type 0x%x in EBML header (ignored)", id);
1059 /* we ignore these two, as they don't tell us anything we care about */
1060 case GST_EBML_ID_EBMLVERSION:
1061 case GST_EBML_ID_DOCTYPEVERSION:
1062 ret = gst_ebml_read_skip (ebml);
1063 if (ret != GST_FLOW_OK)