2 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
4 * ebml-read.c: read EBML data from file/stream
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
27 #include <gst/floatcast/floatcast.h>
29 #include "ebml-read.h"
34 /* NAN is supposed to be in math.h, Microsoft defines it in xmath.h */
39 /* If everything goes wrong try 0.0/0.0 which should be NAN */
41 #define NAN (0.0 / 0.0)
/* Debug category used by every GST_*_OBJECT logging call in this file. */
44 GST_DEBUG_CATEGORY_STATIC (ebmlread_debug);
45 #define GST_CAT_DEFAULT ebmlread_debug
/* Class and instance initialisers; both are referenced from the GTypeInfo
 * table in gst_ebml_read_get_type(). */
47 static void gst_ebml_read_class_init (GstEbmlReadClass * klass);
49 static void gst_ebml_read_init (GstEbmlRead * ebml);
/* State-change handler installed on the element class by
 * gst_ebml_read_class_init(). */
51 static GstStateChangeReturn gst_ebml_read_change_state (GstElement * element,
52 GstStateChange transition);
54 /* convenience functions */
/* peek: fetch bytes at ebml->offset without moving the offset. */
55 static GstFlowReturn gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size,
56 GstBuffer ** p_buf, guint8 ** bytes);
/* pull: same as peek, then advance ebml->offset by size. */
57 static GstFlowReturn gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size,
58 GstBuffer ** p_buf, guint8 ** bytes);
/* Parent class, set in gst_ebml_read_class_init() and used to chain up from
 * finalize and change_state. */
61 static GstElementClass *parent_class; /* NULL */
/* Returns the GType of GstEbmlRead. The type id is kept in a function-local
 * static; the registration against GST_TYPE_ELEMENT is only reached while
 * that static is still zero.
 * NOTE(review): no locking is visible around the lazy registration - confirm
 * the first call cannot race. */
64 gst_ebml_read_get_type (void)
66 static GType gst_ebml_read_type; /* 0 */
68 if (!gst_ebml_read_type) {
/* GTypeInfo table: class size, class init and instance init are the entries
 * visible here. */
69 static const GTypeInfo gst_ebml_read_info = {
70 sizeof (GstEbmlReadClass),
73 (GClassInitFunc) gst_ebml_read_class_init,
78 (GInstanceInitFunc) gst_ebml_read_init,
/* registered with flags 0 as a subtype of GstElement */
82 g_type_register_static (GST_TYPE_ELEMENT, "GstEbmlRead",
83 &gst_ebml_read_info, 0);
86 return gst_ebml_read_type;
/* Returns one GstEbmlLevel record to the slice allocator. Called directly
 * when a level is popped and, cast to GFunc, as the g_list_foreach()
 * callback when the whole level list is torn down. */
90 gst_ebml_level_free (GstEbmlLevel * level)
92 g_slice_free (GstEbmlLevel, level);
/* GObject finalize handler: frees every record on ebml->level and the list
 * itself, drops the cached pull buffer if one is held, then chains up to the
 * parent class. */
96 gst_ebml_finalize (GObject * obj)
98 GstEbmlRead *ebml = GST_EBML_READ (obj);
/* free the level records first, then the list nodes that held them */
100 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
101 g_list_free (ebml->level);
/* release our reference on the read cache */
103 if (ebml->cached_buffer) {
104 gst_buffer_unref (ebml->cached_buffer);
105 ebml->cached_buffer = NULL;
/* chain up */
108 G_OBJECT_CLASS (parent_class)->finalize (obj);
/* Class initialiser: records the parent class, sets up the "ebmlread" debug
 * category and installs the finalize and change_state overrides. */
112 gst_ebml_read_class_init (GstEbmlReadClass * klass)
/* the same class struct viewed as its GstElement and GObject bases */
114 GstElementClass *gstelement_class = (GstElementClass *) klass;
115 GObjectClass *gobject_class = (GObjectClass *) klass;
/* remembered so finalize and change_state can chain up */
117 parent_class = g_type_class_peek_parent (klass);
119 GST_DEBUG_CATEGORY_INIT (ebmlread_debug, "ebmlread",
120 0, "EBML stream helper class");
122 gobject_class->finalize = gst_ebml_finalize;
124 gstelement_class->change_state =
125 GST_DEBUG_FUNCPTR (gst_ebml_read_change_state);
/* Instance initialiser. Starts with no sink pad; the READY->PAUSED check in
 * gst_ebml_read_change_state() requires it to have been set by then
 * (presumably by the subclass - confirm against callers). */
129 gst_ebml_read_init (GstEbmlRead * ebml)
131 ebml->sinkpad = NULL;
/* GstElement::change_state override.
 * Before chaining up: READY->PAUSED is refused (via g_return_val_if_reached)
 * when no sink pad has been set.
 * After chaining up: PAUSED->READY frees the level list and drops the cached
 * pull buffer.
 * Returns the parent class' result unless the sink pad check fails. */
135 static GstStateChangeReturn
136 gst_ebml_read_change_state (GstElement * element, GstStateChange transition)
138 GstStateChangeReturn ret;
139 GstEbmlRead *ebml = GST_EBML_READ (element);
/* checks that must pass before the parent handles the transition */
141 switch (transition) {
142 case GST_STATE_CHANGE_READY_TO_PAUSED:
/* a missing sink pad is treated as a programming error: this logs a
 * critical and fails the state change */
143 if (!ebml->sinkpad) {
144 g_return_val_if_reached (GST_STATE_CHANGE_FAILURE);
/* chain up */
151 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
/* cleanup that runs after the parent handled the transition */
153 switch (transition) {
154 case GST_STATE_CHANGE_PAUSED_TO_READY:
/* forget the element hierarchy: free the records, then the list nodes */
156 g_list_foreach (ebml->level, (GFunc) gst_ebml_level_free, NULL);
157 g_list_free (ebml->level);
/* and release the read cache */
159 if (ebml->cached_buffer) {
160 gst_buffer_unref (ebml->cached_buffer);
161 ebml->cached_buffer = NULL;
174 * Return: the amount of levels in the hierarchy that the
175 * current element lies higher than the previous one.
176 * The opposite isn't done - that's auto-done using master
181 gst_ebml_read_element_level_up (GstEbmlRead * ebml)
184 guint64 pos = ebml->offset;
186 while (ebml->level != NULL) {
187 GstEbmlLevel *level = ebml->level->data;
189 if (pos >= level->start + level->length) {
190 ebml->level = g_list_delete_link (ebml->level, ebml->level);
191 gst_ebml_level_free (level);
202 * Calls pull_range for (offset,size) without advancing our offset
205 gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
210 /* Caching here actually makes much less difference than one would expect.
211 * We do it mainly to avoid pulling buffers of 1 byte all the time */
212 if (ebml->cached_buffer) {
213 guint64 cache_offset = GST_BUFFER_OFFSET (ebml->cached_buffer);
214 guint cache_size = GST_BUFFER_SIZE (ebml->cached_buffer);
216 if (cache_offset <= ebml->offset &&
217 (ebml->offset + size) < (cache_offset + cache_size)) {
219 *p_buf = gst_buffer_create_sub (ebml->cached_buffer,
220 ebml->offset - cache_offset, size);
223 GST_BUFFER_DATA (ebml->cached_buffer) + ebml->offset - cache_offset;
226 /* not enough data in the cache, free cache and get a new one */
227 gst_buffer_unref (ebml->cached_buffer);
228 ebml->cached_buffer = NULL;
231 /* refill the cache */
232 ret = gst_pad_pull_range (ebml->sinkpad, ebml->offset, MAX (size, 64 * 1024),
233 &ebml->cached_buffer);
234 if (ret != GST_FLOW_OK) {
235 ebml->cached_buffer = NULL;
239 if (GST_BUFFER_SIZE (ebml->cached_buffer) >= size) {
241 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
243 *bytes = GST_BUFFER_DATA (ebml->cached_buffer);
247 /* Not possible to get enough data, try a last time with
248 * requesting exactly the size we need */
249 gst_buffer_unref (ebml->cached_buffer);
250 ebml->cached_buffer = NULL;
253 gst_pad_pull_range (ebml->sinkpad, ebml->offset, size,
254 &ebml->cached_buffer);
255 if (ret != GST_FLOW_OK) {
256 GST_DEBUG_OBJECT (ebml, "pull_range returned %d", ret);
264 if (GST_BUFFER_SIZE (ebml->cached_buffer) < size) {
265 GST_WARNING_OBJECT (ebml, "Dropping short buffer at offset %"
266 G_GUINT64_FORMAT ": wanted %u bytes, got %u bytes", ebml->offset,
267 size, GST_BUFFER_SIZE (ebml->cached_buffer));
269 gst_buffer_unref (ebml->cached_buffer);
270 ebml->cached_buffer = NULL;
275 return GST_FLOW_UNEXPECTED;
279 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
281 *bytes = GST_BUFFER_DATA (*p_buf);
287 * Calls pull_range for (offset,size) and advances our offset by size
/* Reads `size` bytes at the current offset through
 * gst_ebml_read_peek_bytes() (same meaning of p_buf and bytes) and then
 * advances ebml->offset by `size`.
 * NOTE(review): the statement guarded by the GST_FLOW_OK check is not part
 * of this listing; presumably the peek error is returned before the offset
 * is touched. */
290 gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf,
295 ret = gst_ebml_read_peek_bytes (ebml, size, p_buf, bytes);
296 if (ret != GST_FLOW_OK)
/* consume the bytes that were just peeked */
299 ebml->offset += size;
304 * Read: the element content data ID.
305 * Return: GST_FLOW_OK on success, a GstFlowReturn error code otherwise.
/* Reads the ID of the element at the current offset and advances
 * ebml->offset past it.
 * The first byte is peeked to find the length marker bit (scanning down
 * from 0x80, at most 4 bytes); an ID without a marker in that range is
 * rejected with GST_FLOW_ERROR. The whole ID is then peeked again and
 * accumulated big-endian, one byte at a time.
 * level_up, when used, receives the result of
 * gst_ebml_read_element_level_up(), evaluated before the offset is advanced.
 * NOTE(review): several statements (loop bodies, early returns, the store
 * to *id) are not part of this listing; the comments cover visible lines
 * only. */
309 gst_ebml_read_element_id (GstEbmlRead * ebml, guint32 * id, guint * level_up)
/* len_mask: marker bit being tested; read: ID length in bytes so far;
 * n: index of the next byte to fold into total */
312 gint len_mask = 0x80, read = 1, n = 1;
/* look at the first byte only; it encodes the ID length */
317 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
318 if (ret != GST_FLOW_OK)
321 b = GST_READ_UINT8 (buf);
/* search for the length marker bit, IDs longer than 4 bytes are invalid */
325 while (read <= 4 && !(total & len_mask)) {
330 GST_ERROR_OBJECT (ebml,
331 "Invalid EBML ID size tag (0x%x) at position %" G_GUINT64_FORMAT " (0x%"
332 G_GINT64_MODIFIER "x)", (guint) b, ebml->offset, ebml->offset);
333 return GST_FLOW_ERROR;
/* now that the length is known, peek the complete ID */
336 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
337 if (ret != GST_FLOW_OK)
/* append the following bytes, most significant first */
341 b = GST_READ_UINT8 (buf + n);
342 total = (total << 8) | b;
/* level bookkeeping uses the offset of the ID itself, so do it first */
350 *level_up = gst_ebml_read_element_level_up (ebml);
352 ebml->offset += read;
357 * Read: element content length.
358 * Return: GST_FLOW_OK on success, a GstFlowReturn error code otherwise.
/* Reads the variable-length size field at the current offset into *length
 * and advances ebml->offset past it.
 * The first byte is peeked to find the length marker bit (scanning down
 * from 0x80, at most 8 bytes); a field without a marker in that range is
 * rejected with GST_FLOW_ERROR. The marker is masked off and the remaining
 * bytes are accumulated big-endian.
 * *length can be set to G_MAXUINT64; the condition for that is not part of
 * this listing (presumably the all-ones "unknown size" encoding counted in
 * num_ffs - TODO confirm).
 * NOTE(review): the second line of the parameter list and several
 * statements are missing from this listing. */
362 gst_ebml_read_element_length (GstEbmlRead * ebml, guint64 * length,
/* len_mask: marker bit being tested; read: field length in bytes so far;
 * n: index of the next byte to fold into total */
367 gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
/* look at the first byte only; it encodes the field length */
371 ret = gst_ebml_read_peek_bytes (ebml, 1, NULL, &buf);
372 if (ret != GST_FLOW_OK)
375 b = GST_READ_UINT8 (buf);
/* search for the length marker bit, fields longer than 8 bytes are invalid */
379 while (read <= 8 && !(total & len_mask)) {
384 GST_ERROR_OBJECT (ebml,
385 "Invalid EBML length size tag (0x%x) at position %" G_GUINT64_FORMAT
386 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, ebml->offset, ebml->offset);
387 return GST_FLOW_ERROR;
/* strip the marker bit, then test whether all remaining bits of the first
 * byte are set */
390 if ((total &= (len_mask - 1)) == len_mask - 1)
/* now that the length is known, peek the complete field */
393 ret = gst_ebml_read_peek_bytes (ebml, read, NULL, &buf);
394 if (ret != GST_FLOW_OK)
/* append the following bytes, most significant first */
398 guint8 b = GST_READ_UINT8 (buf + n);
402 total = (total << 8) | b;
407 *length = G_MAXUINT64;
414 ebml->offset += read;
420 * Return: GST_FLOW_OK with the ID of the next element stored in *id.
421 * Level_up contains the amount of levels that this
422 * next element lies higher than the previous one.
/* Looks at the ID of the next element without consuming it: the offset is
 * saved, the ID is read and the offset is restored afterwards.
 * The level change reported by gst_ebml_read_element_id() is added to
 * *level_up, so the caller's value accumulates rather than being
 * overwritten.
 * Void and CRC32 elements are skipped with gst_ebml_read_skip() rather
 * than being handed to the caller.
 * NOTE(review): how control continues after a skip is not part of this
 * listing - presumably the peek is retried. */
426 gst_ebml_peek_id (GstEbmlRead * ebml, guint * level_up, guint32 * id)
429 guint level_up_tmp = 0;
438 off = ebml->offset; /* save offset */
440 if ((ret = gst_ebml_read_element_id (ebml, id, &level_up_tmp)) != GST_FLOW_OK)
443 ebml->offset = off; /* restore offset */
/* accumulate: levels may already have been counted for skipped elements */
445 *level_up += level_up_tmp;
/* filler element, carries no data */
449 case GST_EBML_ID_VOID:
450 GST_DEBUG_OBJECT (ebml, "Skipping EBML Void element");
451 if ((ret = gst_ebml_read_skip (ebml)) != GST_FLOW_OK)
/* checksum element, not verified here */
455 case GST_EBML_ID_CRC32:
456 GST_DEBUG_OBJECT (ebml, "Skipping EBML CRC32 element");
457 if ((ret = gst_ebml_read_skip (ebml)) != GST_FLOW_OK)
467 * Return the length of the stream in bytes
/* Queries the peer of the sink pad for the stream duration in bytes.
 * If the query fails, answers in another format or reports a negative
 * value, g_return_val_if_reached() logs a critical and 0 is returned. */
471 gst_ebml_read_get_length (GstEbmlRead * ebml)
/* in/out: the peer may answer in a different format than requested */
473 GstFormat fmt = GST_FORMAT_BYTES;
476 /* FIXME: what to do if we don't get the upstream length */
477 if (!gst_pad_query_peer_duration (ebml->sinkpad, &fmt, &end) ||
478 fmt != GST_FORMAT_BYTES || end < 0)
479 g_return_val_if_reached (0);
485 * Seek to a given offset.
/* Moves the read position to `offset`. Offsets at or beyond the upstream
 * length yield GST_FLOW_UNEXPECTED and leave the position unchanged.
 * No data is read here; only ebml->offset is updated.
 * NOTE(review): gst_ebml_read_get_length() returns 0 when the length query
 * fails, in which case every offset is rejected. */
489 gst_ebml_read_seek (GstEbmlRead * ebml, guint64 offset)
491 if (offset >= gst_ebml_read_get_length (ebml))
492 return GST_FLOW_UNEXPECTED;
494 ebml->offset = offset;
500 * Skip the next element.
/* Skips the next element: reads its ID and length (both advance the
 * offset) and then jumps over the payload by adding the length to
 * ebml->offset. No payload bytes are pulled.
 * NOTE(review): the addition is unchecked; a length of G_MAXUINT64, which
 * gst_ebml_read_element_length() can report, wraps the offset around. */
504 gst_ebml_read_skip (GstEbmlRead * ebml)
/* level_up is not wanted here, hence the NULL */
510 ret = gst_ebml_read_element_id (ebml, &id, NULL);
511 if (ret != GST_FLOW_OK)
514 ret = gst_ebml_read_element_length (ebml, &length, NULL);
515 if (ret != GST_FLOW_OK)
/* jump over the payload */
518 ebml->offset += length;
523 * Read the next element as a GstBuffer (binary).
527 gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
532 ret = gst_ebml_read_element_id (ebml, id, NULL);
533 if (ret != GST_FLOW_OK)
536 ret = gst_ebml_read_element_length (ebml, &length, NULL);
537 if (ret != GST_FLOW_OK)
541 *buf = gst_buffer_new ();
546 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, buf, NULL);
552 * Read the next element, return a pointer to it and its size.
556 gst_ebml_read_bytes (GstEbmlRead * ebml, guint32 * id, guint8 ** data,
564 ret = gst_ebml_read_element_id (ebml, id, NULL);
565 if (ret != GST_FLOW_OK)
568 ret = gst_ebml_read_element_length (ebml, &length, NULL);
569 if (ret != GST_FLOW_OK)
578 ret = gst_ebml_read_pull_bytes (ebml, (guint) length, NULL, data);
579 if (ret != GST_FLOW_OK)
582 *size = (guint) length;
588 * Read the next element as an unsigned int.
/* Reads the next element as an unsigned integer into *num.
 * The payload must be 1 to 8 bytes long, otherwise GST_FLOW_ERROR is
 * returned. Bytes are folded in most significant first (big-endian).
 * NOTE(review): the loop around the accumulation line and the
 * initialisation of *num are not part of this listing. */
592 gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
598 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
599 if (ret != GST_FLOW_OK)
602 if (size < 1 || size > 8) {
/* the offset has already moved past the payload, so subtract its size to
 * report where the payload started */
603 GST_ERROR_OBJECT (ebml,
604 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
605 G_GINT64_MODIFIER "x)", size, ebml->offset - size, ebml->offset - size);
606 return GST_FLOW_ERROR;
/* shift in the next byte */
610 *num = (*num << 8) | *data;
619 * Read the next element as a signed int.
/* Reads the next element as a signed integer into *num.
 * The payload must be 1 to 8 bytes long, otherwise GST_FLOW_ERROR is
 * returned. The top bit of a byte is masked off into a separate `negative`
 * flag and the remaining bytes are folded in most significant first.
 * NOTE(review): the condition that sets `negative` and the final sign
 * handling are not part of this listing. Masking bit 7 looks like a
 * sign-and-magnitude decoding - confirm against the EBML definition of
 * signed integers. */
623 gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
627 gboolean negative = 0;
630 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
631 if (ret != GST_FLOW_OK)
634 if (size < 1 || size > 8) {
/* the offset has already moved past the payload, so subtract its size to
 * report where the payload started */
635 GST_ERROR_OBJECT (ebml,
636 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
637 G_GINT64_MODIFIER "x)", size, ebml->offset - size, ebml->offset - size);
638 return GST_FLOW_ERROR;
/* keep the low seven bits of this byte */
644 *num = *data & ~0x80;
/* shift in the next byte */
650 *num = (*num << 8) | *data;
663 /* Convert 80 bit extended precision float in big endian format to double.
664 * Code taken from libavutil/intfloat_readwrite.c from ffmpeg,
665 * licensed under LGPL */
/* Converts the 10 bytes at `data` to a double: the first 2 bytes are copied
 * as sign and exponent, the following 8 as the mantissa, both interpreted
 * most significant byte first.
 * NOTE(review): the result expressions (special-case return, scaling by the
 * exponent, sign handling) are not part of this listing. */
674 _ext2dbl (guint8 * data)
676 struct _ext_float ext;
/* split the input into its two fields */
680 memcpy (&ext.exponent, data, 2);
681 memcpy (&ext.mantissa, data + 2, 8);
/* assemble the 64-bit mantissa, most significant byte first */
683 for (i = 0; i < 8; i++)
684 m = (m << 8) + ext.mantissa[i];
/* 15-bit exponent: the top bit of the first byte is the sign and is
 * masked off here */
685 e = (((gint) ext.exponent[0] & 0x7f) << 8) | ext.exponent[1];
/* all-ones exponent with a non-zero mantissa */
686 if (e == 0x7fff && m)
688 e -= 16383 + 63; /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
689 * mantissa bit is written as opposed to the
690 * single and double precision formats */
/* sign bit set */
691 if (ext.exponent[0] & 0x80)
697 * Read the next element as a float.
/* Reads the next element as a floating point number into *num.
 * Accepted payload sizes are 4, 8 and 10 bytes; anything else yields
 * GST_FLOW_ERROR. 4- and 8-byte payloads are copied out and converted from
 * big-endian; the remaining size goes through _ext2dbl().
 * NOTE(review): the stores of the 4- and 8-byte results to *num are not
 * part of this listing. */
701 gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
707 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
708 if (ret != GST_FLOW_OK)
711 if (size != 4 && size != 8 && size != 10) {
/* the offset has already moved past the payload, so subtract its size to
 * report where the payload started */
712 GST_ERROR_OBJECT (ebml,
713 "Invalid float element size %d at position %" G_GUINT64_FORMAT " (0x%"
714 G_GINT64_MODIFIER "x)", size, ebml->offset - size, ebml->offset - size);
715 return GST_FLOW_ERROR;
/* memcpy rather than a pointer cast: data has no alignment guarantee */
721 memcpy (&f, data, 4);
722 f = GFLOAT_FROM_BE (f);
725 } else if (size == 8) {
728 memcpy (&d, data, 8);
729 d = GDOUBLE_FROM_BE (d);
/* 10-byte extended precision */
733 *num = _ext2dbl (data);
740 * Read the next element as an ASCII string.
/* Reads the next element as a string: allocates size + 1 bytes with
 * g_malloc() and copies the payload into *str. The extra byte leaves room
 * for a terminator (the line writing it is not part of this listing).
 * The caller owns *str; g_malloc() memory is released with g_free(). */
744 gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str)
750 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
751 if (ret != GST_FLOW_OK)
/* data points into the reader's cache, so take a private copy */
754 *str = g_malloc (size + 1);
755 memcpy (*str, data, size);
762 * Read the next element as a UTF-8 string.
/* Reads the next element as a UTF-8 string. The bytes are read with
 * gst_ebml_read_ascii(); a non-empty result that fails g_utf8_validate()
 * only produces a warning, the string is still handed to the caller
 * unchanged. */
766 gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
/* start offset of the element, only needed for the warning below */
770 #ifndef GST_DISABLE_GST_DEBUG
771 guint64 oldoff = ebml->offset;
774 ret = gst_ebml_read_ascii (ebml, id, str);
775 if (ret != GST_FLOW_OK)
/* empty strings are not validated */
778 if (str != NULL && *str != NULL && **str != '\0' &&
779 !g_utf8_validate (*str, -1, NULL)) {
780 GST_WARNING_OBJECT (ebml,
781 "Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
788 * Read the next element as a date.
789 * Returns the seconds since the unix epoch.
/* Reads the next element as a date. The raw value is read as a signed
 * integer, divided by GST_SECOND (i.e. treated as nanoseconds) and shifted
 * by GST_EBML_DATE_OFFSET to give seconds since the unix epoch in *date.
 * The division is an integer division, so sub-second precision is lost. */
793 gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
798 ret = gst_ebml_read_sint (ebml, id, &ebml_date);
799 if (ret != GST_FLOW_OK)
802 *date = (ebml_date / GST_SECOND) + GST_EBML_DATE_OFFSET;
808 * Read the next element, but only the header. The contents
809 * are supposed to be sub-elements which can be read separately.
/* Reads the header (ID and length) of a master element and records it as
 * the new innermost level: a GstEbmlLevel holding the offset right after
 * the header and the payload length is prepended to ebml->level.
 * The payload itself is not read; the offset is left at the first
 * sub-element. Level records are removed again by
 * gst_ebml_read_element_level_up(). */
813 gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
819 ret = gst_ebml_read_element_id (ebml, id, NULL);
820 if (ret != GST_FLOW_OK)
823 ret = gst_ebml_read_element_length (ebml, &length, NULL);
824 if (ret != GST_FLOW_OK)
/* remember level; the head of the list is the innermost open element */
828 level = g_slice_new (GstEbmlLevel);
829 level->start = ebml->offset;
830 level->length = length;
831 ebml->level = g_list_prepend (ebml->level, level);
837 * Read the next element as binary data.
/* Reads the next element as binary data: *binary receives a g_memdup()
 * copy of the payload, which the caller releases with g_free().
 * NOTE(review): the store to *length is not part of this listing. */
841 gst_ebml_read_binary (GstEbmlRead * ebml,
842 guint32 * id, guint8 ** binary, guint64 * length)
848 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
849 if (ret != GST_FLOW_OK)
/* data points into the reader's cache, so take a private copy */
853 *binary = g_memdup (data, size);
859 * Read an EBML header.
863 gst_ebml_read_header (GstEbmlRead * ebml, gchar ** doctype, guint * version)
865 /* this function is the first to be called */
876 ret = gst_ebml_peek_id (ebml, &level_up, &id);
877 if (ret != GST_FLOW_OK)
880 GST_DEBUG_OBJECT (ebml, "id: %08x", GST_READ_UINT32_BE (&id));
882 if (level_up != 0 || id != GST_EBML_ID_HEADER) {
883 GST_ERROR_OBJECT (ebml, "Failed to read header");
884 return GST_FLOW_ERROR;
886 ret = gst_ebml_read_master (ebml, &id);
887 if (ret != GST_FLOW_OK)
891 ret = gst_ebml_peek_id (ebml, &level_up, &id);
892 if (ret != GST_FLOW_OK)
900 /* is our read version uptodate? */
901 case GST_EBML_ID_EBMLREADVERSION:{
904 ret = gst_ebml_read_uint (ebml, &id, &num);
905 if (ret != GST_FLOW_OK)
907 g_assert (id == GST_EBML_ID_EBMLREADVERSION);
908 if (num != GST_EBML_VERSION) {
909 GST_ERROR_OBJECT (ebml, "Unsupported EBML version %" G_GUINT64_FORMAT,
911 return GST_FLOW_ERROR;
914 GST_DEBUG_OBJECT (ebml, "EbmlReadVersion: %" G_GUINT64_FORMAT, num);
918 /* we only handle 8 byte lengths at max */
919 case GST_EBML_ID_EBMLMAXSIZELENGTH:{
922 ret = gst_ebml_read_uint (ebml, &id, &num);
923 if (ret != GST_FLOW_OK)
925 g_assert (id == GST_EBML_ID_EBMLMAXSIZELENGTH);
926 if (num > sizeof (guint64)) {
927 GST_ERROR_OBJECT (ebml,
928 "Unsupported EBML maximum size %" G_GUINT64_FORMAT, num);
929 return GST_FLOW_ERROR;
931 GST_DEBUG_OBJECT (ebml, "EbmlMaxSizeLength: %" G_GUINT64_FORMAT, num);
935 /* we handle 4 byte IDs at max */
936 case GST_EBML_ID_EBMLMAXIDLENGTH:{
939 ret = gst_ebml_read_uint (ebml, &id, &num);
940 if (ret != GST_FLOW_OK)
942 g_assert (id == GST_EBML_ID_EBMLMAXIDLENGTH);
943 if (num > sizeof (guint32)) {
944 GST_ERROR_OBJECT (ebml,
945 "Unsupported EBML maximum ID %" G_GUINT64_FORMAT, num);
946 return GST_FLOW_ERROR;
948 GST_DEBUG_OBJECT (ebml, "EbmlMaxIdLength: %" G_GUINT64_FORMAT, num);
952 case GST_EBML_ID_DOCTYPE:{
955 ret = gst_ebml_read_ascii (ebml, &id, &text);
956 if (ret != GST_FLOW_OK)
958 g_assert (id == GST_EBML_ID_DOCTYPE);
960 GST_DEBUG_OBJECT (ebml, "EbmlDocType: %s", GST_STR_NULL (text));
970 case GST_EBML_ID_DOCTYPEREADVERSION:{
973 ret = gst_ebml_read_uint (ebml, &id, &num);
974 if (ret != GST_FLOW_OK)
976 g_assert (id == GST_EBML_ID_DOCTYPEREADVERSION);
979 GST_DEBUG_OBJECT (ebml, "EbmlReadVersion: %" G_GUINT64_FORMAT, num);
984 GST_WARNING_OBJECT (ebml,
985 "Unknown data type 0x%x in EBML header (ignored)", id);
988 /* we ignore these two, as they don't tell us anything we care about */
989 case GST_EBML_ID_EBMLVERSION:
990 case GST_EBML_ID_DOCTYPEVERSION:
991 ret = gst_ebml_read_skip (ebml);
992 if (ret != GST_FLOW_OK)