2 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
4 * ebml-read.c: read EBML data from file/stream
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
28 #include "ebml-read.h"
/* Debug category for this file. Defining GST_CAT_DEFAULT makes it the
 * category picked up by the GST_DEBUG / GST_WARNING logging macros used
 * further down. It is initialised in gst_ebml_read_class_init(). */
31 GST_DEBUG_CATEGORY_STATIC (ebmlread_debug);
32 #define GST_CAT_DEFAULT ebmlread_debug
/* Forward declarations for the class/instance initialisers and the
 * state-change handler, which are referenced before they are defined. */
34 static void gst_ebml_read_class_init (GstEbmlReadClass * klass);
35 static void gst_ebml_read_init (GstEbmlRead * ebml);
36 static GstStateChangeReturn gst_ebml_read_change_state (GstElement * element,
37 GstStateChange transition);
39 /* convenience functions */
40 static gboolean gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size,
42 static gboolean gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size,
/* Parent class, stored by gst_ebml_read_class_init() and used by
 * gst_ebml_read_change_state() to chain up. */
46 static GstElementClass *parent_class; /* NULL */
/*
 * Returns the GType of GstEbmlRead.
 *
 * The type id is kept in a function-local static; the GTypeInfo is only
 * set up while that static is still zero. The type is registered with
 * g_type_register_static() as a subtype of GST_TYPE_ELEMENT under the
 * name "GstEbmlRead", with gst_ebml_read_class_init and gst_ebml_read_init
 * as class and instance initialisers and no type flags.
 */
49 gst_ebml_read_get_type (void)
51 static GType gst_ebml_read_type; /* 0 */
53 if (!gst_ebml_read_type) {
54 static const GTypeInfo gst_ebml_read_info = {
55 sizeof (GstEbmlReadClass),
58 (GClassInitFunc) gst_ebml_read_class_init,
63 (GInstanceInitFunc) gst_ebml_read_init,
67 g_type_register_static (GST_TYPE_ELEMENT, "GstEbmlRead",
68 &gst_ebml_read_info, 0);
71 return gst_ebml_read_type;
/*
 * Class initialiser for GstEbmlRead.
 *
 * Stores the parent class in the file-level parent_class pointer,
 * initialises the "ebmlread" debug category and installs
 * gst_ebml_read_change_state() as the element's change_state handler.
 */
75 gst_ebml_read_class_init (GstEbmlReadClass * klass)
77 GstElementClass *gstelement_class = (GstElementClass *) klass;
/* remembered so that change_state can chain up */
79 parent_class = g_type_class_peek_parent (klass);
81 GST_DEBUG_CATEGORY_INIT (ebmlread_debug, "ebmlread",
82 0, "EBML stream helper class");
84 gstelement_class->change_state =
85 GST_DEBUG_FUNCPTR (gst_ebml_read_change_state);
/* Instance initialiser for GstEbmlRead; registered as the
 * GInstanceInitFunc in gst_ebml_read_get_type(). */
89 gst_ebml_read_init (GstEbmlRead * ebml)
/*
 * GstElement change_state handler for GstEbmlRead.
 *
 * Before chaining up, a READY->PAUSED transition checks that
 * ebml->sinkpad is set and otherwise reports GST_STATE_CHANGE_FAILURE
 * through g_return_val_if_reached(). After the parent class handler has
 * run, a PAUSED->READY transition frees every entry of the ebml->level
 * list, frees the list itself and drops the cached pull buffer.
 */
95 static GstStateChangeReturn
96 gst_ebml_read_change_state (GstElement * element, GstStateChange transition)
98 GstStateChangeReturn ret;
99 GstEbmlRead *ebml = GST_EBML_READ (element);
101 switch (transition) {
102 case GST_STATE_CHANGE_READY_TO_PAUSED:
/* a sink pad is required to pull data from */
103 if (!ebml->sinkpad) {
104 g_return_val_if_reached (GST_STATE_CHANGE_FAILURE);
/* chain up to the parent class */
111 ret = GST_ELEMENT_CLASS (parent_class)->change_state (element, transition);
113 switch (transition) {
114 case GST_STATE_CHANGE_PAUSED_TO_READY:
/* free the per-level records first, then the list nodes */
116 g_list_foreach (ebml->level, (GFunc) g_free, NULL);
117 g_list_free (ebml->level);
/* release the buffer cached by gst_ebml_read_peek_bytes() */
119 if (ebml->cached_buffer) {
120 gst_buffer_unref (ebml->cached_buffer);
121 ebml->cached_buffer = NULL;
134 * Return: the number of levels in the hierarchy that the
135 * current element lies higher than the previous one.
136 * The opposite isn't done - that's auto-done using master
/*
 * Pops finished master elements off the level stack.
 *
 * Starting from the current read offset, the loop looks at the last
 * entry of ebml->level and removes it from the list when the offset is
 * at or beyond that level's end (start + length).
 */
141 gst_ebml_read_element_level_up (GstEbmlRead * ebml)
144 guint64 pos = ebml->offset;
146 while (ebml->level != NULL) {
/* the innermost open master element is at the tail of the list */
147 GList *last = g_list_last (ebml->level);
148 GstEbmlLevel *level = last->data;
/* offset is past the end of this master element: leave the level */
150 if (pos >= level->start + level->length) {
151 ebml->level = g_list_remove (ebml->level, level);
163 * Calls pull_range for (offset,size) without advancing our offset
/*
 * Provides `size` bytes at ebml->offset in *p_buf without changing
 * ebml->offset.
 *
 * Visible steps, in order:
 *  - if a cached buffer exists and the request lies inside it, a
 *    sub-buffer of the cache is handed out;
 *  - the cached buffer is released;
 *  - a pull of MAX (size, 64 KiB) is tried to refill the cache, and on
 *    success with enough data a sub-buffer of the new cache is used;
 *  - as a fallback exactly `size` bytes are pulled directly into *p_buf;
 *    a non-OK flow return is logged, and a buffer shorter than `size`
 *    is logged and unreffed.
 *
 * NOTE(review): the cache-hit test uses a strict `<`, so a request that
 * ends exactly at the end of the cached buffer is not treated as a hit.
 * This looks like it could be `<=` - confirm.
 */
166 gst_ebml_read_peek_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf)
170 /* Caching here actually makes much less difference than one would expect.
171 * We do it mainly to avoid pulling buffers of 1 byte all the time */
172 if (ebml->cached_buffer) {
173 guint64 cache_offset = GST_BUFFER_OFFSET (ebml->cached_buffer);
174 guint cache_size = GST_BUFFER_SIZE (ebml->cached_buffer);
/* is [offset, offset + size) covered by the cached range? */
176 if (cache_offset <= ebml->offset &&
177 (ebml->offset + size) < (cache_offset + cache_size)) {
178 *p_buf = gst_buffer_create_sub (ebml->cached_buffer,
179 ebml->offset - cache_offset, size);
/* drop the cached buffer */
182 gst_buffer_unref (ebml->cached_buffer);
183 ebml->cached_buffer = NULL;
/* refill the cache with at least 64 KiB starting at the current offset */
186 if (gst_pad_pull_range (ebml->sinkpad, ebml->offset, MAX (size, 64 * 1024),
187 &ebml->cached_buffer) == GST_FLOW_OK &&
188 GST_BUFFER_SIZE (ebml->cached_buffer) >= size) {
189 *p_buf = gst_buffer_create_sub (ebml->cached_buffer, 0, size);
/* fallback: pull exactly the requested number of bytes */
193 ret = gst_pad_pull_range (ebml->sinkpad, ebml->offset, size, p_buf);
194 if (ret != GST_FLOW_OK) {
195 GST_DEBUG ("pull_range returned %d", ret);
/* upstream returned fewer bytes than requested */
199 if (GST_BUFFER_SIZE (*p_buf) < size) {
200 GST_WARNING_OBJECT (ebml, "Dropping short buffer at offset %"
201 G_GUINT64_FORMAT ": wanted %u bytes, got %u bytes", ebml->offset,
202 size, GST_BUFFER_SIZE (*p_buf));
203 gst_buffer_unref (*p_buf);
212 * Calls pull_range for (offset,size) and advances our offset by size
/*
 * Fetches `size` bytes at the current offset into *p_buf through
 * gst_ebml_read_peek_bytes() and then advances ebml->offset by `size`.
 * The failed-peek check comes before the offset is advanced.
 */
215 gst_ebml_read_pull_bytes (GstEbmlRead * ebml, guint size, GstBuffer ** p_buf)
217 if (!gst_ebml_read_peek_bytes (ebml, size, p_buf))
220 ebml->offset += size;
225 * Read: the element content data ID.
226 * Return: FALSE on error.
/*
 * Reads the ID of the element at the current offset.
 *
 * The first byte is peeked to determine the ID width: a loop bounded by
 * read <= 4 tests `total` against len_mask (initially 0x80). An
 * "Invalid EBML ID size tag" element error carrying the offending byte
 * and the stream position is posted for a bad size tag. The full ID
 * (`read` bytes) is then peeked and the bytes after the first are folded
 * into `total` most-significant first. *level_up is filled from
 * gst_ebml_read_element_level_up() and ebml->offset is advanced by the
 * number of ID bytes.
 *
 * NOTE(review): `pos` is a guint64 but is printed with %llu / %llx;
 * other messages in this file use G_GUINT64_FORMAT for guint64 values.
 */
230 gst_ebml_read_element_id (GstEbmlRead * ebml, guint32 * id, guint * level_up)
233 gint len_mask = 0x80, read = 1, n = 1;
/* look at the first byte only; it encodes the ID width */
237 if (!gst_ebml_read_peek_bytes (ebml, 1, &buf))
240 b = GST_READ_UINT8 (GST_BUFFER_DATA (buf));
241 gst_buffer_unref (buf);
245 while (read <= 4 && !(total & len_mask)) {
250 guint64 pos = ebml->offset;
252 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
253 ("Invalid EBML ID size tag (0x%x) at position %llu (0x%llx)",
254 (guint) b, pos, pos));
/* peek the whole ID now that its width is known */
258 if (!gst_ebml_read_peek_bytes (ebml, read, &buf))
/* append the next ID byte, big-endian */
262 b = GST_READ_UINT8 (GST_BUFFER_DATA (buf) + n);
263 total = (total << 8) | b;
271 *level_up = gst_ebml_read_element_level_up (ebml);
273 gst_buffer_unref (buf);
/* consume the ID bytes */
275 ebml->offset += read;
280 * Read: element content length.
281 * Return: the number of bytes read or -1 on error.
/*
 * Reads the length field of the element at the current offset into
 * *length.
 *
 * The first byte is peeked to determine the field width: a loop bounded
 * by read <= 8 tests `total` against len_mask (initially 0x80). An
 * "Invalid EBML length size tag" element error is posted for a bad size
 * tag. The width marker bit is masked off `total`, and the remaining
 * bits are compared against all-ones (len_mask - 1). The full field is
 * then peeked and the following bytes are folded into `total`
 * most-significant first. One visible path stores G_MAXUINT64 in
 * *length; presumably this marks an all-ones "unknown" length tracked
 * via num_ffs - TODO confirm. ebml->offset is advanced by the number of
 * length bytes.
 */
285 gst_ebml_read_element_length (GstEbmlRead * ebml, guint64 * length)
288 gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
/* look at the first byte only; it encodes the field width */
292 if (!gst_ebml_read_peek_bytes (ebml, 1, &buf))
295 b = GST_READ_UINT8 (GST_BUFFER_DATA (buf));
296 gst_buffer_unref (buf);
300 while (read <= 8 && !(total & len_mask)) {
305 guint64 pos = ebml->offset;
307 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
308 ("Invalid EBML length size tag (0x%x) at position %llu (0x%llx)",
309 (guint) b, pos, pos));
/* strip the width marker bit and test whether all value bits are set */
313 if ((total &= (len_mask - 1)) == len_mask - 1)
/* peek the whole length field now that its width is known */
316 if (!gst_ebml_read_peek_bytes (ebml, read, &buf))
320 guint8 b = GST_READ_UINT8 (GST_BUFFER_DATA (buf) + n);
/* append the next length byte, big-endian */
324 total = (total << 8) | b;
327 gst_buffer_unref (buf);
330 *length = G_MAXUINT64;
/* consume the length bytes */
334 ebml->offset += read;
340 * Return: the ID of the next element.
341 * Level_up contains the amount of levels that this
342 * next element lies higher than the previous one.
/*
 * Reads the ID of the next element (and the level-up count) without
 * consuming it: the current offset is saved, the ID is read with
 * gst_ebml_read_element_id(), and the saved offset is written back.
 */
346 gst_ebml_peek_id (GstEbmlRead * ebml, guint * level_up, guint32 * id)
352 off = ebml->offset; /* save offset */
354 if (!gst_ebml_read_element_id (ebml, id, level_up))
357 ebml->offset = off; /* restore offset */
362 * Return the length of the stream in bytes
/*
 * Determines the stream length by querying the peer of the sink pad for
 * its duration in GST_FORMAT_BYTES.
 *
 * A failed query, a reply in a different format, or a negative value
 * goes through g_return_val_if_reached (0); the original author marked
 * both of these paths with FIXME.
 */
366 gst_ebml_read_get_length (GstEbmlRead * ebml)
368 GstFormat fmt = GST_FORMAT_BYTES;
371 if (!gst_pad_query_duration (GST_PAD_PEER (ebml->sinkpad), &fmt, &end))
372 g_return_val_if_reached (0); ///// FIXME /////////
/* the peer may answer in another format or with an unknown (negative) value */
374 if (fmt != GST_FORMAT_BYTES || end < 0)
375 g_return_val_if_reached (0); ///// FIXME /////////
381 * Seek to a given offset.
/*
 * Moves the read position to `offset`.
 *
 * The requested offset is first compared against the stream length from
 * gst_ebml_read_get_length(); the visible check triggers when the offset
 * is at or beyond that length. The new position is stored in
 * ebml->offset.
 */
385 gst_ebml_read_seek (GstEbmlRead * ebml, guint64 offset)
387 if (offset >= gst_ebml_read_get_length (ebml))
390 ebml->offset = offset;
396 * Skip the next element.
/*
 * Skips over the next element: reads its ID (no level-up count is
 * requested) and its length, then advances ebml->offset by that length
 * without pulling the payload.
 *
 * NOTE(review): gst_ebml_read_element_length() can store G_MAXUINT64 in
 * the length; adding that to the offset would wrap - confirm whether
 * this can be reached here.
 */
400 gst_ebml_read_skip (GstEbmlRead * ebml)
405 if (!gst_ebml_read_element_id (ebml, &id, NULL))
408 if (gst_ebml_read_element_length (ebml, &length) < 0)
/* jump over the payload */
411 ebml->offset += length;
416 * Read the next element as a GstBuffer (binary).
/*
 * Reads the next element and hands back its payload as a GstBuffer.
 *
 * The element ID is stored in *id, the length is read, and the payload
 * is fetched with gst_ebml_read_pull_bytes(), which also advances the
 * offset. One visible path instead stores a fresh empty buffer from
 * gst_buffer_new() in *buf (presumably for zero-length elements - TODO
 * confirm).
 *
 * NOTE(review): the 64-bit length is cast to guint for the pull, so
 * lengths above G_MAXUINT would be truncated.
 */
420 gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
424 if (!gst_ebml_read_element_id (ebml, id, NULL))
427 if (gst_ebml_read_element_length (ebml, &length) < 0)
431 *buf = gst_buffer_new ();
436 if (!gst_ebml_read_pull_bytes (ebml, (guint) length, buf))
443 * Read the next element as an unsigned int.
/*
 * Reads the next element and decodes its payload as an unsigned integer.
 *
 * ebml: the reader
 * id:   receives the element ID (filled in by gst_ebml_read_buffer())
 * num:  receives the decoded value
 *
 * The payload must be 1 to 8 bytes long; any other size posts an
 * "Invalid integer element size" element error and releases the buffer.
 * The value is assembled most-significant byte first: each step shifts
 * *num left by 8 bits and ORs in the byte at index
 * GST_BUFFER_SIZE (buf) - size.
 */
447 gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
453 if (!gst_ebml_read_buffer (ebml, id, &buf))
456 data = GST_BUFFER_DATA (buf);
457 size = GST_BUFFER_SIZE (buf);
458 if (size < 1 || size > 8) {
/* position is reported in decimal and, after the 0x prefix, in hex */
459 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
460 ("Invalid integer element size %d at position %llu (0x%llx)",
461 size, GST_BUFFER_OFFSET (buf), GST_BUFFER_OFFSET (buf)));
462 gst_buffer_unref (buf);
/* big-endian accumulation of the payload bytes */
467 *num = (*num << 8) | data[GST_BUFFER_SIZE (buf) - size];
471 gst_buffer_unref (buf);
477 * Read the next element as a signed int.
/*
 * Reads the next element and decodes its payload as a signed integer.
 *
 * The payload must be 1 to 8 bytes long; any other size posts an
 * "Invalid integer element size" element error and releases the buffer.
 * The buffer is made writable before decoding. The top bit of the first
 * byte is tested to detect a negative value, and the bytes are
 * accumulated into *num most-significant first.
 */
481 gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
485 guint size, negative = 0, n = 0;
487 if (!gst_ebml_read_buffer (ebml, id, &buf))
490 size = GST_BUFFER_SIZE (buf);
491 if (size < 1 || size > 8) {
492 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
493 ("Invalid integer element size %d at position %llu (0x%llx)",
494 size, GST_BUFFER_OFFSET (buf), GST_BUFFER_OFFSET (buf)));
495 gst_buffer_unref (buf);
/* get a buffer whose data may be modified */
499 buf = gst_buffer_make_writable (buf);
501 data = GST_BUFFER_DATA (buf);
/* sign bit of the most significant byte */
503 if (data[0] & 0x80) {
/* big-endian accumulation of the payload bytes */
510 *num = (*num << 8) | data[n++];
518 gst_buffer_unref (buf);
524 * Read the next element as a float.
/*
 * Reads the next element and decodes its payload as a floating point
 * number.
 *
 * Payload sizes of 4, 8 and 10 bytes pass the size check; any other size
 * posts an "Invalid float element size" element error. A separate
 * NOT_IMPLEMENTED error exists for 10-byte floats. For 4- and 8-byte
 * payloads a big-endian host reads the value directly from the buffer
 * data; otherwise the bytes are copied in reversed order into a local
 * gfloat / gdouble.
 *
 * NOTE(review): the big-endian path dereferences the buffer data through
 * a gfloat* / gdouble* cast, which assumes suitable alignment - confirm.
 */
528 gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
534 if (!gst_ebml_read_buffer (ebml, id, &buf))
537 data = GST_BUFFER_DATA (buf);
538 size = GST_BUFFER_SIZE (buf);
540 if (size != 4 && size != 8 && size != 10) {
541 GST_ELEMENT_ERROR (ebml, STREAM, DEMUX, (NULL),
542 ("Invalid float element size %d at position %llu (0x%llx)",
543 size, GST_BUFFER_OFFSET (buf), GST_BUFFER_OFFSET (buf)));
544 gst_buffer_unref (buf);
549 GST_ELEMENT_ERROR (ebml, CORE, NOT_IMPLEMENTED, (NULL),
550 ("FIXME! 10-byte floats unimplemented"));
551 gst_buffer_unref (buf);
/* 4-byte float: stored big-endian in the stream */
558 #if (G_BYTE_ORDER == G_BIG_ENDIAN)
559 f = *(gfloat *) data;
/* other hosts: reverse the byte order while copying */
562 ((guint8 *) & f)[size - 1] = data[4 - size];
/* 8-byte float: same scheme with a gdouble */
571 #if (G_BYTE_ORDER == G_BIG_ENDIAN)
572 d = *(gdouble *) data;
575 ((guint8 *) & d)[size - 1] = data[8 - size];
583 gst_buffer_unref (buf);
589 * Read the next element as an ASCII string.
/*
 * Reads the next element and returns its payload as a NUL-terminated
 * string.
 *
 * *str receives a g_malloc()ed copy of the payload, one byte larger than
 * the payload so that a terminating '\0' can be appended. The element
 * buffer is released afterwards; the string should be released with
 * g_free().
 */
593 gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str)
597 if (!gst_ebml_read_buffer (ebml, id, &buf))
/* payload size plus room for the terminator */
600 *str = g_malloc (GST_BUFFER_SIZE (buf) + 1);
601 memcpy (*str, GST_BUFFER_DATA (buf), GST_BUFFER_SIZE (buf));
602 (*str)[GST_BUFFER_SIZE (buf)] = '\0';
604 gst_buffer_unref (buf);
610 * Read the next element as a UTF-8 string.
/*
 * Reads the next element as a UTF-8 string.
 *
 * The string is read with gst_ebml_read_ascii(). A non-empty result that
 * fails g_utf8_validate() is reported with a GST_WARNING that includes
 * the offset at which the element started. That offset is only recorded
 * when debug logging is compiled in.
 */
614 gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
618 #ifndef GST_DISABLE_GST_DEBUG
/* remember where the element started, for the warning below */
619 guint64 oldoff = ebml->offset;
622 ret = gst_ebml_read_ascii (ebml, id, str);
624 if (str != NULL && *str != NULL && **str != '\0' &&
625 !g_utf8_validate (*str, -1, NULL)) {
626 GST_WARNING ("Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
633 * Read the next element as a date.
634 * Returns the seconds since the unix epoch.
/*
 * Reads the next element as a date.
 *
 * The raw value is read as a signed integer with gst_ebml_read_sint(),
 * divided by GST_SECOND and shifted by GST_EBML_DATE_OFFSET before being
 * stored in *date.
 */
638 gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
641 gboolean res = gst_ebml_read_sint (ebml, id, &ebml_date);
/* scale down by GST_SECOND, then apply the EBML date offset */
643 *date = (ebml_date / GST_SECOND) + GST_EBML_DATE_OFFSET;
648 * Read the next element, but only the header. The contents
649 * are supposed to be sub-elements which can be read separately.
/*
 * Reads the header (ID and length) of a master element and opens a new
 * level for it.
 *
 * A GstEbmlLevel is allocated whose start is the offset immediately
 * after the header and whose length is the element length; it is
 * appended to ebml->level. Entries are removed again by
 * gst_ebml_read_element_level_up() and freed on PAUSED->READY in
 * gst_ebml_read_change_state().
 */
653 gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
658 if (!gst_ebml_read_element_id (ebml, id, NULL))
661 if (gst_ebml_read_element_length (ebml, &length) < 0)
/* remember the extent of this master element's content */
665 level = g_new (GstEbmlLevel, 1);
666 level->start = ebml->offset;
667 level->length = length;
668 ebml->level = g_list_append (ebml->level, level);
674 * Read the next element as binary data.
/*
 * Reads the next element and returns a copy of its payload.
 *
 * *length receives the payload size and *binary a g_memdup() copy of the
 * payload bytes. The element buffer is released afterwards; the copy
 * should be released with g_free().
 */
678 gst_ebml_read_binary (GstEbmlRead * ebml,
679 guint32 * id, guint8 ** binary, guint64 * length)
683 if (!gst_ebml_read_buffer (ebml, id, &buf))
686 *length = GST_BUFFER_SIZE (buf);
687 *binary = g_memdup (GST_BUFFER_DATA (buf), GST_BUFFER_SIZE (buf));
689 gst_buffer_unref (buf);
695 * Read an EBML header.
699 gst_ebml_read_header (GstEbmlRead * ebml, gchar ** doctype, guint * version)
701 /* this function is the first to be called */
711 if (!gst_ebml_peek_id (ebml, &level_up, &id))
714 GST_DEBUG_OBJECT (ebml, "id: %08x", GST_READ_UINT32_BE (&id));
716 if (level_up != 0 || id != GST_EBML_ID_HEADER) {
717 GST_ELEMENT_ERROR (ebml, STREAM, WRONG_TYPE, (NULL), (NULL));
720 if (!gst_ebml_read_master (ebml, &id))
724 if (!gst_ebml_peek_id (ebml, &level_up, &id))
732 /* is our read version uptodate? */
733 case GST_EBML_ID_EBMLREADVERSION:{
736 if (!gst_ebml_read_uint (ebml, &id, &num))
738 g_assert (id == GST_EBML_ID_EBMLREADVERSION);
739 if (num != GST_EBML_VERSION)
744 /* we only handle 8 byte lengths at max */
745 case GST_EBML_ID_EBMLMAXSIZELENGTH:{
748 if (!gst_ebml_read_uint (ebml, &id, &num))
750 g_assert (id == GST_EBML_ID_EBMLMAXSIZELENGTH);
751 if (num != sizeof (guint64))
756 /* we handle 4 byte IDs at max */
757 case GST_EBML_ID_EBMLMAXIDLENGTH:{
760 if (!gst_ebml_read_uint (ebml, &id, &num))
762 g_assert (id == GST_EBML_ID_EBMLMAXIDLENGTH);
763 if (num != sizeof (guint32))
768 case GST_EBML_ID_DOCTYPE:{
771 if (!gst_ebml_read_ascii (ebml, &id, &text))
773 g_assert (id == GST_EBML_ID_DOCTYPE);
783 case GST_EBML_ID_DOCTYPEREADVERSION:{
786 if (!gst_ebml_read_uint (ebml, &id, &num))
788 g_assert (id == GST_EBML_ID_DOCTYPEREADVERSION);
795 GST_WARNING ("Unknown data type 0x%x in EBML header (ignored)", id);
798 /* we ignore these, as they don't tell us anything we care about */
799 case GST_EBML_ID_VOID:
800 case GST_EBML_ID_EBMLVERSION:
801 case GST_EBML_ID_DOCTYPEVERSION:
802 if (!gst_ebml_read_skip (ebml))