2 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
4 * ebml-read.c: read EBML data from file/stream
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
28 #include "ebml-read.h"
31 #include <gst/math-compat.h>
/* Debug category for this file; GST_CAT_DEFAULT makes it the category
 * picked up by the GST_*_OBJECT logging macros used below. */
33 GST_DEBUG_CATEGORY (ebmlread_debug);
34 #define GST_CAT_DEFAULT ebmlread_debug
36 /* Peeks the following element id and element length in the datastream
37 * provided by @peek with @ctx as user data.
38 * Returns GST_FLOW_EOS if not enough data to read id and length.
39 * Otherwise, @_needed provides the prefix length (id + length), and
40 * @_length provides element length.
42 * @el and @offset are provided for informative messaging/debug purposes.
/* Parses an EBML element header obtained through @peek: the id bytes are
 * accumulated into *@_id and the size bytes are decoded for *@_length.
 * @el is the object used for logging and @offset the position reported in
 * the error messages. */
45 gst_ebml_peek_id_length (guint32 * _id, guint64 * _length, guint * _needed,
46 GstPeekData peek, gpointer * ctx, GstElement * el, guint64 offset)
/* len_mask starts at the top bit of a byte; read is compared against the
 * 4 (id) and 8 (size) limits below; n indexes into the peeked data;
 * num_ffs is compared against read at the end to detect an all-ones size. */
50 gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
/* all three output pointers are mandatory */
55 g_return_val_if_fail (_id != NULL, GST_FLOW_ERROR);
56 g_return_val_if_fail (_length != NULL, GST_FLOW_ERROR);
57 g_return_val_if_fail (_needed != NULL, GST_FLOW_ERROR);
/* pre-set both results to the "unknown" marker before parsing anything */
60 *_id = (guint32) GST_EBML_SIZE_UNKNOWN;
61 *_length = GST_EBML_SIZE_UNKNOWN;
/* first peek: fetch enough data to inspect the leading id byte */
65 ret = peek (ctx, needed, &buf);
66 if (ret != GST_FLOW_OK)
68 b = GST_READ_UINT8 (buf);
/* loop while no bit selected by len_mask is set in total and read has not
 * exceeded 4 */
70 while (read <= 4 && !(total & len_mask)) {
/* read beyond 4 is treated as unlikely; presumably this leads to the
 * "Invalid EBML ID size tag" report further down -- TODO confirm */
74 if (G_UNLIKELY (read > 4))
77 /* need id and at least something for subsequent length */
79 ret = peek (ctx, needed, &buf);
80 if (ret != GST_FLOW_OK)
/* append the next byte to the value accumulated so far, big-endian */
83 b = GST_READ_UINT8 (buf + n);
84 total = (total << 8) | b;
/* the accumulated value is stored as the element id */
87 *_id = (guint32) total;
89 /* read element length */
90 b = GST_READ_UINT8 (buf + n);
/* same scan as for the id, but allowing read to go up to 8 */
94 while (read <= 8 && !(total & len_mask)) {
/* presumably leads to the "Invalid EBML length size tag" report below --
 * TODO confirm */
98 if (G_UNLIKELY (read > 8))
/* note the assignment inside the condition: the bits at and above
 * len_mask are cleared from total, then the result is tested for
 * "all remaining bits set" */
100 if ((total &= (len_mask - 1)) == len_mask - 1)
/* third peek, now for the complete header */
104 ret = peek (ctx, needed, &buf);
105 if (ret != GST_FLOW_OK)
/* presumably repositions buf at the first byte of the size field so that
 * buf + n indexes the size bytes -- verify against the value of needed */
107 buf += (needed - read);
/* NOTE(review): this declares a new b while earlier statements assign to a
 * b from an outer scope, so the error messages below would report the
 * outer one -- confirm this shadowing is intended */
110 guint8 b = GST_READ_UINT8 (buf + n);
/* all-ones bytes get special treatment (see the num_ffs test below) */
112 if (G_UNLIKELY (b == 0xff))
114 total = (total << 8) | b;
/* when num_ffs equals read, the size is reported as G_MAXUINT64 */
118 if (G_UNLIKELY (read == num_ffs))
119 *_length = G_MAXUINT64;
/* peek-failure reporting: results other than FLUSHING and EOS are logged
 * as a warning; the debug-level call with the same text is presumably the
 * alternative branch */
130 if (ret != GST_FLOW_FLUSHING && ret != GST_FLOW_EOS)
131 GST_WARNING_OBJECT (el, "peek failed, ret = %s", gst_flow_get_name (ret));
133 GST_DEBUG_OBJECT (el, "peek failed, ret = %s", gst_flow_get_name (ret));
/* invalid id: report the offending byte and @offset (decimal and hex) */
139 GST_ERROR_OBJECT (el,
140 "Invalid EBML ID size tag (0x%x) at position %" G_GUINT64_FORMAT " (0x%"
141 G_GINT64_MODIFIER "x)", (guint) b, offset, offset);
142 return GST_FLOW_ERROR;
/* invalid size field: same report with a different message */
146 GST_ERROR_OBJECT (el,
147 "Invalid EBML length size tag (0x%x) at position %" G_GUINT64_FORMAT
148 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, offset, offset);
149 return GST_FLOW_ERROR;
153 /* setup for parsing @buf at position @offset on behalf of @el.
154 * Takes ownership of @buf. */
/* Prepares @ebml for reading: records @offset, maps @buf for reading and
 * creates the stack of readers with one initial entry that spans the whole
 * mapped buffer. */
156 gst_ebml_read_init (GstEbmlRead * ebml, GstElement * el, GstBuffer * buf,
/* both @el and @buf must be non-NULL; otherwise nothing is set up */
161 g_return_if_fail (el);
162 g_return_if_fail (buf);
165 ebml->offset = offset;
/* NOTE(review): the gboolean result of gst_buffer_map() is ignored here;
 * confirm a failed map cannot occur or is handled elsewhere */
167 gst_buffer_map (buf, &ebml->map, GST_MAP_READ);
/* reader stack: not zero-terminated, not cleared, room reserved for 10
 * GstEbmlMaster entries */
168 ebml->readers = g_array_sized_new (FALSE, FALSE, sizeof (GstEbmlMaster), 10);
/* the initial entry starts at the base offset and covers all mapped bytes */
169 m.offset = ebml->offset;
170 gst_byte_reader_init (&m.br, ebml->map.data, ebml->map.size);
171 g_array_append_val (ebml->readers, m);
/* Releases what gst_ebml_read_init() set up: the reader stack, the buffer
 * mapping and the buffer reference held in @ebml. */
175 gst_ebml_read_clear (GstEbmlRead * ebml)
/* TRUE: free the array's element storage as well */
178 g_array_free (ebml->readers, TRUE);
179 ebml->readers = NULL;
/* unmap before dropping the reference to the buffer */
181 gst_buffer_unmap (ebml->buf, &ebml->map);
182 gst_buffer_unref (ebml->buf);
/* Peek callback backed by a GstByteReader: asks @br for @peek bytes without
 * advancing it and hands the pointer back through @data.  It is passed,
 * cast to GstPeekData, to gst_ebml_peek_id_length() by
 * gst_ebml_peek_id_full(). */
189 gst_ebml_read_peek (GstByteReader * br, guint peek, const guint8 ** data)
/* success is the expected case */
191 if (G_LIKELY (gst_byte_reader_peek_data (br, peek, data)))
/* Peeks id, length and prefix size of the next element in the current
 * byte reader of @ebml, and logs what was found. */
198 gst_ebml_peek_id_full (GstEbmlRead * ebml, guint32 * id, guint64 * length,
/* the current byte reader serves as the peek context; the current position
 * is passed along for use in messages */
203 ret = gst_ebml_peek_id_length (id, length, prefix,
204 (GstPeekData) gst_ebml_read_peek, (gpointer) gst_ebml_read_br (ebml),
205 ebml->el, gst_ebml_read_get_pos (ebml));
206 if (ret != GST_FLOW_OK)
/* NOTE(review): *prefix is a guint but is printed with %d */
209 GST_LOG_OBJECT (ebml->el, "id 0x%x at offset 0x%" G_GINT64_MODIFIER "x"
210 " of length %" G_GUINT64_FORMAT ", prefix %d", *id,
211 gst_ebml_read_get_pos (ebml), *length, *prefix);
/* extra dump, compiled in only when debugging is enabled and executed only
 * when this category's threshold is at LOG level or above */
213 #ifndef GST_DISABLE_GST_DEBUG
214 if (ebmlread_debug->threshold >= GST_LEVEL_LOG) {
215 const guint8 *data = NULL;
216 GstByteReader *br = gst_ebml_read_br (ebml);
217 guint size = gst_byte_reader_get_remaining (br);
/* peek everything that is left in the reader, without consuming it */
219 if (gst_byte_reader_peek_data (br, size, &data)) {
221 GST_LOG_OBJECT (ebml->el, "current br %p; remaining %d", br, size);
/* dump no more than the element length and no more than what remains */
223 GST_MEMDUMP_OBJECT (ebml->el, "element", data, MIN (size, *length));
/* Peeks only the id of the next element; the length and prefix size that
 * gst_ebml_peek_id_full() also produces go into locals and are dropped. */
232 gst_ebml_peek_id (GstEbmlRead * ebml, guint32 * id)
237 return gst_ebml_peek_id_full (ebml, id, &length, &needed);
241 * Read the next element, the contents are supposed to be sub-elements which
242 * can be read separately. A new bytereader is setup for doing so.
/* Enters a master element: consumes its header, then pushes a new byte
 * reader restricted to the element's payload onto the reader stack. */
245 gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
249 const guint8 *data = NULL;
253 ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
254 if (ret != GST_FLOW_OK)
257 /* we just at least peeked the id */
258 if (!gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix))
259 return GST_FLOW_ERROR; /* FIXME: do proper error handling */
/* remember where the payload starts, then consume it from the parent
 * reader so the parent resumes after this element */
261 m.offset = gst_ebml_read_get_pos (ebml);
/* NOTE(review): length is 64-bit (gst_ebml_peek_id_full takes a guint64 *)
 * while the byte-reader calls below take a guint size; confirm that
 * oversized or unknown lengths are rejected before reaching this point */
262 if (!gst_byte_reader_get_data (gst_ebml_read_br (ebml), length, &data))
263 return GST_FLOW_PARSE;
/* readers->len, read before the append, is the index the new entry gets */
265 GST_LOG_OBJECT (ebml->el, "pushing level %d at offset %" G_GUINT64_FORMAT,
266 ebml->readers->len, m.offset);
267 gst_byte_reader_init (&m.br, data, length);
268 g_array_append_val (ebml->readers, m);
273 /* explicitly pop a bytereader from stack. Usually invoked automagically. */
/* Removes the most recently pushed byte reader from the stack, except when
 * only the initial one is left. */
275 gst_ebml_read_pop_master (GstEbmlRead * ebml)
/* the stack must exist, i.e. @ebml has been initialised and not cleared */
277 g_return_val_if_fail (ebml->readers, GST_FLOW_ERROR);
279 /* never remove initial bytereader */
280 if (ebml->readers->len > 1) {
/* the index of the last entry is what gets logged and removed */
281 GST_LOG_OBJECT (ebml->el, "popping level %d", ebml->readers->len - 1);
282 g_array_remove_index (ebml->readers, ebml->readers->len - 1);
289 * Skip the next element.
/* Skips the next element: peeks its header and advances the current byte
 * reader past header and payload in a single step. */
293 gst_ebml_read_skip (GstEbmlRead * ebml)
300 ret = gst_ebml_peek_id_full (ebml, &id, &length, &prefix);
301 if (ret != GST_FLOW_OK)
/* NOTE(review): length + prefix is a 64-bit sum handed to
 * gst_byte_reader_skip(), which takes a guint -- confirm very large or
 * unknown lengths cannot get here */
304 if (!gst_byte_reader_skip (gst_ebml_read_br (ebml), length + prefix))
305 return GST_FLOW_PARSE;
311 * Read the next element as a GstBuffer (binary).
/* Reads the next element and returns its payload as a GstBuffer in *@buf:
 * a region copy of the underlying buffer when the element has content,
 * a fresh empty buffer otherwise. */
315 gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
321 ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
322 if (ret != GST_FLOW_OK)
325 /* we just at least peeked the id */
326 if (!gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix))
327 return GST_FLOW_ERROR; /* FIXME: do proper error handling */
329 if (G_LIKELY (length > 0)) {
/* payload start expressed relative to the start of ebml->buf: the current
 * position minus the base offset */
332 offset = gst_ebml_read_get_pos (ebml) - ebml->offset;
/* consume the payload from the reader first; the region copy is made only
 * if that many bytes were available */
333 if (G_LIKELY (gst_byte_reader_skip (gst_ebml_read_br (ebml), length))) {
334 *buf = gst_buffer_copy_region (ebml->buf, GST_BUFFER_COPY_ALL,
338 return GST_FLOW_PARSE;
/* a newly created empty buffer (presumably the zero-length case) */
341 *buf = gst_buffer_new ();
348 * Read the next element, return a pointer to it and its size.
/* Reads the next element and exposes its payload through *@data as a
 * pointer obtained from the current byte reader (no copy is made by the
 * statements shown here). */
352 gst_ebml_read_bytes (GstEbmlRead * ebml, guint32 * id, const guint8 ** data,
361 ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
362 if (ret != GST_FLOW_OK)
365 /* we just at least peeked the id */
366 if (!gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix))
367 return GST_FLOW_ERROR; /* FIXME: do proper error handling */
/* the reader is only asked for data when the element is non-empty */
370 if (G_LIKELY (length > 0)) {
/* fails with GST_FLOW_PARSE when fewer than length bytes remain */
371 if (!gst_byte_reader_get_data (gst_ebml_read_br (ebml), length, data))
372 return GST_FLOW_PARSE;
381 * Read the next element as an unsigned int.
/* Reads the next element and decodes its payload as a big-endian unsigned
 * integer into *@num. */
385 gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
391 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
392 if (ret != GST_FLOW_OK)
/* invalid size: the payload has already been consumed, so the position
 * reported is the current position minus size (the payload start) */
396 GST_ERROR_OBJECT (ebml->el,
397 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
398 G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
399 gst_ebml_read_get_pos (ebml) - size);
400 return GST_FLOW_ERROR;
/* shift the accumulated value up one byte and append the next byte */
410 *num = (*num << 8) | *data;
419 * Read the next element as a signed int.
/* Reads the next element and decodes its payload as a signed integer into
 * *@num. */
423 gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
427 gboolean negative = 0;
430 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
431 if (ret != GST_FLOW_OK)
/* invalid size: report the payload start (current position minus size) */
435 GST_ERROR_OBJECT (ebml->el,
436 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
437 G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
438 gst_ebml_read_get_pos (ebml) - size);
439 return GST_FLOW_ERROR;
/* NOTE(review): clearing bit 7 of the byte looks like sign-and-magnitude
 * decoding, whereas EBML (RFC 8794) stores signed integers in two's
 * complement -- confirm how `negative` is applied to the final value */
450 *num = *data & ~0x80;
/* big-endian accumulation of the payload bytes */
456 *num = (*num << 8) | *data;
469 /* Convert 80 bit extended precision float in big endian format to double.
470 * Code taken from libavutil/intfloat_readwrite.c from ffmpeg,
471 * licensed under LGPL */
/* Converts the 10 bytes at @data (2 exponent bytes followed by 8 mantissa
 * bytes) from 80-bit extended precision to a double. */
480 _ext2dbl (const guint8 * data)
482 struct _ext_float ext;
/* split the input into its exponent and mantissa parts */
486 memcpy (&ext.exponent, data, 2);
487 memcpy (&ext.mantissa, data + 2, 8);
/* assemble the 8 mantissa bytes into one integer, most significant first */
489 for (i = 0; i < 8; i++)
490 m = (m << 8) + ext.mantissa[i];
/* 15-bit exponent: the low 7 bits of the first byte, then the second byte
 * (the top bit of the first byte is tested separately at the end) */
491 e = (((gint) ext.exponent[0] & 0x7f) << 8) | ext.exponent[1];
/* maximum exponent with a non-zero mantissa is special-cased
 * (presumably NaN -- TODO confirm) */
492 if (e == 0x7fff && m)
494 e -= 16383 + 63; /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
495 * mantissa bit is written as opposed to the
496 * single and double precision formats */
/* top bit of the first exponent byte (presumably the sign -- TODO confirm) */
497 if (ext.exponent[0] & 0x80)
503 * Read the next element as a float.
/* Reads the next element and decodes its payload as a floating point value
 * into *@num.  Payload sizes of 0, 4, 8 and 10 bytes are accepted. */
507 gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
513 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
514 if (ret != GST_FLOW_OK)
/* any other size is an error, reported at the payload start */
517 if (size != 0 && size != 4 && size != 8 && size != 10) {
518 GST_ERROR_OBJECT (ebml->el,
519 "Invalid float element size %d at position %" G_GUINT64_FORMAT " (0x%"
520 G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
521 gst_ebml_read_get_pos (ebml) - size);
522 return GST_FLOW_ERROR;
/* 4 bytes: copy into a float and convert from big-endian to host order */
528 memcpy (&f, data, 4);
529 f = GFLOAT_FROM_BE (f);
532 } else if (size == 8) {
/* 8 bytes: copy into a double and convert from big-endian to host order */
535 memcpy (&d, data, 8);
536 d = GDOUBLE_FROM_BE (d);
539 } else if (size == 10) {
/* 10 bytes: 80-bit extended precision, converted by the local helper */
540 *num = _ext2dbl (data);
542 /* size == 0 means a value of 0.0 */
550 * Read the next element as a C string.
/* Reads the next element and returns a newly allocated copy of its payload
 * in *@str.  The allocation comes from g_malloc(), so the caller presumably
 * releases it with g_free(). */
554 gst_ebml_read_string (GstEbmlRead * ebml, guint32 * id, gchar ** str)
560 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
561 if (ret != GST_FLOW_OK)
/* one byte more than the payload (presumably room for a terminating NUL) */
564 *str = g_malloc (size + 1);
565 memcpy (*str, data, size);
572 * Read the next element as an ASCII string.
/* Reads the next element as a string and rejects it when any byte has its
 * top bit set, i.e. is outside the 7-bit ASCII range. */
576 gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str_out)
582 #ifndef GST_DISABLE_GST_DEBUG
/* NOTE(review): this takes ebml->offset, while gst_ebml_read_utf8() uses
 * gst_ebml_read_get_pos (ebml) for the same purpose -- confirm which
 * position is meant to be reported */
583 guint64 oldoff = ebml->offset;
586 ret = gst_ebml_read_string (ebml, id, &str);
587 if (ret != GST_FLOW_OK)
/* walk the string up to its terminating NUL */
590 for (iter = str; *iter != '\0'; iter++) {
591 if (G_UNLIKELY (*iter & 0x80)) {
/* NOTE(review): the log object here is `ebml` itself, whereas every other
 * logging call in this file passes ebml->el -- confirm this is intended */
592 GST_ERROR_OBJECT (ebml,
593 "Invalid ASCII string at offset %" G_GUINT64_FORMAT, oldoff);
595 return GST_FLOW_ERROR;
604 * Read the next element as a UTF-8 string.
/* Reads the next element as a string and logs a warning when a non-empty
 * result is not valid UTF-8. */
608 gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
612 #ifndef GST_DISABLE_GST_DEBUG
/* position before reading, kept only for the warning below */
613 guint64 oldoff = gst_ebml_read_get_pos (ebml);
616 ret = gst_ebml_read_string (ebml, id, str);
617 if (ret != GST_FLOW_OK)
/* validation is skipped for a missing or empty string; -1 tells
 * g_utf8_validate() that the string is NUL-terminated */
620 if (str != NULL && *str != NULL && **str != '\0' &&
621 !g_utf8_validate (*str, -1, NULL)) {
/* reported as a warning, not an error */
622 GST_WARNING_OBJECT (ebml->el,
623 "Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
630 * Read the next element as a date.
631 * Returns the seconds since the unix epoch.
/* Reads the next element as a signed integer and converts it into *@date. */
635 gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
640 ret = gst_ebml_read_sint (ebml, id, &ebml_date);
641 if (ret != GST_FLOW_OK)
/* scale the raw value down by GST_SECOND (presumably nanoseconds to
 * seconds -- TODO confirm the unit of the stored value), then shift it by
 * GST_EBML_DATE_OFFSET */
644 *date = (ebml_date / GST_SECOND) + GST_EBML_DATE_OFFSET;
650 * Read the next element as binary data.
/* Reads the next element and returns a newly allocated copy of its payload
 * in *@binary. */
654 gst_ebml_read_binary (GstEbmlRead * ebml,
655 guint32 * id, guint8 ** binary, guint64 * length)
661 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
662 if (ret != GST_FLOW_OK)
/* NOTE(review): g_memdup() is deprecated in newer GLib in favour of
 * g_memdup2(), which takes a gsize -- consider switching if the minimum
 * GLib version allows it */
666 *binary = g_memdup (data, size);