2 * (c) 2003 Ronald Bultje <rbultje@ronald.bitfreak.net>
4 * ebml-read.c: read EBML data from file/stream
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the
18 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 * Boston, MA 02111-1307, USA.
28 #include "ebml-read.h"
33 /* NAN is supposed to be in math.h, Microsoft defines it in xmath.h */
38 /* If everything goes wrong try 0.0/0.0 which should be NAN */
40 #define NAN (0.0 / 0.0)
43 GST_DEBUG_CATEGORY (ebmlread_debug);
44 #define GST_CAT_DEFAULT ebmlread_debug
46 /* Peeks following element id and element length in datastream provided
47 * by @peek with @ctx as user data.
48 * Returns GST_FLOW_UNEXPECTED if not enough data to read id and length.
49 * Otherwise, @needed provides the prefix length (id + length), and
50 * @length provides element length.
52 * @object and @offset are provided for informative messaging/debug purposes.
55 gst_ebml_peek_id_length (guint32 * _id, guint64 * _length, guint * _needed,
56 GstPeekData peek, gpointer * ctx, GstElement * el, guint64 offset)
60 gint len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
64 g_return_val_if_fail (_id != NULL, GST_FLOW_ERROR);
65 g_return_val_if_fail (_length != NULL, GST_FLOW_ERROR);
66 g_return_val_if_fail (_needed != NULL, GST_FLOW_ERROR);
69 *_id = (guint32) GST_EBML_SIZE_UNKNOWN;
70 *_length = GST_EBML_SIZE_UNKNOWN;
74 buf = peek (ctx, needed);
78 b = GST_READ_UINT8 (buf);
80 while (read <= 4 && !(total & len_mask)) {
84 if (G_UNLIKELY (read > 4))
87 /* need id and at least something for subsequent length */
89 buf = peek (ctx, needed);
94 b = GST_READ_UINT8 (buf + n);
95 total = (total << 8) | b;
98 *_id = (guint32) total;
100 /* read element length */
101 b = GST_READ_UINT8 (buf + n);
105 while (read <= 8 && !(total & len_mask)) {
109 if (G_UNLIKELY (read > 8))
111 if ((total &= (len_mask - 1)) == len_mask - 1)
115 buf = peek (ctx, needed);
117 goto not_enough_data;
119 buf += (needed - read);
122 guint8 b = GST_READ_UINT8 (buf + n);
124 if (G_UNLIKELY (b == 0xff))
126 total = (total << 8) | b;
130 if (G_UNLIKELY (read == num_ffs))
131 *_length = G_MAXUINT64;
144 return GST_FLOW_UNEXPECTED;
148 GST_ERROR_OBJECT (el,
149 "Invalid EBML ID size tag (0x%x) at position %" G_GUINT64_FORMAT " (0x%"
150 G_GINT64_MODIFIER "x)", (guint) b, offset, offset);
151 return GST_FLOW_ERROR;
155 GST_ERROR_OBJECT (el,
156 "Invalid EBML length size tag (0x%x) at position %" G_GUINT64_FORMAT
157 " (0x%" G_GINT64_MODIFIER "x)", (guint) b, offset, offset);
158 return GST_FLOW_ERROR;
162 /* setup for parsing @buf at position @offset on behalf of @el.
163 * Takes ownership of @buf. */
165 gst_ebml_read_init (GstEbmlRead * ebml, GstElement * el, GstBuffer * buf,
170 g_return_if_fail (el);
171 g_return_if_fail (buf);
174 ebml->offset = offset;
176 ebml->readers = g_array_sized_new (FALSE, FALSE, sizeof (GstEbmlMaster), 10);
177 m.offset = ebml->offset;
178 gst_byte_reader_init (&m.br, GST_BUFFER_DATA (buf), GST_BUFFER_SIZE (buf));
179 g_array_append_val (ebml->readers, m);
183 gst_ebml_read_clear (GstEbmlRead * ebml)
186 g_array_free (ebml->readers, TRUE);
187 ebml->readers = NULL;
189 gst_buffer_unref (ebml->buf);
194 static const guint8 *
195 gst_ebml_read_peek (GstByteReader * br, guint peek)
199 if (G_LIKELY (gst_byte_reader_peek_data (br, peek, &data)))
206 gst_ebml_peek_id_full (GstEbmlRead * ebml, guint32 * id, guint64 * length,
210 const guint8 *data = NULL;
212 ret = gst_ebml_peek_id_length (id, length, prefix,
213 (GstPeekData) gst_ebml_read_peek, (gpointer) gst_ebml_read_br (ebml),
214 ebml->el, gst_ebml_read_get_pos (ebml));
215 if (ret != GST_FLOW_OK)
218 GST_LOG_OBJECT (ebml->el, "id 0x%x at offset 0x%" G_GINT64_MODIFIER "x"
219 " of length %" G_GUINT64_FORMAT ", prefix %d", *id,
220 gst_ebml_read_get_pos (ebml), *length, *prefix);
222 #ifndef GST_DISABLE_GST_DEBUG
224 GstByteReader *br = gst_ebml_read_br (ebml);
225 guint size = gst_byte_reader_get_remaining (br);
226 gst_byte_reader_peek_data (br, size, &data);
228 GST_LOG_OBJECT (ebml->el, "current br %p; remaining %d", br, size);
230 GST_MEMDUMP_OBJECT (ebml->el, "element", data, MIN (size, *length));
238 gst_ebml_peek_id (GstEbmlRead * ebml, guint32 * id)
243 return gst_ebml_peek_id_full (ebml, id, &length, &needed);
247 * Read the next element, the contents are supposed to be sub-elements which
248 * can be read separately. A new bytereader is setup for doing so.
251 gst_ebml_read_master (GstEbmlRead * ebml, guint32 * id)
259 ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
260 if (ret != GST_FLOW_OK)
263 /* we just at least peeked the id */
264 g_assert (gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix));
266 m.offset = gst_ebml_read_get_pos (ebml);
267 if (!gst_byte_reader_get_data (gst_ebml_read_br (ebml), length, &data))
268 return GST_FLOW_PARSE;
270 GST_LOG_OBJECT (ebml->el, "pushing level %d at offset %" G_GUINT64_FORMAT,
271 ebml->readers->len, m.offset);
272 gst_byte_reader_init (&m.br, data, length);
273 g_array_append_val (ebml->readers, m);
278 /* explicitly pop a bytereader from stack. Usually invoked automagically. */
280 gst_ebml_read_pop_master (GstEbmlRead * ebml)
282 g_return_val_if_fail (ebml->readers, GST_FLOW_ERROR);
284 /* never remove initial bytereader */
285 if (ebml->readers->len > 1) {
286 GST_LOG_OBJECT (ebml->el, "popping level %d", ebml->readers->len - 1);
287 g_array_remove_index (ebml->readers, ebml->readers->len - 1);
294 * Skip the next element.
298 gst_ebml_read_skip (GstEbmlRead * ebml)
305 ret = gst_ebml_peek_id_full (ebml, &id, &length, &prefix);
306 if (ret != GST_FLOW_OK)
309 if (!gst_byte_reader_skip (gst_ebml_read_br (ebml), length + prefix))
310 return GST_FLOW_PARSE;
316 * Read the next element as a GstBuffer (binary).
320 gst_ebml_read_buffer (GstEbmlRead * ebml, guint32 * id, GstBuffer ** buf)
326 ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
327 if (ret != GST_FLOW_OK)
330 /* we just at least peeked the id */
331 g_assert (gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix));
333 if (G_LIKELY (length > 0)) {
336 offset = gst_ebml_read_get_pos (ebml) - ebml->offset;
337 if (G_LIKELY (gst_byte_reader_skip (gst_ebml_read_br (ebml), length))) {
338 *buf = gst_buffer_create_sub (ebml->buf, offset, length);
341 return GST_FLOW_PARSE;
344 *buf = gst_buffer_new ();
351 * Read the next element, return a pointer to it and its size.
355 gst_ebml_read_bytes (GstEbmlRead * ebml, guint32 * id, const guint8 ** data,
364 ret = gst_ebml_peek_id_full (ebml, id, &length, &prefix);
365 if (ret != GST_FLOW_OK)
368 /* we just at least peeked the id */
369 g_assert (gst_byte_reader_skip (gst_ebml_read_br (ebml), prefix));
372 if (G_LIKELY (length >= 0)) {
373 if (!gst_byte_reader_get_data (gst_ebml_read_br (ebml), length, data))
374 return GST_FLOW_PARSE;
383 * Read the next element as an unsigned int.
387 gst_ebml_read_uint (GstEbmlRead * ebml, guint32 * id, guint64 * num)
393 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
394 if (ret != GST_FLOW_OK)
397 if (size < 1 || size > 8) {
398 GST_ERROR_OBJECT (ebml->el,
399 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
400 G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
401 gst_ebml_read_get_pos (ebml) - size);
402 return GST_FLOW_ERROR;
406 *num = (*num << 8) | *data;
415 * Read the next element as a signed int.
419 gst_ebml_read_sint (GstEbmlRead * ebml, guint32 * id, gint64 * num)
423 gboolean negative = 0;
426 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
427 if (ret != GST_FLOW_OK)
430 if (size < 1 || size > 8) {
431 GST_ERROR_OBJECT (ebml->el,
432 "Invalid integer element size %d at position %" G_GUINT64_FORMAT " (0x%"
433 G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
434 gst_ebml_read_get_pos (ebml) - size);
435 return GST_FLOW_ERROR;
441 *num = *data & ~0x80;
447 *num = (*num << 8) | *data;
460 /* Convert 80 bit extended precision float in big endian format to double.
461 * Code taken from libavutil/intfloat_readwrite.c from ffmpeg,
462 * licensed under LGPL */
471 _ext2dbl (const guint8 * data)
473 struct _ext_float ext;
477 memcpy (&ext.exponent, data, 2);
478 memcpy (&ext.mantissa, data + 2, 8);
480 for (i = 0; i < 8; i++)
481 m = (m << 8) + ext.mantissa[i];
482 e = (((gint) ext.exponent[0] & 0x7f) << 8) | ext.exponent[1];
483 if (e == 0x7fff && m)
485 e -= 16383 + 63; /* In IEEE 80 bits, the whole (i.e. 1.xxxx)
486 * mantissa bit is written as opposed to the
487 * single and double precision formats */
488 if (ext.exponent[0] & 0x80)
494 * Read the next element as a float.
498 gst_ebml_read_float (GstEbmlRead * ebml, guint32 * id, gdouble * num)
504 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
505 if (ret != GST_FLOW_OK)
508 if (size != 4 && size != 8 && size != 10) {
509 GST_ERROR_OBJECT (ebml->el,
510 "Invalid float element size %d at position %" G_GUINT64_FORMAT " (0x%"
511 G_GINT64_MODIFIER "x)", size, gst_ebml_read_get_pos (ebml) - size,
512 gst_ebml_read_get_pos (ebml) - size);
513 return GST_FLOW_ERROR;
519 memcpy (&f, data, 4);
520 f = GFLOAT_FROM_BE (f);
523 } else if (size == 8) {
526 memcpy (&d, data, 8);
527 d = GDOUBLE_FROM_BE (d);
531 *num = _ext2dbl (data);
538 * Read the next element as a C string.
542 gst_ebml_read_string (GstEbmlRead * ebml, guint32 * id, gchar ** str)
548 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
549 if (ret != GST_FLOW_OK)
552 *str = g_malloc (size + 1);
553 memcpy (*str, data, size);
560 * Read the next element as an ASCII string.
564 gst_ebml_read_ascii (GstEbmlRead * ebml, guint32 * id, gchar ** str_out)
570 #ifndef GST_DISABLE_GST_DEBUG
571 guint64 oldoff = ebml->offset;
574 ret = gst_ebml_read_string (ebml, id, &str);
575 if (ret != GST_FLOW_OK)
578 for (iter = str; *iter != '\0'; iter++) {
579 if (G_UNLIKELY (*iter & 0x80)) {
580 GST_ERROR_OBJECT (ebml,
581 "Invalid ASCII string at offset %" G_GUINT64_FORMAT, oldoff);
583 return GST_FLOW_ERROR;
592 * Read the next element as a UTF-8 string.
596 gst_ebml_read_utf8 (GstEbmlRead * ebml, guint32 * id, gchar ** str)
600 #ifndef GST_DISABLE_GST_DEBUG
601 guint64 oldoff = gst_ebml_read_get_pos (ebml);
604 ret = gst_ebml_read_string (ebml, id, str);
605 if (ret != GST_FLOW_OK)
608 if (str != NULL && *str != NULL && **str != '\0' &&
609 !g_utf8_validate (*str, -1, NULL)) {
610 GST_WARNING_OBJECT (ebml->el,
611 "Invalid UTF-8 string at offset %" G_GUINT64_FORMAT, oldoff);
618 * Read the next element as a date.
619 * Returns the seconds since the unix epoch.
623 gst_ebml_read_date (GstEbmlRead * ebml, guint32 * id, gint64 * date)
628 ret = gst_ebml_read_sint (ebml, id, &ebml_date);
629 if (ret != GST_FLOW_OK)
632 *date = (ebml_date / GST_SECOND) + GST_EBML_DATE_OFFSET;
638 * Read the next element as binary data.
642 gst_ebml_read_binary (GstEbmlRead * ebml,
643 guint32 * id, guint8 ** binary, guint64 * length)
649 ret = gst_ebml_read_bytes (ebml, id, &data, &size);
650 if (ret != GST_FLOW_OK)
654 *binary = g_memdup (data, size);