1 /* GStreamer MPEG audio parser
2 * Copyright (C) 2006-2007 Jan Schmidt <thaytan@mad.scientist.com>
3 * Copyright (C) 2010 Mark Nauwelaerts <mnauw users sf net>
4 * Copyright (C) 2010 Nokia Corporation. All rights reserved.
5 * Contact: Stefan Kost <stefan.kost@nokia.com>
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
17 * You should have received a copy of the GNU Library General Public
18 * License along with this library; if not, write to the
19 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 * Boston, MA 02111-1307, USA.
23 * SECTION:element-mpegaudioparse
24 * @short_description: MPEG audio parser
25 * @see_also: #GstAmrParse, #GstAACParse
27 * Parses and frames mpeg1 audio streams. Provides seeking.
30 * <title>Example launch line</title>
32 * gst-launch filesrc location=test.mp3 ! mpegaudioparse ! mad ! autoaudiosink
37 /* FIXME: we should make the base class (GstBaseParse) aware of the
38 * XING seek table somehow, so it can use it properly for things like
39 * accurate seeks. Currently it can only do a lookup via the convert function,
40 * but then doesn't know what the result represents exactly. One could either
41 * add a vfunc for index lookup, or just make mpegaudioparse populate the
42 * base class's index via the API provided.
50 #include "gstmpegaudioparse.h"
51 #include <gst/base/gstbytereader.h>
/* Debug category for this parser, followed by the numeric codes used in this
 * file: channel modes (-1 = unknown, 0..3), CRC state (-1 = unknown,
 * 0 = protected, 1 = not protected), the bit flags tested against the Xing
 * header flags word, and the default minimum frame size (6 bytes) handed to
 * gst_base_parse_set_min_frame_size(). */
53 GST_DEBUG_CATEGORY_STATIC (mpeg_audio_parse_debug);
54 #define GST_CAT_DEFAULT mpeg_audio_parse_debug
56 #define MPEG_AUDIO_CHANNEL_MODE_UNKNOWN -1
57 #define MPEG_AUDIO_CHANNEL_MODE_STEREO 0
58 #define MPEG_AUDIO_CHANNEL_MODE_JOINT_STEREO 1
59 #define MPEG_AUDIO_CHANNEL_MODE_DUAL_CHANNEL 2
60 #define MPEG_AUDIO_CHANNEL_MODE_MONO 3
62 #define CRC_UNKNOWN -1
63 #define CRC_PROTECTED 0
64 #define CRC_NOT_PROTECTED 1
66 #define XING_FRAMES_FLAG 0x0001
67 #define XING_BYTES_FLAG 0x0002
68 #define XING_TOC_FLAG 0x0004
69 #define XING_VBR_SCALE_FLAG 0x0008
71 #define MIN_FRAME_SIZE 6
/* Static pad templates.  The "src" template advertises audio/mpeg with
 * mpegversion 1, layer 1-3, mpegaudioversion 1-3, rate 8000-48000,
 * 1-2 channels and parsed=true; the "sink" template accepts any audio/mpeg
 * with mpegversion 1. */
73 static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
76 GST_STATIC_CAPS ("audio/mpeg, "
77 "mpegversion = (int) 1, "
78 "layer = (int) [ 1, 3 ], "
79 "mpegaudioversion = (int) [ 1, 3], "
80 "rate = (int) [ 8000, 48000 ], "
81 "channels = (int) [ 1, 2 ], " "parsed=(boolean) true")
84 static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
87 GST_STATIC_CAPS ("audio/mpeg, mpegversion = (int) 1")
/* Forward declarations of the functions installed as GObject / GstBaseParse
 * class methods in class_init, plus the first-frame helper; then the GObject
 * type definition (parent type GST_TYPE_BASE_PARSE) and the accessor macro
 * for the channel-mode enum type. */
90 static void gst_mpeg_audio_parse_finalize (GObject * object);
92 static gboolean gst_mpeg_audio_parse_start (GstBaseParse * parse);
93 static gboolean gst_mpeg_audio_parse_stop (GstBaseParse * parse);
94 static GstFlowReturn gst_mpeg_audio_parse_handle_frame (GstBaseParse * parse,
95 GstBaseParseFrame * frame, gint * skipsize);
96 static GstFlowReturn gst_mpeg_audio_parse_pre_push_frame (GstBaseParse * parse,
97 GstBaseParseFrame * frame);
98 static gboolean gst_mpeg_audio_parse_convert (GstBaseParse * parse,
99 GstFormat src_format, gint64 src_value,
100 GstFormat dest_format, gint64 * dest_value);
101 static GstCaps *gst_mpeg_audio_parse_get_sink_caps (GstBaseParse * parse,
104 static void gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse *
105 mp3parse, GstBuffer * buf);
107 #define gst_mpeg_audio_parse_parent_class parent_class
108 G_DEFINE_TYPE (GstMpegAudioParse, gst_mpeg_audio_parse, GST_TYPE_BASE_PARSE);
110 #define GST_TYPE_MPEG_AUDIO_CHANNEL_MODE \
111 (gst_mpeg_audio_channel_mode_get_type())
/* Value / name / nick triples describing the channel-mode enum.  The table is
 * passed to g_enum_register_static() and searched linearly by
 * gst_mpeg_audio_channel_mode_get_nick(). */
113 static const GEnumValue mpeg_audio_channel_mode[] = {
114 {MPEG_AUDIO_CHANNEL_MODE_UNKNOWN, "Unknown", "unknown"},
115 {MPEG_AUDIO_CHANNEL_MODE_MONO, "Mono", "mono"},
116 {MPEG_AUDIO_CHANNEL_MODE_DUAL_CHANNEL, "Dual Channel", "dual-channel"},
117 {MPEG_AUDIO_CHANNEL_MODE_JOINT_STEREO, "Joint Stereo", "joint-stereo"},
118 {MPEG_AUDIO_CHANNEL_MODE_STEREO, "Stereo", "stereo"},
/* Returns the GType of the channel-mode enum.  On the first call the enum is
 * registered under the name "GstMpegAudioChannelMode" and the result is
 * cached in a function-level static; later calls return the cached value.
 * NOTE(review): the cache check is not protected by any lock in this
 * function; confirm that callers are serialized. */
123 gst_mpeg_audio_channel_mode_get_type (void)
125 static GType mpeg_audio_channel_mode_type = 0;
127 if (!mpeg_audio_channel_mode_type) {
128 mpeg_audio_channel_mode_type =
129 g_enum_register_static ("GstMpegAudioChannelMode",
130 mpeg_audio_channel_mode);
132 return mpeg_audio_channel_mode_type;
/* Looks @mode up in mpeg_audio_channel_mode[] and returns the nick string of
 * the first entry whose value equals it.
 * NOTE(review): the result for a mode that is not in the table is not visible
 * in this listing. */
136 gst_mpeg_audio_channel_mode_get_nick (gint mode)
139 for (i = 0; i < G_N_ELEMENTS (mpeg_audio_channel_mode); i++) {
140 if (mpeg_audio_channel_mode[i].value == mode)
141 return mpeg_audio_channel_mode[i].value_nick;
/* Class initializer.  In order: initializes the "mpegaudioparse" debug
 * category, installs the finalize handler and the GstBaseParse methods
 * (start, stop, handle_frame, pre_push_frame, convert, get_sink_caps),
 * registers the "has-crc" (boolean) and "channel-mode" (string) tags, takes a
 * class reference on the channel-mode enum type, adds the sink and src pad
 * templates, and sets the element details. */
147 gst_mpeg_audio_parse_class_init (GstMpegAudioParseClass * klass)
149 GstBaseParseClass *parse_class = GST_BASE_PARSE_CLASS (klass);
150 GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
151 GObjectClass *object_class = G_OBJECT_CLASS (klass);
153 GST_DEBUG_CATEGORY_INIT (mpeg_audio_parse_debug, "mpegaudioparse", 0,
154 "MPEG1 audio stream parser");
156 object_class->finalize = gst_mpeg_audio_parse_finalize;
158 parse_class->start = GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_start);
159 parse_class->stop = GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_stop);
160 parse_class->handle_frame =
161 GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_handle_frame);
162 parse_class->pre_push_frame =
163 GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_pre_push_frame);
164 parse_class->convert = GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_convert);
165 parse_class->get_sink_caps =
166 GST_DEBUG_FUNCPTR (gst_mpeg_audio_parse_get_sink_caps);
169 #define GST_TAG_CRC "has-crc"
170 #define GST_TAG_MODE "channel-mode"
172 gst_tag_register (GST_TAG_CRC, GST_TAG_FLAG_META, G_TYPE_BOOLEAN,
173 "has crc", "Using CRC", NULL);
174 gst_tag_register (GST_TAG_MODE, GST_TAG_FLAG_ENCODED, G_TYPE_STRING,
175 "channel mode", "MPEG audio channel mode", NULL);
177 g_type_class_ref (GST_TYPE_MPEG_AUDIO_CHANNEL_MODE);
179 gst_element_class_add_pad_template (element_class,
180 gst_static_pad_template_get (&sink_template));
181 gst_element_class_add_pad_template (element_class,
182 gst_static_pad_template_get (&src_template));
184 gst_element_class_set_details_simple (element_class, "MPEG1 Audio Parser",
185 "Codec/Parser/Audio",
186 "Parses and frames mpeg1 audio streams (levels 1-3), provides seek",
187 "Jan Schmidt <thaytan@mad.scientist.com>,"
188 "Mark Nauwelaerts <mark.nauwelaerts@collabora.co.uk>");
192 gst_mpeg_audio_parse_reset (GstMpegAudioParse * mp3parse)
194 mp3parse->channels = -1;
196 mp3parse->sent_codec_tag = FALSE;
197 mp3parse->last_posted_crc = CRC_UNKNOWN;
198 mp3parse->last_posted_channel_mode = MPEG_AUDIO_CHANNEL_MODE_UNKNOWN;
199 mp3parse->freerate = 0;
201 mp3parse->hdr_bitrate = 0;
203 mp3parse->xing_flags = 0;
204 mp3parse->xing_bitrate = 0;
205 mp3parse->xing_frames = 0;
206 mp3parse->xing_total_time = 0;
207 mp3parse->xing_bytes = 0;
208 mp3parse->xing_vbr_scale = 0;
209 memset (mp3parse->xing_seek_table, 0, 100);
210 memset (mp3parse->xing_seek_table_inverse, 0, 256);
212 mp3parse->vbri_bitrate = 0;
213 mp3parse->vbri_frames = 0;
214 mp3parse->vbri_total_time = 0;
215 mp3parse->vbri_bytes = 0;
216 mp3parse->vbri_seek_points = 0;
217 g_free (mp3parse->vbri_seek_table);
218 mp3parse->vbri_seek_table = NULL;
220 mp3parse->encoder_delay = 0;
221 mp3parse->encoder_padding = 0;
/* Instance initializer: puts all parser state into its reset state. */
225 gst_mpeg_audio_parse_init (GstMpegAudioParse * mp3parse)
227 gst_mpeg_audio_parse_reset (mp3parse);
/* GObject finalize handler: chains up to the parent class implementation. */
231 gst_mpeg_audio_parse_finalize (GObject * object)
233 G_OBJECT_CLASS (parent_class)->finalize (object);
/* GstBaseParse start handler: sets the base class minimum frame size to
 * MIN_FRAME_SIZE and resets the parser state. */
237 gst_mpeg_audio_parse_start (GstBaseParse * parse)
239 GstMpegAudioParse *mp3parse = GST_MPEG_AUDIO_PARSE (parse);
241 gst_base_parse_set_min_frame_size (GST_BASE_PARSE (mp3parse), MIN_FRAME_SIZE);
242 GST_DEBUG_OBJECT (parse, "starting");
244 gst_mpeg_audio_parse_reset (mp3parse);
/* GstBaseParse stop handler: resets the parser state (which also frees the
 * VBRI seek table). */
250 gst_mpeg_audio_parse_stop (GstBaseParse * parse)
252 GstMpegAudioParse *mp3parse = GST_MPEG_AUDIO_PARSE (parse);
254 GST_DEBUG_OBJECT (parse, "stopping");
256 gst_mpeg_audio_parse_reset (mp3parse);
/* Bitrate lookup table, indexed as [lsf][layer - 1][4-bit bitrate index].
 * Entries are multiplied by 1000 where they are read.  Index 0 yields 0,
 * which the header parser handles by falling back to the stored free-format
 * rate. */
261 static const guint mp3types_bitrates[2][3][16] = {
263 {0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448,},
264 {0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384,},
265 {0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320,}
268 {0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256,},
269 {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160,},
270 {0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160,}
/* Sample-rate lookup table, indexed as [lsf + mpg25][2-bit sample-rate index
 * taken from bits 10-11 of the frame header]. */
274 static const guint mp3types_freqs[3][3] = { {44100, 48000, 32000},
275 {22050, 24000, 16000},
/* Decodes the fields of a 32-bit MPEG audio frame header and computes the
 * frame length in bytes from bitrate, sample rate and padding.
 *
 * Bit 20 of the header selects between the MPEG-1/2 path (bit 19 then picks
 * lsf) and the other path; version is 1 + lsf + mpg25.  Layer is 4 minus the
 * two bits at 17-18.  The bitrate comes from mp3types_bitrates[] times 1000;
 * the freeform branch substitutes mp3parse->freerate.  Padding is forced to 0
 * when the resulting bitrate is 0, so that the computed length is 0 as well.
 * Mode 3 means one channel, any other mode two.
 *
 * Decoded fields are stored through the put_* out-parameters; callers in this
 * file pass NULL for the ones they do not need. */
280 mp3_type_frame_length_from_header (GstMpegAudioParse * mp3parse, guint32 header,
281 guint * put_version, guint * put_layer, guint * put_channels,
282 guint * put_bitrate, guint * put_samplerate, guint * put_mode,
286 gulong mode, samplerate, bitrate, layer, channels, padding, crc;
290 if (header & (1 << 20)) {
291 lsf = (header & (1 << 19)) ? 0 : 1;
298 version = 1 + lsf + mpg25;
300 layer = 4 - ((header >> 17) & 0x3);
302 crc = (header >> 16) & 0x1;
304 bitrate = (header >> 12) & 0xF;
305 bitrate = mp3types_bitrates[lsf][layer - 1][bitrate] * 1000;
307 GST_LOG_OBJECT (mp3parse, "using freeform bitrate");
308 bitrate = mp3parse->freerate;
311 samplerate = (header >> 10) & 0x3;
312 samplerate = mp3types_freqs[lsf + mpg25][samplerate];
314 /* force 0 length if 0 bitrate */
315 padding = (bitrate > 0) ? (header >> 9) & 0x1 : 0;
317 mode = (header >> 6) & 0x3;
318 channels = (mode == 3) ? 1 : 2;
322 length = 4 * ((bitrate * 12) / samplerate + padding);
325 length = (bitrate * 144) / samplerate + padding;
329 length = (bitrate * 144) / (samplerate << lsf) + padding;
333 GST_DEBUG_OBJECT (mp3parse, "Calculated mp3 frame length of %u bytes",
335 GST_DEBUG_OBJECT (mp3parse, "samplerate = %lu, bitrate = %lu, version = %lu, "
336 "layer = %lu, channels = %lu, mode = %s", samplerate, bitrate, version,
337 layer, channels, gst_mpeg_audio_channel_mode_get_nick (mode));
340 *put_version = version;
344 *put_channels = channels;
346 *put_bitrate = bitrate;
348 *put_samplerate = samplerate;
/* Resync validation helper and its MIN_RESYNC_FRAMES threshold (3).
 *
 * Maps @buf read-only and, while fewer than MIN_RESYNC_FRAMES frames have
 * been counted, reads the 32-bit header at the current offset, compares it
 * with @header under HDRMASK (bitrate, padding, mode / mode extension and
 * copyright / emphasis bits are allowed to differ), rejects a bitrate index
 * of 0xf, and recomputes the frame length from the header just read; a length
 * of 0 is rejected too.  The buffer is unmapped before returning.
 * NOTE(review): the statements that advance the offset and the frame count
 * are not visible in this listing. */
357 /* Minimum number of consecutive, valid-looking frames to consider
359 #define MIN_RESYNC_FRAMES 3
361 /* Perform extended validation to check that subsequent headers match
362 * the first header given here in important characteristics, to avoid
363 * false sync. We look for a minimum of MIN_RESYNC_FRAMES consecutive
364 * frames to match their major characteristics.
366 * If at_eos is set to TRUE, we just check that we don't find any invalid
367 * frames in whatever data is available, rather than requiring a full
368 * MIN_RESYNC_FRAMES of data.
370 * Returns TRUE if we've seen enough data to validate or reject the frame.
371 * If TRUE is returned, then *valid contains TRUE if it validated, or false
372 * if we decided it was false sync.
373 * If FALSE is returned, then *valid contains minimum needed data.
376 gst_mp3parse_validate_extended (GstMpegAudioParse * mp3parse, GstBuffer * buf,
377 guint32 header, int bpf, gboolean at_eos, gint * valid)
382 int frames_found = 1;
385 gst_buffer_map (buf, &map, GST_MAP_READ);
387 while (frames_found < MIN_RESYNC_FRAMES) {
388 /* Check if we have enough data for all these frames, plus the next
390 if (map.size < offset + 4) {
392 /* Running out of data at EOS is fine; just accept it */
402 next_header = GST_READ_UINT32_BE (map.data + offset);
403 GST_DEBUG_OBJECT (mp3parse, "At %d: header=%08X, header2=%08X, bpf=%d",
404 offset, (unsigned int) header, (unsigned int) next_header, bpf);
406 /* mask the bits which are allowed to differ between frames */
407 #define HDRMASK ~((0xF << 12) /* bitrate */ | \
408 (0x1 << 9) /* padding */ | \
409 (0xf << 4) /* mode|mode extension */ | \
410 (0xf)) /* copyright|emphasis */
412 if ((next_header & HDRMASK) != (header & HDRMASK)) {
413 /* If any of the unmasked bits don't match, then it's not valid */
414 GST_DEBUG_OBJECT (mp3parse, "next header doesn't match "
415 "(header=%08X (%08X), header2=%08X (%08X), bpf=%d)",
416 (guint) header, (guint) header & HDRMASK, (guint) next_header,
417 (guint) next_header & HDRMASK, bpf);
420 } else if (((next_header >> 12) & 0xf) == 0xf) {
421 /* The essential parts were the same, but the bitrate held an
422 invalid value - also reject */
423 GST_DEBUG_OBJECT (mp3parse, "next header invalid (bitrate)");
428 bpf = mp3_type_frame_length_from_header (mp3parse, next_header,
429 NULL, NULL, NULL, NULL, NULL, NULL, NULL);
431 /* if no bitrate, and no freeform rate known, then fail */
432 if (G_UNLIKELY (!bpf)) {
433 GST_DEBUG_OBJECT (mp3parse, "next header invalid (bitrate 0)");
445 gst_buffer_unmap (buf, &map);
/* Sanity-checks a 32-bit frame header value.  Tests, in order: the 11 sync
 * bits (mask 0xffe00000), version bits 19-20 equal to 1, layer bits 17-18
 * equal to 0, bitrate index 0xf, and sample-rate index 3, logging a warning
 * for each failed test.  Emphasis value 2 is only warned about, as the
 * comment on that branch explains. */
450 gst_mpeg_audio_parse_head_check (GstMpegAudioParse * mp3parse,
453 GST_DEBUG_OBJECT (mp3parse, "checking mp3 header 0x%08lx", head);
454 /* if it's not a valid sync */
455 if ((head & 0xffe00000) != 0xffe00000) {
456 GST_WARNING_OBJECT (mp3parse, "invalid sync");
459 /* if it's an invalid MPEG version */
460 if (((head >> 19) & 3) == 0x1) {
461 GST_WARNING_OBJECT (mp3parse, "invalid MPEG version: 0x%lx",
465 /* if it's an invalid layer */
466 if (!((head >> 17) & 3)) {
467 GST_WARNING_OBJECT (mp3parse, "invalid layer: 0x%lx", (head >> 17) & 3);
470 /* if it's an invalid bitrate */
471 if (((head >> 12) & 0xf) == 0xf) {
472 GST_WARNING_OBJECT (mp3parse, "invalid bitrate: 0x%lx", (head >> 12) & 0xf);
475 /* if it's an invalid samplerate */
476 if (((head >> 10) & 0x3) == 0x3) {
477 GST_WARNING_OBJECT (mp3parse, "invalid samplerate: 0x%lx",
482 if ((head & 0x3) == 0x2) {
483 /* Ignore this as there are some files with emphasis 0x2 that can
484 * be played fine. See BGO #537235 */
485 GST_WARNING_OBJECT (mp3parse, "invalid emphasis: 0x%lx", head & 0x3);
/* Free-format helper.  Walks the mapped data one byte at a time, looking for
 * a following 32-bit value that has the sync bits set, matches @header under
 * HDRMASK and does not carry bitrate index 0xf.  A candidate rate is derived
 * from the offset of that position using one of the two formulas in the loop
 * body; it is written to *_rate multiplied by 1000.  Rates below 8, or above
 * 640 for layer 3, are logged as invalid.
 * NOTE(review): the branch conditions choosing between the two rate formulas
 * and the loop exits are not visible in this listing. */
491 /* Determines possible freeform frame rate/size by looking for next
492 * header with valid bitrate (0 or otherwise valid) (and sufficiently
493 * matching current header).
495 * Returns TRUE if we've found such one, and *rate then contains rate
496 * (or *rate contains 0 if decided no freeframe size could be determined).
497 * If not enough data, returns FALSE.
500 gst_mp3parse_find_freerate (GstMpegAudioParse * mp3parse, GstMapInfo * map,
501 guint32 header, gboolean at_eos, gint * _rate)
507 gulong samplerate, rate, layer, padding;
511 available = map->size;
516 /* pick apart header again partially */
517 if (header & (1 << 20)) {
518 lsf = (header & (1 << 19)) ? 0 : 1;
524 layer = 4 - ((header >> 17) & 0x3);
525 samplerate = (header >> 10) & 0x3;
526 samplerate = mp3types_freqs[lsf + mpg25][samplerate];
527 padding = (header >> 9) & 0x1;
529 for (; offset < available; ++offset) {
530 /* Check if we have enough data for all these frames, plus the next
532 if (available < offset + 4) {
534 /* Running out of data; failed to determine size */
542 next_header = GST_READ_UINT32_BE (data + offset);
543 if ((next_header & 0xFFE00000) != 0xFFE00000)
546 GST_DEBUG_OBJECT (mp3parse, "At %d: header=%08X, header2=%08X",
547 offset, (unsigned int) header, (unsigned int) next_header);
549 if ((next_header & HDRMASK) != (header & HDRMASK)) {
550 /* If any of the unmasked bits don't match, then it's not valid */
551 GST_DEBUG_OBJECT (mp3parse, "next header doesn't match "
552 "(header=%08X (%08X), header2=%08X (%08X))",
553 (guint) header, (guint) header & HDRMASK, (guint) next_header,
554 (guint) next_header & HDRMASK);
556 } else if (((next_header >> 12) & 0xf) == 0xf) {
557 /* The essential parts were the same, but the bitrate held an
558 invalid value - also reject */
559 GST_DEBUG_OBJECT (mp3parse, "next header invalid (bitrate)");
566 /* almost accept as free frame */
568 rate = samplerate * (offset - 4 * padding + 4) / 48000;
570 rate = samplerate * (offset - padding + 1) / (144 >> lsf) / 1000;
574 GST_LOG_OBJECT (mp3parse, "calculated rate %lu", rate * 1000);
575 if (rate < 8 || (layer == 3 && rate > 640)) {
576 GST_DEBUG_OBJECT (mp3parse, "rate invalid");
578 /* maybe some hope */
581 GST_DEBUG_OBJECT (mp3parse, "aborting");
586 *_rate = rate * 1000;
589 /* avoid indefinite searching */
591 GST_DEBUG_OBJECT (mp3parse, "exceeded sanity rate; aborting");
/* GstBaseParse handle_frame implementation.
 *
 * Maps the input buffer, scans it for the 11-bit sync pattern, validates the
 * header at offset 0 with gst_mpeg_audio_parse_head_check(), and computes the
 * frame length.  When the stream looks free-format, the free-format rate is
 * searched for (again after lost sync) and the length recomputed.  After lost
 * sync or a change of channels / rate / layer / version, and when not
 * draining, gst_mp3parse_validate_extended() is run; if either helper needs
 * more data, the value it returned is set as the base class minimum frame
 * size.
 *
 * On a parameter change new source caps are set, the cached parameters and
 * samples-per-frame are updated, and the frame rate plus lead-in / lead-out
 * are passed to the base class.  The header bitrate, CRC and mode are
 * recorded and the first frame is inspected for seek tables.  The frame is
 * finished with gst_base_parse_finish_frame() when res is set and bpf does
 * not exceed the mapped size.
 *
 * NOTE(review): map.size is read after gst_buffer_unmap(); consider copying
 * the size into a local before unmapping. */
601 gst_mpeg_audio_parse_handle_frame (GstBaseParse * parse,
602 GstBaseParseFrame * frame, gint * skipsize)
604 GstMpegAudioParse *mp3parse = GST_MPEG_AUDIO_PARSE (parse);
605 GstBuffer *buf = frame->buffer;
606 GstByteReader reader;
608 gboolean lost_sync, draining, valid, caps_change;
610 guint bitrate, layer, rate, channels, version, mode, crc;
612 gboolean res = FALSE;
614 gst_buffer_map (buf, &map, GST_MAP_READ);
615 if (G_UNLIKELY (map.size < 6)) {
620 gst_byte_reader_init (&reader, map.data, map.size);
622 off = gst_byte_reader_masked_scan_uint32 (&reader, 0xffe00000, 0xffe00000,
625 GST_LOG_OBJECT (parse, "possible sync at buffer offset %d", off);
627 /* didn't find anything that looks like a sync word, skip */
629 *skipsize = map.size - 3;
633 /* possible frame header, but not at offset 0? skip bytes before sync */
639 /* make sure the values in the frame header look sane */
640 header = GST_READ_UINT32_BE (map.data);
641 if (!gst_mpeg_audio_parse_head_check (mp3parse, header)) {
646 GST_LOG_OBJECT (parse, "got frame");
648 lost_sync = GST_BASE_PARSE_LOST_SYNC (parse);
649 draining = GST_BASE_PARSE_DRAINING (parse);
651 if (G_UNLIKELY (lost_sync))
652 mp3parse->freerate = 0;
654 bpf = mp3_type_frame_length_from_header (mp3parse, header,
655 &version, &layer, &channels, &bitrate, &rate, &mode, &crc);
657 if (channels != mp3parse->channels || rate != mp3parse->rate ||
658 layer != mp3parse->layer || version != mp3parse->version)
663 /* maybe free format */
665 GST_LOG_OBJECT (mp3parse, "possibly free format");
666 if (lost_sync || mp3parse->freerate == 0) {
667 GST_DEBUG_OBJECT (mp3parse, "finding free format rate");
668 if (!gst_mp3parse_find_freerate (mp3parse, &map, header, draining,
670 /* not enough data */
671 gst_base_parse_set_min_frame_size (parse, valid);
675 GST_DEBUG_OBJECT (parse, "determined freeform size %d", valid);
676 mp3parse->freerate = valid;
680 bpf = mp3_type_frame_length_from_header (mp3parse, header,
681 &version, &layer, &channels, &bitrate, &rate, &mode, &crc);
683 /* did not come up with valid freeform length, reject after all */
689 if (!draining && (lost_sync || caps_change)) {
690 if (!gst_mp3parse_validate_extended (mp3parse, buf, header, bpf, draining,
692 /* not enough data */
693 gst_base_parse_set_min_frame_size (parse, valid);
702 } else if (draining && lost_sync && caps_change && mp3parse->rate > 0) {
703 /* avoid caps jitter that we can't be sure of */
708 /* restore default minimum */
709 gst_base_parse_set_min_frame_size (parse, MIN_FRAME_SIZE);
713 /* metadata handling */
714 if (G_UNLIKELY (caps_change)) {
715 GstCaps *caps = gst_caps_new_simple ("audio/mpeg",
716 "mpegversion", G_TYPE_INT, 1,
717 "mpegaudioversion", G_TYPE_INT, version,
718 "layer", G_TYPE_INT, layer,
719 "rate", G_TYPE_INT, rate,
720 "channels", G_TYPE_INT, channels, "parsed", G_TYPE_BOOLEAN, TRUE, NULL);
721 gst_pad_set_caps (GST_BASE_PARSE_SRC_PAD (parse), caps);
722 gst_caps_unref (caps);
724 mp3parse->rate = rate;
725 mp3parse->channels = channels;
726 mp3parse->layer = layer;
727 mp3parse->version = version;
729 /* see http://www.codeproject.com/audio/MPEGAudioInfo.asp */
730 if (mp3parse->layer == 1)
732 else if (mp3parse->layer == 2)
733 mp3parse->spf = 1152;
734 else if (mp3parse->version == 1) {
735 mp3parse->spf = 1152;
737 /* MPEG-2 or "2.5" */
742 * We start pushing 9 frames earlier (29 frames for MPEG2) than
743 * segment start to be able to decode the first frame we want.
744 * 9 (29) frames are the theoretical maximum of frames that contain
745 * data for the current frame (bit reservoir).
748 * Some mp3 streams have an offset in the timestamps, for which we have to
749 * push the frame *after* the end position in order for the decoder to be
750 * able to decode everything up until the segment.stop position. */
751 gst_base_parse_set_frame_rate (parse, mp3parse->rate, mp3parse->spf,
752 (version == 1) ? 10 : 30, 2);
755 mp3parse->hdr_bitrate = bitrate;
757 /* For first frame; check for seek tables and output a codec tag */
758 gst_mpeg_audio_parse_handle_first_frame (mp3parse, buf);
760 /* store some frame info for later processing */
761 mp3parse->last_crc = crc;
762 mp3parse->last_mode = mode;
765 gst_buffer_unmap (buf, &map);
767 if (res && bpf <= map.size) {
768 return gst_base_parse_finish_frame (parse, frame, bpf);
/* Inspects the first frame for VBR / seek metadata and reports what it finds
 * to the base class.
 *
 * The buffer is mapped read-only and the 32-bit ids at the Xing offset (which
 * depends on MPEG version and channel count) and at the VBRI offset are read
 * when enough data is available.  The upstream size in bytes is queried from
 * the sink pad's peer.
 *
 * Xing / Info header: the flags word selects which of frame count, byte
 * count, 100-entry TOC and VBR scale are present.  Frame count and spf give
 * xing_total_time; bytes and time give xing_bitrate rounded to 1000.  The
 * TOC is stored relative to its first entry and an inverse table (1/256 byte
 * position -> 1/100 percent time) is built by linear interpolation.  If the
 * Xing byte count times 0.8 exceeds the upstream size, the frames and bytes
 * flags are cleared.  A following "LAME" tag supplies encoder delay and
 * padding, each taken as a 12-bit field of a 24-bit value.
 *
 * VBRI header: version 0x0001 only.  Byte and frame counts give
 * vbri_total_time and vbri_bitrate; the seek table is allocated with g_new()
 * and filled from entries of 1 to 4 bytes, each multiplied by scale.  The
 * same 0.8 truncation test sets vbri_valid.
 *
 * Finally the duration is set from whichever table gave a valid one,
 * seekability is derived from the presence of a usable table, and the Xing or
 * VBRI bitrate, when set, is passed on as the average bitrate.
 *
 * NOTE(review): the memset() of xing_seek_table_inverse clears 256 bytes,
 * while that table is written with guint16 values for 256 indices; confirm
 * the element type and clear the whole table.
 * NOTE(review): the pointer advances between the individual field reads are
 * not visible in this listing. */
775 gst_mpeg_audio_parse_handle_first_frame (GstMpegAudioParse * mp3parse,
778 const guint32 xing_id = 0x58696e67; /* 'Xing' in hex */
779 const guint32 info_id = 0x496e666f; /* 'Info' in hex - found in LAME CBR files */
780 const guint32 vbri_id = 0x56425249; /* 'VBRI' in hex */
781 const guint32 lame_id = 0x4c414d45; /* 'LAME' in hex */
782 gint offset_xing, offset_vbri;
784 gint64 upstream_total_bytes = 0;
785 guint32 read_id_xing = 0, read_id_vbri = 0;
790 if (mp3parse->sent_codec_tag)
793 /* Check first frame for Xing info */
794 if (mp3parse->version == 1) { /* MPEG-1 file */
795 if (mp3parse->channels == 1)
799 } else { /* MPEG-2 header */
800 if (mp3parse->channels == 1)
806 /* The VBRI tag is always at offset 0x20 */
809 /* Skip the 4 bytes of the MP3 header too */
813 /* Check if we have enough data to read the Xing header */
814 gst_buffer_map (buf, &map, GST_MAP_READ);
818 if (avail >= offset_xing + 4) {
819 read_id_xing = GST_READ_UINT32_BE (data + offset_xing);
821 if (avail >= offset_vbri + 4) {
822 read_id_vbri = GST_READ_UINT32_BE (data + offset_vbri);
825 /* obtain real upstream total bytes */
826 if (!gst_pad_peer_query_duration (GST_BASE_PARSE_SINK_PAD (mp3parse),
827 GST_FORMAT_BYTES, &upstream_total_bytes))
828 upstream_total_bytes = 0;
830 if (read_id_xing == xing_id || read_id_xing == info_id) {
832 guint bytes_needed = offset_xing + 8;
834 GstClockTime total_time;
836 GST_DEBUG_OBJECT (mp3parse, "Found Xing header marker 0x%x", xing_id);
838 /* Move data after Xing header */
839 data += offset_xing + 4;
841 /* Read 4 base bytes of flags, big-endian */
842 xing_flags = GST_READ_UINT32_BE (data);
844 if (xing_flags & XING_FRAMES_FLAG)
846 if (xing_flags & XING_BYTES_FLAG)
848 if (xing_flags & XING_TOC_FLAG)
850 if (xing_flags & XING_VBR_SCALE_FLAG)
852 if (avail < bytes_needed) {
853 GST_DEBUG_OBJECT (mp3parse,
854 "Not enough data to read Xing header (need %d)", bytes_needed);
858 GST_DEBUG_OBJECT (mp3parse, "Reading Xing header");
859 mp3parse->xing_flags = xing_flags;
861 if (xing_flags & XING_FRAMES_FLAG) {
862 mp3parse->xing_frames = GST_READ_UINT32_BE (data);
863 if (mp3parse->xing_frames == 0) {
864 GST_WARNING_OBJECT (mp3parse,
865 "Invalid number of frames in Xing header");
866 mp3parse->xing_flags &= ~XING_FRAMES_FLAG;
868 mp3parse->xing_total_time = gst_util_uint64_scale (GST_SECOND,
869 (guint64) (mp3parse->xing_frames) * (mp3parse->spf),
875 mp3parse->xing_frames = 0;
876 mp3parse->xing_total_time = 0;
879 if (xing_flags & XING_BYTES_FLAG) {
880 mp3parse->xing_bytes = GST_READ_UINT32_BE (data);
881 if (mp3parse->xing_bytes == 0) {
882 GST_WARNING_OBJECT (mp3parse, "Invalid number of bytes in Xing header");
883 mp3parse->xing_flags &= ~XING_BYTES_FLAG;
887 mp3parse->xing_bytes = 0;
890 /* If we know the upstream size and duration, compute the
891 * total bitrate, rounded up to the nearest kbit/sec */
892 if ((total_time = mp3parse->xing_total_time) &&
893 (total_bytes = mp3parse->xing_bytes)) {
894 mp3parse->xing_bitrate = gst_util_uint64_scale (total_bytes,
895 8 * GST_SECOND, total_time);
896 mp3parse->xing_bitrate += 500;
897 mp3parse->xing_bitrate -= mp3parse->xing_bitrate % 1000;
900 if (xing_flags & XING_TOC_FLAG) {
902 guchar *table = mp3parse->xing_seek_table;
907 GST_DEBUG_OBJECT (mp3parse,
908 "Subtracting initial offset of %d bytes from Xing TOC", first);
910 /* xing seek table: percent time -> 1/256 bytepos */
911 for (i = 0; i < 100; i++) {
912 new = data[i] - first;
914 GST_WARNING_OBJECT (mp3parse, "Skipping broken Xing TOC");
915 mp3parse->xing_flags &= ~XING_TOC_FLAG;
918 mp3parse->xing_seek_table[i] = old = new;
921 /* build inverse table: 1/256 bytepos -> 1/100 percent time */
922 for (i = 0; i < 256; i++) {
923 while (percent < 99 && table[percent + 1] <= i)
926 if (table[percent] == i) {
927 mp3parse->xing_seek_table_inverse[i] = percent * 100;
928 } else if (table[percent] < i && percent < 99) {
930 gint a = percent, b = percent + 1;
934 fx = (b - a) / (fb - fa) * (i - fa) + a;
935 mp3parse->xing_seek_table_inverse[i] = (guint16) (fx * 100);
936 } else if (percent == 99) {
938 gint a = percent, b = 100;
942 fx = (b - a) / (fb - fa) * (i - fa) + a;
943 mp3parse->xing_seek_table_inverse[i] = (guint16) (fx * 100);
949 memset (mp3parse->xing_seek_table, 0, 100);
950 memset (mp3parse->xing_seek_table_inverse, 0, 256);
953 if (xing_flags & XING_VBR_SCALE_FLAG) {
954 mp3parse->xing_vbr_scale = GST_READ_UINT32_BE (data);
957 mp3parse->xing_vbr_scale = 0;
959 GST_DEBUG_OBJECT (mp3parse, "Xing header reported %u frames, time %"
960 GST_TIME_FORMAT ", %u bytes, vbr scale %u", mp3parse->xing_frames,
961 GST_TIME_ARGS (mp3parse->xing_total_time), mp3parse->xing_bytes,
962 mp3parse->xing_vbr_scale);
964 /* check for truncated file */
965 if (upstream_total_bytes && mp3parse->xing_bytes &&
966 mp3parse->xing_bytes * 0.8 > upstream_total_bytes) {
967 GST_WARNING_OBJECT (mp3parse, "File appears to have been truncated; "
968 "invalidating Xing header duration and size");
969 mp3parse->xing_flags &= ~XING_BYTES_FLAG;
970 mp3parse->xing_flags &= ~XING_FRAMES_FLAG;
973 /* Optional LAME tag? */
974 if (avail - bytes_needed >= 36 && GST_READ_UINT32_BE (data) == lame_id) {
975 gchar lame_version[10] = { 0, };
977 guint32 encoder_delay, encoder_padding;
979 memcpy (lame_version, data, 9);
981 tag_rev = data[0] >> 4;
982 GST_DEBUG_OBJECT (mp3parse, "Found LAME tag revision %d created by '%s'",
983 tag_rev, lame_version);
985 /* Skip all the information we're not interested in */
987 /* Encoder delay and end padding */
988 encoder_delay = GST_READ_UINT24_BE (data);
989 encoder_delay >>= 12;
990 encoder_padding = GST_READ_UINT24_BE (data);
991 encoder_padding &= 0x000fff;
993 mp3parse->encoder_delay = encoder_delay;
994 mp3parse->encoder_padding = encoder_padding;
996 GST_DEBUG_OBJECT (mp3parse, "Encoder delay %u, encoder padding %u",
997 encoder_delay, encoder_padding);
999 } else if (read_id_vbri == vbri_id) {
1000 gint64 total_bytes, total_frames;
1001 GstClockTime total_time;
1002 guint16 nseek_points;
1004 GST_DEBUG_OBJECT (mp3parse, "Found VBRI header marker 0x%x", vbri_id);
1006 if (avail < offset_vbri + 26) {
1007 GST_DEBUG_OBJECT (mp3parse,
1008 "Not enough data to read VBRI header (need %d)", offset_vbri + 26);
1012 GST_DEBUG_OBJECT (mp3parse, "Reading VBRI header");
1014 /* Move data after VBRI header */
1015 data += offset_vbri + 4;
1017 if (GST_READ_UINT16_BE (data) != 0x0001) {
1018 GST_WARNING_OBJECT (mp3parse,
1019 "Unsupported VBRI version 0x%x", GST_READ_UINT16_BE (data));
1024 /* Skip encoder delay */
1030 total_bytes = GST_READ_UINT32_BE (data);
1031 if (total_bytes != 0)
1032 mp3parse->vbri_bytes = total_bytes;
1035 total_frames = GST_READ_UINT32_BE (data);
1036 if (total_frames != 0) {
1037 mp3parse->vbri_frames = total_frames;
1038 mp3parse->vbri_total_time = gst_util_uint64_scale (GST_SECOND,
1039 (guint64) (mp3parse->vbri_frames) * (mp3parse->spf), mp3parse->rate);
1043 /* If we know the upstream size and duration, compute the
1044 * total bitrate, rounded up to the nearest kbit/sec */
1045 if ((total_time = mp3parse->vbri_total_time) &&
1046 (total_bytes = mp3parse->vbri_bytes)) {
1047 mp3parse->vbri_bitrate = gst_util_uint64_scale (total_bytes,
1048 8 * GST_SECOND, total_time);
1049 mp3parse->vbri_bitrate += 500;
1050 mp3parse->vbri_bitrate -= mp3parse->vbri_bitrate % 1000;
1053 nseek_points = GST_READ_UINT16_BE (data);
1056 if (nseek_points > 0) {
1057 guint scale, seek_bytes, seek_frames;
1060 mp3parse->vbri_seek_points = nseek_points;
1062 scale = GST_READ_UINT16_BE (data);
1065 seek_bytes = GST_READ_UINT16_BE (data);
1068 seek_frames = GST_READ_UINT16_BE (data);
1070 if (scale == 0 || seek_bytes == 0 || seek_bytes > 4 || seek_frames == 0) {
1071 GST_WARNING_OBJECT (mp3parse, "Unsupported VBRI seek table");
1075 if (avail < offset_vbri + 26 + nseek_points * seek_bytes) {
1076 GST_WARNING_OBJECT (mp3parse,
1077 "Not enough data to read VBRI seek table (need %d)",
1078 offset_vbri + 26 + nseek_points * seek_bytes);
1082 if (seek_frames * nseek_points < total_frames - seek_frames ||
1083 seek_frames * nseek_points > total_frames + seek_frames) {
1084 GST_WARNING_OBJECT (mp3parse,
1085 "VBRI seek table doesn't cover the complete file");
1089 if (avail < offset_vbri + 26) {
1090 GST_DEBUG_OBJECT (mp3parse,
1091 "Not enough data to read VBRI header (need %d)",
1092 offset_vbri + 26 + nseek_points * seek_bytes);
1097 data += offset_vbri + 26;
1099 /* VBRI seek table: frame/seek_frames -> byte */
1100 mp3parse->vbri_seek_table = g_new (guint32, nseek_points);
1101 if (seek_bytes == 4)
1102 for (i = 0; i < nseek_points; i++) {
1103 mp3parse->vbri_seek_table[i] = GST_READ_UINT32_BE (data) * scale;
1105 } else if (seek_bytes == 3)
1106 for (i = 0; i < nseek_points; i++) {
1107 mp3parse->vbri_seek_table[i] = GST_READ_UINT24_BE (data) * scale;
1109 } else if (seek_bytes == 2)
1110 for (i = 0; i < nseek_points; i++) {
1111 mp3parse->vbri_seek_table[i] = GST_READ_UINT16_BE (data) * scale;
1113 } else /* seek_bytes == 1 */
1114 for (i = 0; i < nseek_points; i++) {
1115 mp3parse->vbri_seek_table[i] = GST_READ_UINT8 (data) * scale;
1121 GST_DEBUG_OBJECT (mp3parse, "VBRI header reported %u frames, time %"
1122 GST_TIME_FORMAT ", bytes %u", mp3parse->vbri_frames,
1123 GST_TIME_ARGS (mp3parse->vbri_total_time), mp3parse->vbri_bytes);
1125 /* check for truncated file */
1126 if (upstream_total_bytes && mp3parse->vbri_bytes &&
1127 mp3parse->vbri_bytes * 0.8 > upstream_total_bytes) {
1128 GST_WARNING_OBJECT (mp3parse, "File appears to have been truncated; "
1129 "invalidating VBRI header duration and size");
1130 mp3parse->vbri_valid = FALSE;
1132 mp3parse->vbri_valid = TRUE;
1135 GST_DEBUG_OBJECT (mp3parse,
1136 "Xing, LAME or VBRI header not found in first frame");
1139 /* set duration if tables provided a valid one */
1140 if (mp3parse->xing_flags & XING_FRAMES_FLAG) {
1141 gst_base_parse_set_duration (GST_BASE_PARSE (mp3parse), GST_FORMAT_TIME,
1142 mp3parse->xing_total_time, 0);
1144 if (mp3parse->vbri_total_time != 0 && mp3parse->vbri_valid) {
1145 gst_base_parse_set_duration (GST_BASE_PARSE (mp3parse), GST_FORMAT_TIME,
1146 mp3parse->vbri_total_time, 0);
1149 /* tell baseclass how nicely we can seek, and a bitrate if one found */
1150 /* FIXME: fill index with seek table */
1152 seekable = GST_BASE_PARSE_SEEK_DEFAULT;
1153 if ((mp3parse->xing_flags & XING_TOC_FLAG) && mp3parse->xing_bytes &&
1154 mp3parse->xing_total_time)
1155 seekable = GST_BASE_PARSE_SEEK_TABLE;
1157 if (mp3parse->vbri_seek_table && mp3parse->vbri_bytes &&
1158 mp3parse->vbri_total_time)
1159 seekable = GST_BASE_PARSE_SEEK_TABLE;
1162 if (mp3parse->xing_bitrate)
1163 bitrate = mp3parse->xing_bitrate;
1164 else if (mp3parse->vbri_bitrate)
1165 bitrate = mp3parse->vbri_bitrate;
1169 gst_base_parse_set_average_bitrate (GST_BASE_PARSE (mp3parse), bitrate);
1172 gst_buffer_unmap (buf, &map);
/* Converts timestamp @ts into a byte position stored in *bytepos, using a
 * seek table when one is available.
 *
 * Xing TOC (flag set, byte count and total time non-zero): the position of
 * @ts as a percentage of total_time, clamped to 0..100, selects the TOC
 * entry; the result is interpolated linearly towards the next entry and
 * scaled by total_bytes / 256.
 *
 * VBRI table (table present, byte count and total time non-zero): the seek
 * point index is ts scaled by (vbri_seek_points - 1) / total_time, clamped to
 * the table; table entries up to that index are summed to get the byte
 * offset at the point, and the result is interpolated linearly towards the
 * next point (or towards total_time past the last one).
 * NOTE(review): the return statements are not visible in this listing. */
1176 gst_mpeg_audio_parse_time_to_bytepos (GstMpegAudioParse * mp3parse,
1177 GstClockTime ts, gint64 * bytepos)
1180 GstClockTime total_time;
1182 /* If XING seek table exists use this for time->byte conversion */
1183 if ((mp3parse->xing_flags & XING_TOC_FLAG) &&
1184 (total_bytes = mp3parse->xing_bytes) &&
1185 (total_time = mp3parse->xing_total_time)) {
1188 CLAMP ((100.0 * gst_util_guint64_to_gdouble (ts)) /
1189 gst_util_guint64_to_gdouble (total_time), 0.0, 100.0);
1190 gint index = CLAMP (percent, 0, 99);
1192 fa = mp3parse->xing_seek_table[index];
1194 fb = mp3parse->xing_seek_table[index + 1];
1198 fx = fa + (fb - fa) * (percent - index);
1200 *bytepos = (1.0 / 256.0) * fx * total_bytes;
1205 if (mp3parse->vbri_seek_table && (total_bytes = mp3parse->vbri_bytes) &&
1206 (total_time = mp3parse->vbri_total_time)) {
1208 gdouble a, b, fa, fb;
1210 i = gst_util_uint64_scale (ts, mp3parse->vbri_seek_points - 1, total_time);
1211 i = CLAMP (i, 0, mp3parse->vbri_seek_points - 1);
1213 a = gst_guint64_to_gdouble (gst_util_uint64_scale (i, total_time,
1214 mp3parse->vbri_seek_points));
1216 for (j = i; j >= 0; j--)
1217 fa += mp3parse->vbri_seek_table[j];
1219 if (i + 1 < mp3parse->vbri_seek_points) {
1220 b = gst_guint64_to_gdouble (gst_util_uint64_scale (i + 1, total_time,
1221 mp3parse->vbri_seek_points));
1222 fb = fa + mp3parse->vbri_seek_table[i + 1];
1224 b = gst_guint64_to_gdouble (total_time);
1228 *bytepos = fa + ((fb - fa) / (b - a)) * (gst_guint64_to_gdouble (ts) - a);
1237 gst_mpeg_audio_parse_bytepos_to_time (GstMpegAudioParse * mp3parse,
1238 gint64 bytepos, GstClockTime * ts)
1241 GstClockTime total_time;
1243 /* If XING seek table exists use this for byte->time conversion */
1244 if ((mp3parse->xing_flags & XING_TOC_FLAG) &&
1245 (total_bytes = mp3parse->xing_bytes) &&
1246 (total_time = mp3parse->xing_total_time)) {
1251 pos = CLAMP ((bytepos * 256.0) / total_bytes, 0.0, 256.0);
1252 index = CLAMP (pos, 0, 255);
1253 fa = mp3parse->xing_seek_table_inverse[index];
1255 fb = mp3parse->xing_seek_table_inverse[index + 1];
1259 fx = fa + (fb - fa) * (pos - index);
1261 *ts = (1.0 / 10000.0) * fx * gst_util_guint64_to_gdouble (total_time);
1266 if (mp3parse->vbri_seek_table &&
1267 (total_bytes = mp3parse->vbri_bytes) &&
1268 (total_time = mp3parse->vbri_total_time)) {
1271 gdouble a, b, fa, fb;
1274 sum += mp3parse->vbri_seek_table[i];
1276 } while (i + 1 < mp3parse->vbri_seek_points
1277 && sum + mp3parse->vbri_seek_table[i] < bytepos);
1280 a = gst_guint64_to_gdouble (sum);
1281 fa = gst_guint64_to_gdouble (gst_util_uint64_scale (i, total_time,
1282 mp3parse->vbri_seek_points));
1284 if (i + 1 < mp3parse->vbri_seek_points) {
1285 b = a + mp3parse->vbri_seek_table[i + 1];
1286 fb = gst_guint64_to_gdouble (gst_util_uint64_scale (i + 1, total_time,
1287 mp3parse->vbri_seek_points));
1290 fb = gst_guint64_to_gdouble (total_time);
1293 *ts = gst_gdouble_to_guint64 (fa + ((fb - fa) / (b - a)) * (bytepos - a));
1302 gst_mpeg_audio_parse_convert (GstBaseParse * parse, GstFormat src_format,
1303 gint64 src_value, GstFormat dest_format, gint64 * dest_value)
1305 GstMpegAudioParse *mp3parse = GST_MPEG_AUDIO_PARSE (parse);
1306 gboolean res = FALSE;
1308 if (src_format == GST_FORMAT_TIME && dest_format == GST_FORMAT_BYTES)
1310 gst_mpeg_audio_parse_time_to_bytepos (mp3parse, src_value, dest_value);
1311 else if (src_format == GST_FORMAT_BYTES && dest_format == GST_FORMAT_TIME)
1312 res = gst_mpeg_audio_parse_bytepos_to_time (mp3parse, src_value,
1313 (GstClockTime *) dest_value);
1315 /* if no tables, fall back to default estimated rate based conversion */
1317 return gst_base_parse_convert_default (parse, src_format, src_value,
1318 dest_format, dest_value);
1323 static GstFlowReturn
1324 gst_mpeg_audio_parse_pre_push_frame (GstBaseParse * parse,
1325 GstBaseParseFrame * frame)
1327 GstMpegAudioParse *mp3parse = GST_MPEG_AUDIO_PARSE (parse);
1328 GstTagList *taglist;
1330 /* tag sending done late enough in hook to ensure pending events
1331 * have already been sent */
1333 if (!mp3parse->sent_codec_tag) {
1337 if (mp3parse->layer == 3) {
1338 codec = g_strdup_printf ("MPEG %d Audio, Layer %d (MP3)",
1339 mp3parse->version, mp3parse->layer);
1341 codec = g_strdup_printf ("MPEG %d Audio, Layer %d",
1342 mp3parse->version, mp3parse->layer);
1344 taglist = gst_tag_list_new (GST_TAG_AUDIO_CODEC, codec, NULL);
1345 if (mp3parse->hdr_bitrate > 0 && mp3parse->xing_bitrate == 0 &&
1346 mp3parse->vbri_bitrate == 0) {
1347 /* We don't have a VBR bitrate, so post the available bitrate as
1348 * nominal and let baseparse calculate the real bitrate */
1349 gst_tag_list_add (taglist, GST_TAG_MERGE_REPLACE,
1350 GST_TAG_NOMINAL_BITRATE, mp3parse->hdr_bitrate, NULL);
1352 gst_pad_push_event (GST_BASE_PARSE_SRC_PAD (mp3parse),
1353 gst_event_new_tag (taglist));
1356 /* also signals the end of first-frame processing */
1357 mp3parse->sent_codec_tag = TRUE;
1360 /* we will create a taglist (if any of the parameters has changed)
1361 * to add the tags that changed */
1363 if (mp3parse->last_posted_crc != mp3parse->last_crc) {
1367 taglist = gst_tag_list_new_empty ();
1369 mp3parse->last_posted_crc = mp3parse->last_crc;
1370 if (mp3parse->last_posted_crc == CRC_PROTECTED) {
1375 gst_tag_list_add (taglist, GST_TAG_MERGE_REPLACE, GST_TAG_CRC,
1379 if (mp3parse->last_posted_channel_mode != mp3parse->last_mode) {
1381 taglist = gst_tag_list_new_empty ();
1383 mp3parse->last_posted_channel_mode = mp3parse->last_mode;
1385 gst_tag_list_add (taglist, GST_TAG_MERGE_REPLACE, GST_TAG_MODE,
1386 gst_mpeg_audio_channel_mode_get_nick (mp3parse->last_mode), NULL);
1389 /* if the taglist exists, we need to send it */
1391 gst_pad_push_event (GST_BASE_PARSE_SRC_PAD (mp3parse),
1392 gst_event_new_tag (taglist));
1395 /* usual clipping applies */
1396 frame->flags |= GST_BASE_PARSE_FRAME_FLAG_CLIP;
1402 gst_mpeg_audio_parse_get_sink_caps (GstBaseParse * parse, GstCaps * filter)
1407 /* FIXME: handle filter caps */
1409 peercaps = gst_pad_get_allowed_caps (GST_BASE_PARSE_SRC_PAD (parse));
1413 /* Remove the parsed field */
1414 peercaps = gst_caps_make_writable (peercaps);
1415 n = gst_caps_get_size (peercaps);
1416 for (i = 0; i < n; i++) {
1417 GstStructure *s = gst_caps_get_structure (peercaps, i);
1419 gst_structure_remove_field (s, "parsed");
1423 gst_caps_intersect_full (peercaps,
1424 gst_pad_get_pad_template_caps (GST_BASE_PARSE_SRC_PAD (parse)),
1425 GST_CAPS_INTERSECT_FIRST);
1426 gst_caps_unref (peercaps);
1429 gst_caps_copy (gst_pad_get_pad_template_caps (GST_BASE_PARSE_SINK_PAD