1 /* MP3 decoding plugin for GStreamer using the mpg123 library
2 * Copyright (C) 2012 Carlos Rafael Giani
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 * SECTION: element-mpg123audiodec
21 * @see_also: lamemp3enc, mad
23 * Audio decoder for MPEG-1 layer 1/2/3 audio data.
26 * <title>Example pipelines</title>
28 * gst-launch filesrc location=music.mp3 ! mpegaudioparse ! mpg123audiodec ! audioconvert ! audioresample ! autoaudiosink
29 * ]| Decode and play the mp3 file
37 #include "gstmpg123audiodec.h"
42 GST_DEBUG_CATEGORY_STATIC (mpg123_debug);
43 #define GST_CAT_DEFAULT mpg123_debug
45 /* Omitted sample formats that mpg123 supports (or at least can support):
46 * - 8bit integer signed
47 * - 8bit integer unsigned
52 * The first four formats are not supported by the GstAudioDecoder base class.
53 * (The internal gst_audio_format_from_caps_structure() call fails.)
55 * The 64bit float issue is tricky. mpg123 actually decodes to "real",
56 * not necessarily to "float".
 * "real" can be fixed point, 32bit float, or 64bit float. There seems to be
 * no way to find out which one of them is actually used.
61 * However, in all known installations, "real" equals 32bit float, so that's
64 static GstStaticPadTemplate static_sink_template =
65 GST_STATIC_PAD_TEMPLATE ("sink",
68 GST_STATIC_CAPS ("audio/mpeg, "
69 "mpegversion = (int) { 1 }, "
70 "layer = (int) [ 1, 3 ], "
71 "rate = (int) { 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000 }, "
72 "channels = (int) [ 1, 2 ], " "parsed = (boolean) true ")
75 static gboolean gst_mpg123_audio_dec_start (GstAudioDecoder * dec);
76 static gboolean gst_mpg123_audio_dec_stop (GstAudioDecoder * dec);
77 static GstFlowReturn gst_mpg123_audio_dec_push_decoded_bytes (GstMpg123AudioDec
78 * mpg123_decoder, unsigned char const *decoded_bytes,
79 size_t const num_decoded_bytes);
80 static GstFlowReturn gst_mpg123_audio_dec_handle_frame (GstAudioDecoder * dec,
81 GstBuffer * input_buffer);
82 static gboolean gst_mpg123_audio_dec_set_format (GstAudioDecoder * dec,
83 GstCaps * input_caps);
84 static void gst_mpg123_audio_dec_flush (GstAudioDecoder * dec, gboolean hard);
86 G_DEFINE_TYPE (GstMpg123AudioDec, gst_mpg123_audio_dec, GST_TYPE_AUDIO_DECODER);
89 gst_mpg123_audio_dec_class_init (GstMpg123AudioDecClass * klass)
91 GstAudioDecoderClass *base_class;
92 GstElementClass *element_class;
93 GstPadTemplate *src_template, *sink_template;
96 GST_DEBUG_CATEGORY_INIT (mpg123_debug, "mpg123", 0, "mpg123 mp3 decoder");
98 base_class = GST_AUDIO_DECODER_CLASS (klass);
99 element_class = GST_ELEMENT_CLASS (klass);
101 gst_element_class_set_static_metadata (element_class,
102 "mpg123 mp3 decoder",
103 "Codec/Decoder/Audio",
104 "Decodes mp3 streams using the mpg123 library",
105 "Carlos Rafael Giani <dv@pseudoterminal.org>");
107 /* Not using static pad template for srccaps, since the comma-separated list
108 * of formats needs to be created depending on whatever mpg123 supports */
110 const int *format_list;
111 const long *rates_list;
114 GstCaps *src_template_caps;
116 s = g_string_new ("audio/x-raw, ");
118 mpg123_encodings (&format_list, &num);
119 g_string_append (s, "format = { ");
120 for (i = 0; i < num; ++i) {
121 switch (format_list[i]) {
122 case MPG123_ENC_SIGNED_16:
123 g_string_append (s, (i > 0) ? ", " : "");
124 g_string_append (s, GST_AUDIO_NE (S16));
126 case MPG123_ENC_UNSIGNED_16:
127 g_string_append (s, (i > 0) ? ", " : "");
128 g_string_append (s, GST_AUDIO_NE (U16));
130 case MPG123_ENC_SIGNED_24:
131 g_string_append (s, (i > 0) ? ", " : "");
132 g_string_append (s, GST_AUDIO_NE (S24));
134 case MPG123_ENC_UNSIGNED_24:
135 g_string_append (s, (i > 0) ? ", " : "");
136 g_string_append (s, GST_AUDIO_NE (U24));
138 case MPG123_ENC_SIGNED_32:
139 g_string_append (s, (i > 0) ? ", " : "");
140 g_string_append (s, GST_AUDIO_NE (S32));
142 case MPG123_ENC_UNSIGNED_32:
143 g_string_append (s, (i > 0) ? ", " : "");
144 g_string_append (s, GST_AUDIO_NE (U32));
146 case MPG123_ENC_FLOAT_32:
147 g_string_append (s, (i > 0) ? ", " : "");
148 g_string_append (s, GST_AUDIO_NE (F32));
151 GST_DEBUG ("Ignoring mpg123 format %d", format_list[i]);
155 g_string_append (s, " }, ");
157 mpg123_rates (&rates_list, &num);
158 g_string_append (s, "rate = (int) { ");
159 for (i = 0; i < num; ++i) {
160 g_string_append_printf (s, "%s%lu", (i > 0) ? ", " : "", rates_list[i]);
162 g_string_append (s, "}, ");
164 g_string_append (s, "channels = (int) [ 1, 2 ], ");
165 g_string_append (s, "layout = (string) interleaved");
167 src_template_caps = gst_caps_from_string (s->str);
168 src_template = gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS,
171 g_string_free (s, TRUE);
174 sink_template = gst_static_pad_template_get (&static_sink_template);
176 gst_element_class_add_pad_template (element_class, sink_template);
177 gst_element_class_add_pad_template (element_class, src_template);
179 base_class->start = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_start);
180 base_class->stop = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_stop);
181 base_class->handle_frame =
182 GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_handle_frame);
183 base_class->set_format = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_set_format);
184 base_class->flush = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_flush);
186 error = mpg123_init ();
187 if (G_UNLIKELY (error != MPG123_OK))
188 GST_ERROR ("Could not initialize mpg123 library: %s",
189 mpg123_plain_strerror (error));
191 GST_INFO ("mpg123 library initialized");
196 gst_mpg123_audio_dec_init (GstMpg123AudioDec * mpg123_decoder)
198 mpg123_decoder->handle = NULL;
203 gst_mpg123_audio_dec_start (GstAudioDecoder * dec)
205 GstMpg123AudioDec *mpg123_decoder;
208 mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);
211 mpg123_decoder->handle = mpg123_new (NULL, &error);
212 mpg123_decoder->has_next_audioinfo = FALSE;
213 mpg123_decoder->frame_offset = 0;
215 /* Initially, the mpg123 handle comes with a set of default formats
216 * supported. This clears this set. This is necessary, since only one
217 * format shall be supported (see set_format for more). */
218 mpg123_format_none (mpg123_decoder->handle);
220 /* Built-in mpg123 support for gapless decoding is disabled for now,
221 * since it does not work well with seeking */
222 mpg123_param (mpg123_decoder->handle, MPG123_REMOVE_FLAGS, MPG123_GAPLESS, 0);
223 /* Tells mpg123 to use a small read-ahead buffer for better MPEG sync;
224 * essential for MP3 radio streams */
225 mpg123_param (mpg123_decoder->handle, MPG123_ADD_FLAGS, MPG123_SEEKBUFFER, 0);
226 /* Sets the resync limit to the end of the stream (otherwise mpg123 may give
227 * up on decoding prematurely, especially with mp3 web radios) */
228 mpg123_param (mpg123_decoder->handle, MPG123_RESYNC_LIMIT, -1, 0);
229 #if MPG123_API_VERSION >= 36
230 /* The precise API version where MPG123_AUTO_RESAMPLE appeared is
231 * somewhere between 29 and 36 */
232 /* Don't let mpg123 resample output */
233 mpg123_param (mpg123_decoder->handle, MPG123_REMOVE_FLAGS,
234 MPG123_AUTO_RESAMPLE, 0);
236 /* Don't let mpg123 print messages to stdout/stderr */
237 mpg123_param (mpg123_decoder->handle, MPG123_ADD_FLAGS, MPG123_QUIET, 0);
239 /* Open in feed mode (= encoded data is fed manually into the handle). */
240 error = mpg123_open_feed (mpg123_decoder->handle);
242 if (G_UNLIKELY (error != MPG123_OK)) {
243 GST_ELEMENT_ERROR (dec, LIBRARY, INIT, (NULL),
244 ("%s", mpg123_strerror (mpg123_decoder->handle)));
245 mpg123_close (mpg123_decoder->handle);
246 mpg123_delete (mpg123_decoder->handle);
247 mpg123_decoder->handle = NULL;
251 GST_INFO_OBJECT (dec, "mpg123 decoder started");
258 gst_mpg123_audio_dec_stop (GstAudioDecoder * dec)
260 GstMpg123AudioDec *mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);
262 if (G_LIKELY (mpg123_decoder->handle != NULL)) {
263 mpg123_close (mpg123_decoder->handle);
264 mpg123_delete (mpg123_decoder->handle);
265 mpg123_decoder->handle = NULL;
268 GST_INFO_OBJECT (dec, "mpg123 decoder stopped");
275 gst_mpg123_audio_dec_push_decoded_bytes (GstMpg123AudioDec * mpg123_decoder,
276 unsigned char const *decoded_bytes, size_t const num_decoded_bytes)
278 GstBuffer *output_buffer;
279 GstAudioDecoder *dec;
281 output_buffer = NULL;
282 dec = GST_AUDIO_DECODER (mpg123_decoder);
284 if ((num_decoded_bytes == 0) || (decoded_bytes == NULL)) {
285 /* This occurs in the first few frames, which do not carry data; once
286 * MPG123_AUDIO_DEC_NEW_FORMAT is received, the empty frames stop occurring */
287 GST_DEBUG_OBJECT (mpg123_decoder,
288 "cannot decode yet, need more data -> no output buffer to push");
292 output_buffer = gst_buffer_new_allocate (NULL, num_decoded_bytes, NULL);
294 if (output_buffer == NULL) {
295 /* This is necessary to advance playback in time,
296 * even when nothing was decoded. */
297 return gst_audio_decoder_finish_frame (dec, NULL, 1);
301 if (gst_buffer_map (output_buffer, &info, GST_MAP_WRITE)) {
302 memcpy (info.data, decoded_bytes, num_decoded_bytes);
303 gst_buffer_unmap (output_buffer, &info);
305 GST_ERROR_OBJECT (mpg123_decoder, "gst_buffer_map() returned NULL");
306 gst_buffer_unref (output_buffer);
307 output_buffer = NULL;
310 return gst_audio_decoder_finish_frame (dec, output_buffer, 1);
316 gst_mpg123_audio_dec_handle_frame (GstAudioDecoder * dec,
317 GstBuffer * input_buffer)
319 GstMpg123AudioDec *mpg123_decoder;
321 unsigned char *decoded_bytes;
322 size_t num_decoded_bytes;
323 GstFlowReturn retval;
325 mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);
327 g_assert (mpg123_decoder->handle != NULL);
329 /* The actual decoding */
331 /* feed input data (if there is any) */
332 if (G_LIKELY (input_buffer != NULL)) {
335 if (gst_buffer_map (input_buffer, &info, GST_MAP_READ)) {
336 mpg123_feed (mpg123_decoder->handle, info.data, info.size);
337 gst_buffer_unmap (input_buffer, &info);
339 GST_ERROR_OBJECT (mpg123_decoder, "gst_memory_map() failed");
340 return GST_FLOW_ERROR;
344 /* Try to decode a frame */
345 decoded_bytes = NULL;
346 num_decoded_bytes = 0;
347 decode_error = mpg123_decode_frame (mpg123_decoder->handle,
348 &mpg123_decoder->frame_offset, &decoded_bytes, &num_decoded_bytes);
351 retval = GST_FLOW_OK;
353 switch (decode_error) {
354 case MPG123_NEW_FORMAT:
355 /* As mentioned in gst_mpg123_audio_dec_set_format(), the next audioinfo
356 * is not set immediately; instead, the code waits for mpg123 to take
357 * note of the new format, and then sets the audioinfo. This fixes glitches
358 * with mp3s containing several format headers (for example, first half
359 * using 44.1kHz, second half 32 kHz) */
362 "mpg123 reported a new format -> setting next srccaps");
364 gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder, decoded_bytes,
367 /* If there is a next audioinfo, use it, then set has_next_audioinfo to
368 * FALSE, to make sure gst_audio_decoder_set_output_format() isn't called
369 * again until set_format is called by the base class */
370 if (mpg123_decoder->has_next_audioinfo) {
371 if (!gst_audio_decoder_set_output_format (dec,
372 &(mpg123_decoder->next_audioinfo))) {
373 GST_WARNING_OBJECT (dec, "Unable to set output format");
374 retval = GST_FLOW_NOT_NEGOTIATED;
376 mpg123_decoder->has_next_audioinfo = FALSE;
381 case MPG123_NEED_MORE:
383 retval = gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder,
384 decoded_bytes, num_decoded_bytes);
388 /* If this happens, then the upstream parser somehow missed the ending
389 * of the bitstream */
390 GST_LOG_OBJECT (dec, "mpg123 is done decoding");
391 gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder, decoded_bytes,
393 retval = GST_FLOW_EOS;
398 /* Anything else is considered an error */
400 switch (decode_error) {
402 errcode = mpg123_errcode (mpg123_decoder->handle);
405 errcode = decode_error;
408 case MPG123_BAD_OUTFORMAT:{
409 GstCaps *input_caps =
410 gst_pad_get_current_caps (GST_AUDIO_DECODER_SINK_PAD (dec));
411 GST_ELEMENT_ERROR (dec, STREAM, FORMAT, (NULL),
412 ("Output sample format could not be used when trying to decode frame. "
413 "This is typically caused when the input caps (often the sample "
414 "rate) do not match the actual format of the audio data. "
415 "Input caps: %" GST_PTR_FORMAT, input_caps)
417 gst_caps_unref (input_caps);
421 char const *errmsg = mpg123_plain_strerror (errcode);
422 GST_ERROR_OBJECT (dec, "Reported error: %s", errmsg);
426 retval = GST_FLOW_ERROR;
435 gst_mpg123_audio_dec_set_format (GstAudioDecoder * dec, GstCaps * input_caps)
437 /* Using the parsed information upstream, and the list of allowed caps
438 * downstream, this code tries to find a suitable audio info. It is important
439 * to keep in mind that the rate and number of channels should never deviate
440 * from the one the bitstream has, otherwise mpg123 has to mix channels and/or
441 * resample (and as its docs say, its internal resampler is very crude). The
442 * sample format, however, can be chosen freely, because the MPEG specs do not
443 * mandate any special format. Therefore, rate and number of channels are taken
444 * from upstream (which parsed the MPEG frames, so the input_caps contain
445 * exactly the rate and number of channels the bitstream actually has), while
446 * the sample format is chosen by trying out all caps that are allowed by
447 * downstream. This way, the output is adjusted to what the downstream prefers.
449 * Also, the new output audio info is not set immediately. Instead, it is
450 * considered the "next audioinfo". The code waits for mpg123 to notice the new
451 * format (= when mpg123_decode_frame() returns MPG123_AUDIO_DEC_NEW_FORMAT),
452 * and then sets the next audioinfo. Otherwise, the next audioinfo is set too
453 * soon, which may cause problems with mp3s containing several format headers.
454 * One example would be an mp3 with the first 30 seconds using 44.1 kHz, then
455 * the next 30 seconds using 32 kHz. Rare, but possible.
459 * 1. get rate and channels from input_caps
460 * 2. get allowed caps from src pad
461 * 3. for each structure in allowed caps:
463 * 3.2. if the combination of format with rate and channels is unsupported by
464 * mpg123, go to (3), or exit with error if there are no more structures
466 * 3.3. create next audioinfo out of rate,channels,format, and exit
471 GstMpg123AudioDec *mpg123_decoder;
472 GstCaps *allowed_srccaps;
474 gboolean match_found = FALSE;
476 mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);
478 g_assert (mpg123_decoder->handle != NULL);
480 mpg123_decoder->has_next_audioinfo = FALSE;
482 /* Get rate and channels from input_caps */
484 GstStructure *structure;
485 gboolean err = FALSE;
487 /* Only the first structure is used (multiple
488 * input caps structures don't make sense */
489 structure = gst_caps_get_structure (input_caps, 0);
491 if (!gst_structure_get_int (structure, "rate", &rate)) {
493 GST_ERROR_OBJECT (dec, "Input caps do not have a rate value");
495 if (!gst_structure_get_int (structure, "channels", &channels)) {
497 GST_ERROR_OBJECT (dec, "Input caps do not have a channel value");
504 /* Get the caps that are allowed by downstream */
506 GstCaps *allowed_srccaps_unnorm =
507 gst_pad_get_allowed_caps (GST_AUDIO_DECODER_SRC_PAD (dec));
508 allowed_srccaps = gst_caps_normalize (allowed_srccaps_unnorm);
511 /* Go through all allowed caps, pick the first one that matches */
512 for (structure_nr = 0; structure_nr < gst_caps_get_size (allowed_srccaps);
514 GstStructure *structure;
515 gchar const *format_str;
516 GstAudioFormat format;
519 structure = gst_caps_get_structure (allowed_srccaps, structure_nr);
521 format_str = gst_structure_get_string (structure, "format");
522 if (format_str == NULL) {
523 GST_DEBUG_OBJECT (dec, "Could not get format from src caps");
527 format = gst_audio_format_from_string (format_str);
528 if (format == GST_AUDIO_FORMAT_UNKNOWN) {
529 GST_DEBUG_OBJECT (dec, "Unknown format %s", format_str);
534 case GST_AUDIO_FORMAT_S16:
535 encoding = MPG123_ENC_SIGNED_16;
537 case GST_AUDIO_FORMAT_S24:
538 encoding = MPG123_ENC_SIGNED_24;
540 case GST_AUDIO_FORMAT_S32:
541 encoding = MPG123_ENC_SIGNED_32;
543 case GST_AUDIO_FORMAT_U16:
544 encoding = MPG123_ENC_UNSIGNED_16;
546 case GST_AUDIO_FORMAT_U24:
547 encoding = MPG123_ENC_UNSIGNED_24;
549 case GST_AUDIO_FORMAT_U32:
550 encoding = MPG123_ENC_UNSIGNED_32;
552 case GST_AUDIO_FORMAT_F32:
553 encoding = MPG123_ENC_FLOAT_32;
556 GST_DEBUG_OBJECT (dec,
557 "Format %s in srccaps is not supported", format_str);
564 /* Cleanup old formats & set new one */
565 mpg123_format_none (mpg123_decoder->handle);
566 err = mpg123_format (mpg123_decoder->handle, rate, channels, encoding);
567 if (err != MPG123_OK) {
568 GST_DEBUG_OBJECT (dec,
569 "mpg123 cannot use caps %" GST_PTR_FORMAT
570 " because mpg123_format() failed: %s", structure,
571 mpg123_strerror (mpg123_decoder->handle));
576 gst_audio_info_init (&(mpg123_decoder->next_audioinfo));
577 gst_audio_info_set_format (&(mpg123_decoder->next_audioinfo), format, rate,
579 GST_LOG_OBJECT (dec, "The next audio format is: %s, %u Hz, %u channels",
580 format_str, rate, channels);
581 mpg123_decoder->has_next_audioinfo = TRUE;
588 gst_caps_unref (allowed_srccaps);
595 gst_mpg123_audio_dec_flush (GstAudioDecoder * dec, gboolean hard)
598 GstMpg123AudioDec *mpg123_decoder;
602 GST_LOG_OBJECT (dec, "Flushing decoder");
604 mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);
606 g_assert (mpg123_decoder->handle != NULL);
608 /* Flush by reopening the feed */
609 mpg123_close (mpg123_decoder->handle);
610 error = mpg123_open_feed (mpg123_decoder->handle);
612 if (G_UNLIKELY (error != MPG123_OK)) {
613 GST_ELEMENT_ERROR (dec, LIBRARY, INIT, (NULL),
614 ("Error while reopening mpg123 feed: %s",
615 mpg123_plain_strerror (error)));
616 mpg123_close (mpg123_decoder->handle);
617 mpg123_delete (mpg123_decoder->handle);
618 mpg123_decoder->handle = NULL;
621 mpg123_decoder->has_next_audioinfo = FALSE;
623 /* opening/closing feeds do not affect the format defined by the
624 * mpg123_format() call that was made in gst_mpg123_audio_dec_set_format(),
625 * and since the up/downstream caps are not expected to change here, no
626 * mpg123_format() calls are done */
630 plugin_init (GstPlugin * plugin)
632 return gst_element_register (plugin, "mpg123audiodec",
633 GST_RANK_MARGINAL, gst_mpg123_audio_dec_get_type ());
636 GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
638 mpg123, "mp3 decoding based on the mpg123 library",
639 plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)