ext/speex/gstspeexdec.c

   1 /* GStreamer
   2  * Copyright (C) 2004 Wim Taymans <wim@fluendo.com>
   3  * Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Library General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Library General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Library General Public
  16  * License along with this library; if not, write to the
  17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18  * Boston, MA 02111-1307, USA.
  19  */
  20
  21 /**
  22  * SECTION:element-speexdec
  23  * @see_also: speexenc, oggdemux
  24  *
  25  * This element decodes a Speex stream to raw integer audio.
  26  * <ulink url="http://www.speex.org/">Speex</ulink> is a royalty-free
  27  * audio codec maintained by the <ulink url="http://www.xiph.org/">Xiph.org
  28  * Foundation</ulink>.
  29  *
  30  * <refsect2>
  31  * <title>Example pipelines</title>
  32  * |[
  33  * gst-launch -v filesrc location=speex.ogg ! oggdemux ! speexdec ! audioconvert ! audioresample ! alsasink
  34  * ]| Decode an Ogg/Speex file. To create an Ogg/Speex file refer to the
  35  * documentation of speexenc.
  36  * </refsect2>
  37  *
  38  * Last reviewed on 2006-04-05 (0.10.2)
  39  */
  40
  41 #ifdef HAVE_CONFIG_H
  42 #  include "config.h"
  43 #endif
  44
  45 #include "gstspeexdec.h"
  46 #include <stdlib.h>
  47 #include <string.h>
  48 #include <gst/tag/tag.h>
  49 #include <gst/audio/audio.h>
  50
  51 GST_DEBUG_CATEGORY_STATIC (speexdec_debug);
  52 #define GST_CAT_DEFAULT speexdec_debug
  53
  54 #define DEFAULT_ENH   TRUE
  55
  56 enum
  57 {
  58   ARG_0,
  59   ARG_ENH
  60 };
  61
  62 #define FORMAT_STR GST_AUDIO_NE(S16)
  63
  64 static GstStaticPadTemplate speex_dec_src_factory =
  65 GST_STATIC_PAD_TEMPLATE ("src",
  66     GST_PAD_SRC,
  67     GST_PAD_ALWAYS,
  68     GST_STATIC_CAPS ("audio/x-raw, "
  69         "format = (string) " FORMAT_STR ", "
  70         "layout = (string) interleaved, "
  71         "rate = (int) [ 6000, 48000 ], " "channels = (int) [ 1, 2 ]")
  72     );
  73
  74 static GstStaticPadTemplate speex_dec_sink_factory =
  75 GST_STATIC_PAD_TEMPLATE ("sink",
  76     GST_PAD_SINK,
  77     GST_PAD_ALWAYS,
  78     GST_STATIC_CAPS ("audio/x-speex")
  79     );
  80
  81 #define gst_speex_dec_parent_class parent_class
  82 G_DEFINE_TYPE (GstSpeexDec, gst_speex_dec, GST_TYPE_AUDIO_DECODER);
  83
  84 static gboolean gst_speex_dec_start (GstAudioDecoder * dec);
  85 static gboolean gst_speex_dec_stop (GstAudioDecoder * dec);
  86 static gboolean gst_speex_dec_set_format (GstAudioDecoder * bdec,
  87     GstCaps * caps);
  88 static GstFlowReturn gst_speex_dec_handle_frame (GstAudioDecoder * dec,
  89     GstBuffer * buffer);
  90
  91 static void gst_speex_dec_get_property (GObject * object, guint prop_id,
  92     GValue * value, GParamSpec * pspec);
  93 static void gst_speex_dec_set_property (GObject * object, guint prop_id,
  94     const GValue * value, GParamSpec * pspec);
  95
  96 static void
  97 gst_speex_dec_class_init (GstSpeexDecClass * klass)
  98 {
  99   GObjectClass *gobject_class;
 100   GstElementClass *gstelement_class;
 101   GstAudioDecoderClass *base_class;
 102
 103   gobject_class = (GObjectClass *) klass;
 104   gstelement_class = (GstElementClass *) klass;
 105   base_class = (GstAudioDecoderClass *) klass;
 106
 107   gobject_class->set_property = gst_speex_dec_set_property;
 108   gobject_class->get_property = gst_speex_dec_get_property;
 109
 110   base_class->start = GST_DEBUG_FUNCPTR (gst_speex_dec_start);
 111   base_class->stop = GST_DEBUG_FUNCPTR (gst_speex_dec_stop);
 112   base_class->set_format = GST_DEBUG_FUNCPTR (gst_speex_dec_set_format);
 113   base_class->handle_frame = GST_DEBUG_FUNCPTR (gst_speex_dec_handle_frame);
 114
 115   g_object_class_install_property (G_OBJECT_CLASS (klass), ARG_ENH,
 116       g_param_spec_boolean ("enh", "Enh", "Enable perceptual enhancement",
 117           DEFAULT_ENH, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
 118
 119   gst_element_class_add_pad_template (gstelement_class,
 120       gst_static_pad_template_get (&speex_dec_src_factory));
 121   gst_element_class_add_pad_template (gstelement_class,
 122       gst_static_pad_template_get (&speex_dec_sink_factory));
 123   gst_element_class_set_details_simple (gstelement_class, "Speex audio decoder",
 124       "Codec/Decoder/Audio",
 125       "decode speex streams to audio", "Wim Taymans <wim@fluendo.com>");
 126
 127   GST_DEBUG_CATEGORY_INIT (speexdec_debug, "speexdec", 0,
 128       "speex decoding element");
 129 }
 130
 131 static void
 132 gst_speex_dec_reset (GstSpeexDec * dec)
 133 {
 134   dec->packetno = 0;
 135   dec->frame_size = 0;
 136   dec->frame_duration = 0;
 137   dec->mode = NULL;
 138   free (dec->header);
 139   dec->header = NULL;
 140   speex_bits_destroy (&dec->bits);
 141
 142   gst_buffer_replace (&dec->streamheader, NULL);
 143   gst_buffer_replace (&dec->vorbiscomment, NULL);
 144
 145   if (dec->stereo) {
 146     speex_stereo_state_destroy (dec->stereo);
 147     dec->stereo = NULL;
 148   }
 149
 150   if (dec->state) {
 151     speex_decoder_destroy (dec->state);
 152     dec->state = NULL;
 153   }
 154 }
 155
 156 static void
 157 gst_speex_dec_init (GstSpeexDec * dec)
 158 {
 159   dec->enh = DEFAULT_ENH;
 160
 161   gst_speex_dec_reset (dec);
 162 }
 163
 164 static gboolean
 165 gst_speex_dec_start (GstAudioDecoder * dec)
 166 {
 167   GstSpeexDec *sd = GST_SPEEX_DEC (dec);
 168
 169   GST_DEBUG_OBJECT (dec, "start");
 170   gst_speex_dec_reset (sd);
 171
 172   /* we know about concealment */
 173   gst_audio_decoder_set_plc_aware (dec, TRUE);
 174
 175   return TRUE;
 176 }
 177
 178 static gboolean
 179 gst_speex_dec_stop (GstAudioDecoder * dec)
 180 {
 181   GstSpeexDec *sd = GST_SPEEX_DEC (dec);
 182
 183   GST_DEBUG_OBJECT (dec, "stop");
 184   gst_speex_dec_reset (sd);
 185
 186   return TRUE;
 187 }
 188
 189 static GstFlowReturn
 190 gst_speex_dec_parse_header (GstSpeexDec * dec, GstBuffer * buf)
 191 {
 192   GstMapInfo map;
 193   GstAudioInfo info;
 194   static const GstAudioChannelPosition chan_pos[2][2] = {
 195     {GST_AUDIO_CHANNEL_POSITION_MONO},
 196     {GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
 197         GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT}
 198   };
 199
 200   /* get the header */
 201   gst_buffer_map (buf, &map, GST_MAP_READ);
 202   dec->header = speex_packet_to_header ((gchar *) map.data, map.size);
 203   gst_buffer_unmap (buf, &map);
 204
 205   if (!dec->header)
 206     goto no_header;
 207
 208   if (dec->header->mode >= SPEEX_NB_MODES || dec->header->mode < 0)
 209     goto mode_too_old;
 210
 211   dec->mode = speex_lib_get_mode (dec->header->mode);
 212
 213   /* initialize the decoder */
 214   dec->state = speex_decoder_init (dec->mode);
 215   if (!dec->state)
 216     goto init_failed;
 217
 218   speex_decoder_ctl (dec->state, SPEEX_SET_ENH, &dec->enh);
 219   speex_decoder_ctl (dec->state, SPEEX_GET_FRAME_SIZE, &dec->frame_size);
 220
 221   if (dec->header->nb_channels != 1) {
 222     dec->stereo = speex_stereo_state_init ();
 223     dec->callback.callback_id = SPEEX_INBAND_STEREO;
 224     dec->callback.func = speex_std_stereo_request_handler;
 225     dec->callback.data = dec->stereo;
 226     speex_decoder_ctl (dec->state, SPEEX_SET_HANDLER, &dec->callback);
 227   }
 228
 229   speex_decoder_ctl (dec->state, SPEEX_SET_SAMPLING_RATE, &dec->header->rate);
 230
 231   dec->frame_duration = gst_util_uint64_scale_int (dec->frame_size,
 232       GST_SECOND, dec->header->rate);
 233
 234   speex_bits_init (&dec->bits);
 235
 236   /* set caps */
 237   gst_audio_info_init (&info);
 238   gst_audio_info_set_format (&info,
 239       GST_AUDIO_FORMAT_S16,
 240       dec->header->rate,
 241       dec->header->nb_channels, chan_pos[dec->header->nb_channels - 1]);
 242
 243   if (!gst_audio_decoder_set_output_format (GST_AUDIO_DECODER (dec), &info))
 244     goto nego_failed;
 245
 246   return GST_FLOW_OK;
 247
 248   /* ERRORS */
 249 no_header:
 250   {
 251     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 252         (NULL), ("couldn't read header"));
 253     return GST_FLOW_ERROR;
 254   }
 255 mode_too_old:
 256   {
 257     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 258         (NULL),
 259         ("Mode number %d does not (yet/any longer) exist in this version",
 260             dec->header->mode));
 261     return GST_FLOW_ERROR;
 262   }
 263 init_failed:
 264   {
 265     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 266         (NULL), ("couldn't initialize decoder"));
 267     return GST_FLOW_ERROR;
 268   }
 269 nego_failed:
 270   {
 271     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 272         (NULL), ("couldn't negotiate format"));
 273     return GST_FLOW_NOT_NEGOTIATED;
 274   }
 275 }
 276
 277 static GstFlowReturn
 278 gst_speex_dec_parse_comments (GstSpeexDec * dec, GstBuffer * buf)
 279 {
 280   GstTagList *list;
 281   gchar *ver, *encoder = NULL;
 282
 283   list = gst_tag_list_from_vorbiscomment_buffer (buf, NULL, 0, &encoder);
 284
 285   if (!list) {
 286     GST_WARNING_OBJECT (dec, "couldn't decode comments");
 287     list = gst_tag_list_new_empty ();
 288   }
 289
 290   if (encoder) {
 291     gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 292         GST_TAG_ENCODER, encoder, NULL);
 293   }
 294
 295   gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 296       GST_TAG_AUDIO_CODEC, "Speex", NULL);
 297
 298   ver = g_strndup (dec->header->speex_version, SPEEX_HEADER_VERSION_LENGTH);
 299   g_strstrip (ver);
 300
 301   if (ver != NULL && *ver != '\0') {
 302     gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 303         GST_TAG_ENCODER_VERSION, ver, NULL);
 304   }
 305
 306   if (dec->header->bitrate > 0) {
 307     gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 308         GST_TAG_BITRATE, (guint) dec->header->bitrate, NULL);
 309   }
 310
 311   GST_INFO_OBJECT (dec, "tags: %" GST_PTR_FORMAT, list);
 312
 313   gst_pad_push_event (GST_AUDIO_DECODER_SRC_PAD (dec),
 314       gst_event_new_tag (list));
 315
 316   g_free (encoder);
 317   g_free (ver);
 318
 319   return GST_FLOW_OK;
 320 }
 321
 322 static gboolean
 323 gst_speex_dec_set_format (GstAudioDecoder * bdec, GstCaps * caps)
 324 {
 325   GstSpeexDec *dec = GST_SPEEX_DEC (bdec);
 326   gboolean ret = TRUE;
 327   GstStructure *s;
 328   const GValue *streamheader;
 329
 330   s = gst_caps_get_structure (caps, 0);
 331   if ((streamheader = gst_structure_get_value (s, "streamheader")) &&
 332       G_VALUE_HOLDS (streamheader, GST_TYPE_ARRAY) &&
 333       gst_value_array_get_size (streamheader) >= 2) {
 334     const GValue *header, *vorbiscomment;
 335     GstBuffer *buf;
 336     GstFlowReturn res = GST_FLOW_OK;
 337
 338     header = gst_value_array_get_value (streamheader, 0);
 339     if (header && G_VALUE_HOLDS (header, GST_TYPE_BUFFER)) {
 340       buf = gst_value_get_buffer (header);
 341       res = gst_speex_dec_parse_header (dec, buf);
 342       if (res != GST_FLOW_OK)
 343         goto done;
 344       gst_buffer_replace (&dec->streamheader, buf);
 345     }
 346
 347     vorbiscomment = gst_value_array_get_value (streamheader, 1);
 348     if (vorbiscomment && G_VALUE_HOLDS (vorbiscomment, GST_TYPE_BUFFER)) {
 349       buf = gst_value_get_buffer (vorbiscomment);
 350       res = gst_speex_dec_parse_comments (dec, buf);
 351       if (res != GST_FLOW_OK)
 352         goto done;
 353       gst_buffer_replace (&dec->vorbiscomment, buf);
 354     }
 355   }
 356
 357 done:
 358   return ret;
 359 }
 360
 361 static GstFlowReturn
 362 gst_speex_dec_parse_data (GstSpeexDec * dec, GstBuffer * buf)
 363 {
 364   GstFlowReturn res = GST_FLOW_OK;
 365   gint i, fpp;
 366   SpeexBits *bits;
 367   GstMapInfo map;
 368
 369   if (!dec->frame_duration)
 370     goto not_negotiated;
 371
 372   if (G_LIKELY (gst_buffer_get_size (buf))) {
 373     /* send data to the bitstream */
 374     gst_buffer_map (buf, &map, GST_MAP_READ);
 375     speex_bits_read_from (&dec->bits, (gchar *) map.data, map.size);
 376     gst_buffer_unmap (buf, &map);
 377
 378     fpp = dec->header->frames_per_packet;
 379     bits = &dec->bits;
 380
 381     GST_DEBUG_OBJECT (dec, "received buffer of size %" G_GSIZE_FORMAT
 382         ", fpp %d, %d bits", map.size, fpp, speex_bits_remaining (bits));
 383   } else {
 384     /* FIXME ? actually consider how much concealment is needed */
 385     /* concealment data, pass NULL as the bits parameters */
 386     GST_DEBUG_OBJECT (dec, "creating concealment data");
 387     fpp = dec->header->frames_per_packet;
 388     bits = NULL;
 389   }
 390
 391   /* now decode each frame, catering for unknown number of them (e.g. rtp) */
 392   for (i = 0; i < fpp; i++) {
 393     GstBuffer *outbuf;
 394     gint ret;
 395
 396     GST_LOG_OBJECT (dec, "decoding frame %d/%d, %d bits remaining", i, fpp,
 397         bits ? speex_bits_remaining (bits) : -1);
 398 #if 0
 399     res =
 400         gst_pad_alloc_buffer_and_set_caps (GST_AUDIO_DECODER_SRC_PAD (dec),
 401         GST_BUFFER_OFFSET_NONE, dec->frame_size * dec->header->nb_channels * 2,
 402         GST_PAD_CAPS (GST_AUDIO_DECODER_SRC_PAD (dec)), &outbuf);
 403
 404     if (res != GST_FLOW_OK) {
 405       GST_DEBUG_OBJECT (dec, "buf alloc flow: %s", gst_flow_get_name (res));
 406       return res;
 407     }
 408 #endif
 409     /* FIXME, we can use a bufferpool because we have fixed size buffers. We
 410      * could also use an allocator */
 411     outbuf =
 412         gst_buffer_new_allocate (NULL,
 413         dec->frame_size * dec->header->nb_channels * 2, 0);
 414
 415     gst_buffer_map (outbuf, &map, GST_MAP_WRITE);
 416     ret = speex_decode_int (dec->state, bits, (spx_int16_t *) map.data);
 417
 418     if (ret == -1) {
 419       /* uh? end of stream */
 420       if (fpp == 0 && speex_bits_remaining (bits) < 8) {
 421         /* if we did not know how many frames to expect, then we get this
 422            at the end if there are leftover bits to pad to the next byte */
 423         GST_DEBUG_OBJECT (dec, "Discarding leftover bits");
 424       } else {
 425         GST_WARNING_OBJECT (dec, "Unexpected end of stream found");
 426       }
 427       gst_audio_decoder_finish_frame (GST_AUDIO_DECODER (dec), NULL, 1);
 428       gst_buffer_unref (outbuf);
 429     } else if (ret == -2) {
 430       GST_WARNING_OBJECT (dec, "Decoding error: corrupted stream?");
 431       gst_audio_decoder_finish_frame (GST_AUDIO_DECODER (dec), NULL, 1);
 432       gst_buffer_unref (outbuf);
 433     }
 434
 435     if (bits && speex_bits_remaining (bits) < 0) {
 436       GST_WARNING_OBJECT (dec, "Decoding overflow: corrupted stream?");
 437       gst_audio_decoder_finish_frame (GST_AUDIO_DECODER (dec), NULL, 1);
 438       gst_buffer_unref (outbuf);
 439     }
 440     if (dec->header->nb_channels == 2)
 441       speex_decode_stereo_int ((spx_int16_t *) map.data, dec->frame_size,
 442           dec->stereo);
 443
 444     gst_buffer_unmap (outbuf, &map);
 445
 446     res = gst_audio_decoder_finish_frame (GST_AUDIO_DECODER (dec), outbuf, 1);
 447
 448     if (res != GST_FLOW_OK) {
 449       GST_DEBUG_OBJECT (dec, "flow: %s", gst_flow_get_name (res));
 450       break;
 451     }
 452   }
 453
 454   return res;
 455
 456   /* ERRORS */
 457 not_negotiated:
 458   {
 459     GST_ELEMENT_ERROR (dec, CORE, NEGOTIATION, (NULL),
 460         ("decoder not initialized"));
 461     return GST_FLOW_NOT_NEGOTIATED;
 462   }
 463 }
 464
 465 static gboolean
 466 memcmp_buffers (GstBuffer * buf1, GstBuffer * buf2)
 467 {
 468   GstMapInfo map;
 469   gsize size1, size2;
 470   gboolean res;
 471
 472   size1 = gst_buffer_get_size (buf1);
 473   size2 = gst_buffer_get_size (buf2);
 474
 475   if (size1 != size2)
 476     return FALSE;
 477
 478   gst_buffer_map (buf1, &map, GST_MAP_READ);
 479   res = gst_buffer_memcmp (buf2, 0, map.data, map.size) == 0;
 480   gst_buffer_unmap (buf1, &map);
 481
 482   return res;
 483 }
 484
 485 static GstFlowReturn
 486 gst_speex_dec_handle_frame (GstAudioDecoder * bdec, GstBuffer * buf)
 487 {
 488   GstFlowReturn res;
 489   GstSpeexDec *dec;
 490
 491   /* no fancy draining */
 492   if (G_UNLIKELY (!buf))
 493     return GST_FLOW_OK;
 494
 495   dec = GST_SPEEX_DEC (bdec);
 496
 497   /* If we have the streamheader and vorbiscomment from the caps already
 498    * ignore them here */
 499   if (dec->streamheader && dec->vorbiscomment) {
 500     if (memcmp_buffers (dec->streamheader, buf)) {
 501       GST_DEBUG_OBJECT (dec, "found streamheader");
 502       gst_audio_decoder_finish_frame (bdec, NULL, 1);
 503       res = GST_FLOW_OK;
 504     } else if (memcmp_buffers (dec->vorbiscomment, buf)) {
 505       GST_DEBUG_OBJECT (dec, "found vorbiscomments");
 506       gst_audio_decoder_finish_frame (bdec, NULL, 1);
 507       res = GST_FLOW_OK;
 508     } else {
 509       res = gst_speex_dec_parse_data (dec, buf);
 510     }
 511   } else {
 512     /* Otherwise fall back to packet counting and assume that the
 513      * first two packets are the headers. */
 514     switch (dec->packetno) {
 515       case 0:
 516         GST_DEBUG_OBJECT (dec, "counted streamheader");
 517         res = gst_speex_dec_parse_header (dec, buf);
 518         gst_audio_decoder_finish_frame (bdec, NULL, 1);
 519         break;
 520       case 1:
 521         GST_DEBUG_OBJECT (dec, "counted vorbiscomments");
 522         res = gst_speex_dec_parse_comments (dec, buf);
 523         gst_audio_decoder_finish_frame (bdec, NULL, 1);
 524         break;
 525       default:
 526       {
 527         res = gst_speex_dec_parse_data (dec, buf);
 528         break;
 529       }
 530     }
 531   }
 532
 533   dec->packetno++;
 534
 535   return res;
 536 }
 537
 538 static void
 539 gst_speex_dec_get_property (GObject * object, guint prop_id,
 540     GValue * value, GParamSpec * pspec)
 541 {
 542   GstSpeexDec *speexdec;
 543
 544   speexdec = GST_SPEEX_DEC (object);
 545
 546   switch (prop_id) {
 547     case ARG_ENH:
 548       g_value_set_boolean (value, speexdec->enh);
 549       break;
 550     default:
 551       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
 552       break;
 553   }
 554 }
 555
 556 static void
 557 gst_speex_dec_set_property (GObject * object, guint prop_id,
 558     const GValue * value, GParamSpec * pspec)
 559 {
 560   GstSpeexDec *speexdec;
 561
 562   speexdec = GST_SPEEX_DEC (object);
 563
 564   switch (prop_id) {
 565     case ARG_ENH:
 566       speexdec->enh = g_value_get_boolean (value);
 567       break;
 568     default:
 569       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
 570       break;
 571   }
 572 }