ext/speex/gstspeexdec.c

   1 /* GStreamer
   2  * Copyright (C) 2004 Wim Taymans <wim@fluendo.com>
   3  * Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Library General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Library General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Library General Public
  16  * License along with this library; if not, write to the
  17  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
  18  * Boston, MA 02110-1301, USA.
  19  */
  20
  21 /**
  22  * SECTION:element-speexdec
  23  * @see_also: speexenc, oggdemux
  24  *
  25  * This element decodes a Speex stream to raw integer audio.
  26  * <ulink url="http://www.speex.org/">Speex</ulink> is a royalty-free
  27  * audio codec maintained by the <ulink url="http://www.xiph.org/">Xiph.org
  28  * Foundation</ulink>.
  29  *
  30  * <refsect2>
  31  * <title>Example pipelines</title>
  32  * |[
  33  * gst-launch-1.0 -v filesrc location=speex.ogg ! oggdemux ! speexdec ! audioconvert ! audioresample ! alsasink
  34  * ]| Decode an Ogg/Speex file. To create an Ogg/Speex file refer to the
  35  * documentation of speexenc.
  36  * </refsect2>
  37  */
  38
  39 #ifdef HAVE_CONFIG_H
  40 #  include "config.h"
  41 #endif
  42
  43 #include "gstspeexdec.h"
  44 #include <stdlib.h>
  45 #include <string.h>
  46 #include <gst/tag/tag.h>
  47 #include <gst/audio/audio.h>
  48
  49 GST_DEBUG_CATEGORY_STATIC (speexdec_debug);
  50 #define GST_CAT_DEFAULT speexdec_debug
  51
  52 #define DEFAULT_ENH   TRUE
  53
  54 enum
  55 {
  56   ARG_0,
  57   ARG_ENH
  58 };
  59
  60 #define FORMAT_STR GST_AUDIO_NE(S16)
  61
  62 static GstStaticPadTemplate speex_dec_src_factory =
  63 GST_STATIC_PAD_TEMPLATE ("src",
  64     GST_PAD_SRC,
  65     GST_PAD_ALWAYS,
  66     GST_STATIC_CAPS ("audio/x-raw, "
  67         "format = (string) " FORMAT_STR ", "
  68         "layout = (string) interleaved, "
  69         "rate = (int) [ 6000, 48000 ], " "channels = (int) [ 1, 2 ]")
  70     );
  71
  72 static GstStaticPadTemplate speex_dec_sink_factory =
  73 GST_STATIC_PAD_TEMPLATE ("sink",
  74     GST_PAD_SINK,
  75     GST_PAD_ALWAYS,
  76     GST_STATIC_CAPS ("audio/x-speex")
  77     );
  78
  79 #define gst_speex_dec_parent_class parent_class
  80 G_DEFINE_TYPE (GstSpeexDec, gst_speex_dec, GST_TYPE_AUDIO_DECODER);
  81
  82 static gboolean gst_speex_dec_start (GstAudioDecoder * dec);
  83 static gboolean gst_speex_dec_stop (GstAudioDecoder * dec);
  84 static gboolean gst_speex_dec_set_format (GstAudioDecoder * bdec,
  85     GstCaps * caps);
  86 static GstFlowReturn gst_speex_dec_handle_frame (GstAudioDecoder * dec,
  87     GstBuffer * buffer);
  88
  89 static void gst_speex_dec_get_property (GObject * object, guint prop_id,
  90     GValue * value, GParamSpec * pspec);
  91 static void gst_speex_dec_set_property (GObject * object, guint prop_id,
  92     const GValue * value, GParamSpec * pspec);
  93
  94 static void
  95 gst_speex_dec_class_init (GstSpeexDecClass * klass)
  96 {
  97   GObjectClass *gobject_class;
  98   GstElementClass *gstelement_class;
  99   GstAudioDecoderClass *base_class;
 100
 101   gobject_class = (GObjectClass *) klass;
 102   gstelement_class = (GstElementClass *) klass;
 103   base_class = (GstAudioDecoderClass *) klass;
 104
 105   gobject_class->set_property = gst_speex_dec_set_property;
 106   gobject_class->get_property = gst_speex_dec_get_property;
 107
 108   base_class->start = GST_DEBUG_FUNCPTR (gst_speex_dec_start);
 109   base_class->stop = GST_DEBUG_FUNCPTR (gst_speex_dec_stop);
 110   base_class->set_format = GST_DEBUG_FUNCPTR (gst_speex_dec_set_format);
 111   base_class->handle_frame = GST_DEBUG_FUNCPTR (gst_speex_dec_handle_frame);
 112
 113   g_object_class_install_property (G_OBJECT_CLASS (klass), ARG_ENH,
 114       g_param_spec_boolean ("enh", "Enh", "Enable perceptual enhancement",
 115           DEFAULT_ENH, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
 116
 117   gst_element_class_add_pad_template (gstelement_class,
 118       gst_static_pad_template_get (&speex_dec_src_factory));
 119   gst_element_class_add_pad_template (gstelement_class,
 120       gst_static_pad_template_get (&speex_dec_sink_factory));
 121   gst_element_class_set_static_metadata (gstelement_class,
 122       "Speex audio decoder", "Codec/Decoder/Audio",
 123       "decode speex streams to audio", "Wim Taymans <wim@fluendo.com>");
 124
 125   GST_DEBUG_CATEGORY_INIT (speexdec_debug, "speexdec", 0,
 126       "speex decoding element");
 127 }
 128
 129 static void
 130 gst_speex_dec_reset (GstSpeexDec * dec)
 131 {
 132   dec->packetno = 0;
 133   dec->frame_size = 0;
 134   dec->frame_duration = 0;
 135   dec->mode = NULL;
 136   free (dec->header);
 137   dec->header = NULL;
 138   speex_bits_destroy (&dec->bits);
 139   speex_bits_set_bit_buffer (&dec->bits, NULL, 0);
 140
 141   gst_buffer_replace (&dec->streamheader, NULL);
 142   gst_buffer_replace (&dec->vorbiscomment, NULL);
 143
 144   if (dec->stereo) {
 145     speex_stereo_state_destroy (dec->stereo);
 146     dec->stereo = NULL;
 147   }
 148
 149   if (dec->state) {
 150     speex_decoder_destroy (dec->state);
 151     dec->state = NULL;
 152   }
 153 }
 154
 155 static void
 156 gst_speex_dec_init (GstSpeexDec * dec)
 157 {
 158   gst_audio_decoder_set_needs_format (GST_AUDIO_DECODER (dec), TRUE);
 159   gst_audio_decoder_set_use_default_pad_acceptcaps (GST_AUDIO_DECODER_CAST
 160       (dec), TRUE);
 161   GST_PAD_SET_ACCEPT_TEMPLATE (GST_AUDIO_DECODER_SINK_PAD (dec));
 162
 163   dec->enh = DEFAULT_ENH;
 164
 165   gst_speex_dec_reset (dec);
 166 }
 167
 168 static gboolean
 169 gst_speex_dec_start (GstAudioDecoder * dec)
 170 {
 171   GstSpeexDec *sd = GST_SPEEX_DEC (dec);
 172
 173   GST_DEBUG_OBJECT (dec, "start");
 174   gst_speex_dec_reset (sd);
 175
 176   /* we know about concealment */
 177   gst_audio_decoder_set_plc_aware (dec, TRUE);
 178
 179   return TRUE;
 180 }
 181
 182 static gboolean
 183 gst_speex_dec_stop (GstAudioDecoder * dec)
 184 {
 185   GstSpeexDec *sd = GST_SPEEX_DEC (dec);
 186
 187   GST_DEBUG_OBJECT (dec, "stop");
 188   gst_speex_dec_reset (sd);
 189
 190   return TRUE;
 191 }
 192
 193 static GstFlowReturn
 194 gst_speex_dec_parse_header (GstSpeexDec * dec, GstBuffer * buf)
 195 {
 196   GstMapInfo map;
 197   GstAudioInfo info;
 198   static const GstAudioChannelPosition chan_pos[2][2] = {
 199     {GST_AUDIO_CHANNEL_POSITION_MONO},
 200     {GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
 201         GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT}
 202   };
 203
 204   /* get the header */
 205   gst_buffer_map (buf, &map, GST_MAP_READ);
 206   dec->header = speex_packet_to_header ((gchar *) map.data, map.size);
 207   gst_buffer_unmap (buf, &map);
 208
 209   if (!dec->header)
 210     goto no_header;
 211
 212   if (dec->header->mode >= SPEEX_NB_MODES || dec->header->mode < 0)
 213     goto mode_too_old;
 214
 215   dec->mode = speex_lib_get_mode (dec->header->mode);
 216
 217   /* initialize the decoder */
 218   dec->state = speex_decoder_init (dec->mode);
 219   if (!dec->state)
 220     goto init_failed;
 221
 222   speex_decoder_ctl (dec->state, SPEEX_SET_ENH, &dec->enh);
 223   speex_decoder_ctl (dec->state, SPEEX_GET_FRAME_SIZE, &dec->frame_size);
 224
 225   if (dec->header->nb_channels != 1) {
 226     dec->stereo = speex_stereo_state_init ();
 227     dec->callback.callback_id = SPEEX_INBAND_STEREO;
 228     dec->callback.func = speex_std_stereo_request_handler;
 229     dec->callback.data = dec->stereo;
 230     speex_decoder_ctl (dec->state, SPEEX_SET_HANDLER, &dec->callback);
 231   }
 232
 233   speex_decoder_ctl (dec->state, SPEEX_SET_SAMPLING_RATE, &dec->header->rate);
 234
 235   dec->frame_duration = gst_util_uint64_scale_int (dec->frame_size,
 236       GST_SECOND, dec->header->rate);
 237
 238   speex_bits_init (&dec->bits);
 239
 240   /* set caps */
 241   gst_audio_info_init (&info);
 242   gst_audio_info_set_format (&info,
 243       GST_AUDIO_FORMAT_S16,
 244       dec->header->rate,
 245       dec->header->nb_channels, chan_pos[dec->header->nb_channels - 1]);
 246
 247   if (!gst_audio_decoder_set_output_format (GST_AUDIO_DECODER (dec), &info))
 248     goto nego_failed;
 249
 250   return GST_FLOW_OK;
 251
 252   /* ERRORS */
 253 no_header:
 254   {
 255     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 256         (NULL), ("couldn't read header"));
 257     return GST_FLOW_ERROR;
 258   }
 259 mode_too_old:
 260   {
 261     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 262         (NULL),
 263         ("Mode number %d does not (yet/any longer) exist in this version",
 264             dec->header->mode));
 265     return GST_FLOW_ERROR;
 266   }
 267 init_failed:
 268   {
 269     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 270         (NULL), ("couldn't initialize decoder"));
 271     return GST_FLOW_ERROR;
 272   }
 273 nego_failed:
 274   {
 275     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 276         (NULL), ("couldn't negotiate format"));
 277     return GST_FLOW_NOT_NEGOTIATED;
 278   }
 279 }
 280
 281 static GstFlowReturn
 282 gst_speex_dec_parse_comments (GstSpeexDec * dec, GstBuffer * buf)
 283 {
 284   GstTagList *list;
 285   gchar *ver, *encoder = NULL;
 286
 287   list = gst_tag_list_from_vorbiscomment_buffer (buf, NULL, 0, &encoder);
 288
 289   if (!list) {
 290     GST_WARNING_OBJECT (dec, "couldn't decode comments");
 291     list = gst_tag_list_new_empty ();
 292   }
 293
 294   if (encoder) {
 295     gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 296         GST_TAG_ENCODER, encoder, NULL);
 297   }
 298
 299   gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 300       GST_TAG_AUDIO_CODEC, "Speex", NULL);
 301
 302   ver = g_strndup (dec->header->speex_version, SPEEX_HEADER_VERSION_LENGTH);
 303   g_strstrip (ver);
 304
 305   if (ver != NULL && *ver != '\0') {
 306     gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 307         GST_TAG_ENCODER_VERSION, ver, NULL);
 308   }
 309
 310   if (dec->header->bitrate > 0) {
 311     gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 312         GST_TAG_BITRATE, (guint) dec->header->bitrate, NULL);
 313   }
 314
 315   GST_INFO_OBJECT (dec, "tags: %" GST_PTR_FORMAT, list);
 316
 317   gst_audio_decoder_merge_tags (GST_AUDIO_DECODER (dec), list,
 318       GST_TAG_MERGE_REPLACE);
 319   gst_tag_list_unref (list);
 320
 321   g_free (encoder);
 322   g_free (ver);
 323
 324   return GST_FLOW_OK;
 325 }
 326
 327 static gboolean
 328 gst_speex_dec_set_format (GstAudioDecoder * bdec, GstCaps * caps)
 329 {
 330   GstSpeexDec *dec = GST_SPEEX_DEC (bdec);
 331   gboolean ret = TRUE;
 332   GstStructure *s;
 333   const GValue *streamheader;
 334
 335   s = gst_caps_get_structure (caps, 0);
 336   if ((streamheader = gst_structure_get_value (s, "streamheader")) &&
 337       G_VALUE_HOLDS (streamheader, GST_TYPE_ARRAY) &&
 338       gst_value_array_get_size (streamheader) >= 2) {
 339     const GValue *header, *vorbiscomment;
 340     GstBuffer *buf;
 341     GstFlowReturn res = GST_FLOW_OK;
 342
 343     header = gst_value_array_get_value (streamheader, 0);
 344     if (header && G_VALUE_HOLDS (header, GST_TYPE_BUFFER)) {
 345       buf = gst_value_get_buffer (header);
 346       res = gst_speex_dec_parse_header (dec, buf);
 347       if (res != GST_FLOW_OK)
 348         goto done;
 349       gst_buffer_replace (&dec->streamheader, buf);
 350     }
 351
 352     vorbiscomment = gst_value_array_get_value (streamheader, 1);
 353     if (vorbiscomment && G_VALUE_HOLDS (vorbiscomment, GST_TYPE_BUFFER)) {
 354       buf = gst_value_get_buffer (vorbiscomment);
 355       res = gst_speex_dec_parse_comments (dec, buf);
 356       if (res != GST_FLOW_OK)
 357         goto done;
 358       gst_buffer_replace (&dec->vorbiscomment, buf);
 359     }
 360   }
 361
 362 done:
 363   return ret;
 364 }
 365
 366 static GstFlowReturn
 367 gst_speex_dec_parse_data (GstSpeexDec * dec, GstBuffer * buf)
 368 {
 369   GstFlowReturn res = GST_FLOW_OK;
 370   gint i, fpp;
 371   SpeexBits *bits;
 372   GstMapInfo map;
 373
 374   if (!dec->frame_duration)
 375     goto not_negotiated;
 376
 377   if (G_LIKELY (gst_buffer_get_size (buf))) {
 378     /* send data to the bitstream */
 379     gst_buffer_map (buf, &map, GST_MAP_READ);
 380     speex_bits_read_from (&dec->bits, (gchar *) map.data, map.size);
 381     gst_buffer_unmap (buf, &map);
 382
 383     fpp = dec->header->frames_per_packet;
 384     bits = &dec->bits;
 385
 386     GST_DEBUG_OBJECT (dec, "received buffer of size %" G_GSIZE_FORMAT
 387         ", fpp %d, %d bits", map.size, fpp, speex_bits_remaining (bits));
 388   } else {
 389     /* FIXME ? actually consider how much concealment is needed */
 390     /* concealment data, pass NULL as the bits parameters */
 391     GST_DEBUG_OBJECT (dec, "creating concealment data");
 392     fpp = dec->header->frames_per_packet;
 393     bits = NULL;
 394   }
 395
 396   /* now decode each frame, catering for unknown number of them (e.g. rtp) */
 397   for (i = 0; i < fpp; i++) {
 398     GstBuffer *outbuf;
 399     gboolean corrupted = FALSE;
 400     gint ret;
 401
 402     GST_LOG_OBJECT (dec, "decoding frame %d/%d, %d bits remaining", i, fpp,
 403         bits ? speex_bits_remaining (bits) : -1);
 404 #if 0
 405     res =
 406         gst_pad_alloc_buffer_and_set_caps (GST_AUDIO_DECODER_SRC_PAD (dec),
 407         GST_BUFFER_OFFSET_NONE, dec->frame_size * dec->header->nb_channels * 2,
 408         GST_PAD_CAPS (GST_AUDIO_DECODER_SRC_PAD (dec)), &outbuf);
 409
 410     if (res != GST_FLOW_OK) {
 411       GST_DEBUG_OBJECT (dec, "buf alloc flow: %s", gst_flow_get_name (res));
 412       return res;
 413     }
 414 #endif
 415     /* FIXME, we can use a bufferpool because we have fixed size buffers. We
 416      * could also use an allocator */
 417     outbuf =
 418         gst_buffer_new_allocate (NULL,
 419         dec->frame_size * dec->header->nb_channels * 2, NULL);
 420
 421     gst_buffer_map (outbuf, &map, GST_MAP_WRITE);
 422     ret = speex_decode_int (dec->state, bits, (spx_int16_t *) map.data);
 423
 424     if (ret == -1) {
 425       /* uh? end of stream */
 426       GST_WARNING_OBJECT (dec, "Unexpected end of stream found");
 427       corrupted = TRUE;
 428     } else if (ret == -2) {
 429       GST_WARNING_OBJECT (dec, "Decoding error: corrupted stream?");
 430       corrupted = TRUE;
 431     }
 432
 433     if (bits && speex_bits_remaining (bits) < 0) {
 434       GST_WARNING_OBJECT (dec, "Decoding overflow: corrupted stream?");
 435       corrupted = TRUE;
 436     }
 437     if (dec->header->nb_channels == 2)
 438       speex_decode_stereo_int ((spx_int16_t *) map.data, dec->frame_size,
 439           dec->stereo);
 440
 441     gst_buffer_unmap (outbuf, &map);
 442
 443     if (!corrupted) {
 444       res = gst_audio_decoder_finish_frame (GST_AUDIO_DECODER (dec), outbuf, 1);
 445     } else {
 446       res = gst_audio_decoder_finish_frame (GST_AUDIO_DECODER (dec), NULL, 1);
 447       gst_buffer_unref (outbuf);
 448     }
 449
 450     if (res != GST_FLOW_OK) {
 451       GST_DEBUG_OBJECT (dec, "flow: %s", gst_flow_get_name (res));
 452       break;
 453     }
 454   }
 455
 456   return res;
 457
 458   /* ERRORS */
 459 not_negotiated:
 460   {
 461     GST_ELEMENT_ERROR (dec, CORE, NEGOTIATION, (NULL),
 462         ("decoder not initialized"));
 463     return GST_FLOW_NOT_NEGOTIATED;
 464   }
 465 }
 466
 467 static gboolean
 468 memcmp_buffers (GstBuffer * buf1, GstBuffer * buf2)
 469 {
 470   GstMapInfo map;
 471   gsize size1, size2;
 472   gboolean res;
 473
 474   size1 = gst_buffer_get_size (buf1);
 475   size2 = gst_buffer_get_size (buf2);
 476
 477   if (size1 != size2)
 478     return FALSE;
 479
 480   gst_buffer_map (buf1, &map, GST_MAP_READ);
 481   res = gst_buffer_memcmp (buf2, 0, map.data, map.size) == 0;
 482   gst_buffer_unmap (buf1, &map);
 483
 484   return res;
 485 }
 486
 487 static GstFlowReturn
 488 gst_speex_dec_handle_frame (GstAudioDecoder * bdec, GstBuffer * buf)
 489 {
 490   GstFlowReturn res;
 491   GstSpeexDec *dec;
 492
 493   /* no fancy draining */
 494   if (G_UNLIKELY (!buf))
 495     return GST_FLOW_OK;
 496
 497   dec = GST_SPEEX_DEC (bdec);
 498
 499   /* If we have the streamheader and vorbiscomment from the caps already
 500    * ignore them here */
 501   if (dec->streamheader && dec->vorbiscomment) {
 502     if (memcmp_buffers (dec->streamheader, buf)) {
 503       GST_DEBUG_OBJECT (dec, "found streamheader");
 504       gst_audio_decoder_finish_frame (bdec, NULL, 1);
 505       res = GST_FLOW_OK;
 506     } else if (memcmp_buffers (dec->vorbiscomment, buf)) {
 507       GST_DEBUG_OBJECT (dec, "found vorbiscomments");
 508       gst_audio_decoder_finish_frame (bdec, NULL, 1);
 509       res = GST_FLOW_OK;
 510     } else {
 511       res = gst_speex_dec_parse_data (dec, buf);
 512     }
 513   } else {
 514     /* Otherwise fall back to packet counting and assume that the
 515      * first two packets are the headers. */
 516     switch (dec->packetno) {
 517       case 0:
 518         GST_DEBUG_OBJECT (dec, "counted streamheader");
 519         res = gst_speex_dec_parse_header (dec, buf);
 520         gst_audio_decoder_finish_frame (bdec, NULL, 1);
 521         break;
 522       case 1:
 523         GST_DEBUG_OBJECT (dec, "counted vorbiscomments");
 524         res = gst_speex_dec_parse_comments (dec, buf);
 525         gst_audio_decoder_finish_frame (bdec, NULL, 1);
 526         break;
 527       default:
 528       {
 529         res = gst_speex_dec_parse_data (dec, buf);
 530         break;
 531       }
 532     }
 533   }
 534
 535   dec->packetno++;
 536
 537   return res;
 538 }
 539
 540 static void
 541 gst_speex_dec_get_property (GObject * object, guint prop_id,
 542     GValue * value, GParamSpec * pspec)
 543 {
 544   GstSpeexDec *speexdec;
 545
 546   speexdec = GST_SPEEX_DEC (object);
 547
 548   switch (prop_id) {
 549     case ARG_ENH:
 550       g_value_set_boolean (value, speexdec->enh);
 551       break;
 552     default:
 553       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
 554       break;
 555   }
 556 }
 557
 558 static void
 559 gst_speex_dec_set_property (GObject * object, guint prop_id,
 560     const GValue * value, GParamSpec * pspec)
 561 {
 562   GstSpeexDec *speexdec;
 563
 564   speexdec = GST_SPEEX_DEC (object);
 565
 566   switch (prop_id) {
 567     case ARG_ENH:
 568       speexdec->enh = g_value_get_boolean (value);
 569       break;
 570     default:
 571       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
 572       break;
 573   }
 574 }