ext/speex/gstspeexdec.c

   1 /* GStreamer
   2  * Copyright (C) 2004 Wim Taymans <wim@fluendo.com>
   3  * Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Library General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Library General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Library General Public
  16  * License along with this library; if not, write to the
  17  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
  18  * Boston, MA 02110-1301, USA.
  19  */
  20
  21 /**
  22  * SECTION:element-speexdec
  23  * @see_also: speexenc, oggdemux
  24  *
  25  * This element decodes a Speex stream to raw integer audio.
  26  * <ulink url="http://www.speex.org/">Speex</ulink> is a royalty-free
  27  * audio codec maintained by the <ulink url="http://www.xiph.org/">Xiph.org
  28  * Foundation</ulink>.
  29  *
  30  * <refsect2>
  31  * <title>Example pipelines</title>
  32  * |[
  33  * gst-launch-1.0 -v filesrc location=speex.ogg ! oggdemux ! speexdec ! audioconvert ! audioresample ! alsasink
  34  * ]| Decode an Ogg/Speex file. To create an Ogg/Speex file refer to the
  35  * documentation of speexenc.
  36  * </refsect2>
  37  *
  38  * Last reviewed on 2006-04-05 (0.10.2)
  39  */
  40
  41 #ifdef HAVE_CONFIG_H
  42 #  include "config.h"
  43 #endif
  44
  45 #include "gstspeexdec.h"
  46 #include <stdlib.h>
  47 #include <string.h>
  48 #include <gst/tag/tag.h>
  49 #include <gst/audio/audio.h>
  50
/* Debug category used by the logging macros in this file; it is registered
 * under the name "speexdec" in gst_speex_dec_class_init(). */
GST_DEBUG_CATEGORY_STATIC (speexdec_debug);
#define GST_CAT_DEFAULT speexdec_debug

/* Default value of the "enh" (perceptual enhancement) property. */
#define DEFAULT_ENH   TRUE

/* GObject property identifiers. ARG_0 is not installed as a property in this
 * file; ARG_ENH identifies the boolean "enh" property. */
enum
{
  ARG_0,
  ARG_ENH
};
  61
/* Caps string for the raw sample format advertised on the source pad
 * (S16 wrapped in GST_AUDIO_NE). */
#define FORMAT_STR GST_AUDIO_NE(S16)

/* Always-present source pad: interleaved raw audio in FORMAT_STR, with a
 * sample rate between 6000 and 48000 Hz and one or two channels. */
static GstStaticPadTemplate speex_dec_src_factory =
GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("audio/x-raw, "
        "format = (string) " FORMAT_STR ", "
        "layout = (string) interleaved, "
        "rate = (int) [ 6000, 48000 ], " "channels = (int) [ 1, 2 ]")
    );

/* Always-present sink pad: accepts audio/x-speex with no further
 * restrictions on the caps fields. */
static GstStaticPadTemplate speex_dec_sink_factory =
GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("audio/x-speex")
    );
  80
/* Register GstSpeexDec as a subclass of GstAudioDecoder. The #define makes
 * the parent-class pointer generated by G_DEFINE_TYPE available under the
 * short name parent_class. */
#define gst_speex_dec_parent_class parent_class
G_DEFINE_TYPE (GstSpeexDec, gst_speex_dec, GST_TYPE_AUDIO_DECODER);

/* GstAudioDecoder virtual methods implemented further down; they are hooked
 * into the class structure in gst_speex_dec_class_init(). */
static gboolean gst_speex_dec_start (GstAudioDecoder * dec);
static gboolean gst_speex_dec_stop (GstAudioDecoder * dec);
static gboolean gst_speex_dec_set_format (GstAudioDecoder * bdec,
    GstCaps * caps);
static GstFlowReturn gst_speex_dec_handle_frame (GstAudioDecoder * dec,
    GstBuffer * buffer);

/* GObject property accessors for the "enh" property. */
static void gst_speex_dec_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec);
static void gst_speex_dec_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec);
  95
  96 static void
  97 gst_speex_dec_class_init (GstSpeexDecClass * klass)
  98 {
  99   GObjectClass *gobject_class;
 100   GstElementClass *gstelement_class;
 101   GstAudioDecoderClass *base_class;
 102
 103   gobject_class = (GObjectClass *) klass;
 104   gstelement_class = (GstElementClass *) klass;
 105   base_class = (GstAudioDecoderClass *) klass;
 106
 107   gobject_class->set_property = gst_speex_dec_set_property;
 108   gobject_class->get_property = gst_speex_dec_get_property;
 109
 110   base_class->start = GST_DEBUG_FUNCPTR (gst_speex_dec_start);
 111   base_class->stop = GST_DEBUG_FUNCPTR (gst_speex_dec_stop);
 112   base_class->set_format = GST_DEBUG_FUNCPTR (gst_speex_dec_set_format);
 113   base_class->handle_frame = GST_DEBUG_FUNCPTR (gst_speex_dec_handle_frame);
 114
 115   g_object_class_install_property (G_OBJECT_CLASS (klass), ARG_ENH,
 116       g_param_spec_boolean ("enh", "Enh", "Enable perceptual enhancement",
 117           DEFAULT_ENH, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
 118
 119   gst_element_class_add_pad_template (gstelement_class,
 120       gst_static_pad_template_get (&speex_dec_src_factory));
 121   gst_element_class_add_pad_template (gstelement_class,
 122       gst_static_pad_template_get (&speex_dec_sink_factory));
 123   gst_element_class_set_static_metadata (gstelement_class,
 124       "Speex audio decoder", "Codec/Decoder/Audio",
 125       "decode speex streams to audio", "Wim Taymans <wim@fluendo.com>");
 126
 127   GST_DEBUG_CATEGORY_INIT (speexdec_debug, "speexdec", 0,
 128       "speex decoding element");
 129 }
 130
 131 static void
 132 gst_speex_dec_reset (GstSpeexDec * dec)
 133 {
 134   dec->packetno = 0;
 135   dec->frame_size = 0;
 136   dec->frame_duration = 0;
 137   dec->mode = NULL;
 138   free (dec->header);
 139   dec->header = NULL;
 140   speex_bits_destroy (&dec->bits);
 141
 142   gst_buffer_replace (&dec->streamheader, NULL);
 143   gst_buffer_replace (&dec->vorbiscomment, NULL);
 144
 145   if (dec->stereo) {
 146     speex_stereo_state_destroy (dec->stereo);
 147     dec->stereo = NULL;
 148   }
 149
 150   if (dec->state) {
 151     speex_decoder_destroy (dec->state);
 152     dec->state = NULL;
 153   }
 154 }
 155
 156 static void
 157 gst_speex_dec_init (GstSpeexDec * dec)
 158 {
 159   dec->enh = DEFAULT_ENH;
 160
 161   gst_speex_dec_reset (dec);
 162 }
 163
 164 static gboolean
 165 gst_speex_dec_start (GstAudioDecoder * dec)
 166 {
 167   GstSpeexDec *sd = GST_SPEEX_DEC (dec);
 168
 169   GST_DEBUG_OBJECT (dec, "start");
 170   gst_speex_dec_reset (sd);
 171
 172   /* we know about concealment */
 173   gst_audio_decoder_set_plc_aware (dec, TRUE);
 174
 175   return TRUE;
 176 }
 177
 178 static gboolean
 179 gst_speex_dec_stop (GstAudioDecoder * dec)
 180 {
 181   GstSpeexDec *sd = GST_SPEEX_DEC (dec);
 182
 183   GST_DEBUG_OBJECT (dec, "stop");
 184   gst_speex_dec_reset (sd);
 185
 186   return TRUE;
 187 }
 188
 189 static GstFlowReturn
 190 gst_speex_dec_parse_header (GstSpeexDec * dec, GstBuffer * buf)
 191 {
 192   GstMapInfo map;
 193   GstAudioInfo info;
 194   static const GstAudioChannelPosition chan_pos[2][2] = {
 195     {GST_AUDIO_CHANNEL_POSITION_MONO},
 196     {GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
 197         GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT}
 198   };
 199
 200   /* get the header */
 201   gst_buffer_map (buf, &map, GST_MAP_READ);
 202   dec->header = speex_packet_to_header ((gchar *) map.data, map.size);
 203   gst_buffer_unmap (buf, &map);
 204
 205   if (!dec->header)
 206     goto no_header;
 207
 208   if (dec->header->mode >= SPEEX_NB_MODES || dec->header->mode < 0)
 209     goto mode_too_old;
 210
 211   dec->mode = speex_lib_get_mode (dec->header->mode);
 212
 213   /* initialize the decoder */
 214   dec->state = speex_decoder_init (dec->mode);
 215   if (!dec->state)
 216     goto init_failed;
 217
 218   speex_decoder_ctl (dec->state, SPEEX_SET_ENH, &dec->enh);
 219   speex_decoder_ctl (dec->state, SPEEX_GET_FRAME_SIZE, &dec->frame_size);
 220
 221   if (dec->header->nb_channels != 1) {
 222     dec->stereo = speex_stereo_state_init ();
 223     dec->callback.callback_id = SPEEX_INBAND_STEREO;
 224     dec->callback.func = speex_std_stereo_request_handler;
 225     dec->callback.data = dec->stereo;
 226     speex_decoder_ctl (dec->state, SPEEX_SET_HANDLER, &dec->callback);
 227   }
 228
 229   speex_decoder_ctl (dec->state, SPEEX_SET_SAMPLING_RATE, &dec->header->rate);
 230
 231   dec->frame_duration = gst_util_uint64_scale_int (dec->frame_size,
 232       GST_SECOND, dec->header->rate);
 233
 234   speex_bits_init (&dec->bits);
 235
 236   /* set caps */
 237   gst_audio_info_init (&info);
 238   gst_audio_info_set_format (&info,
 239       GST_AUDIO_FORMAT_S16,
 240       dec->header->rate,
 241       dec->header->nb_channels, chan_pos[dec->header->nb_channels - 1]);
 242
 243   if (!gst_audio_decoder_set_output_format (GST_AUDIO_DECODER (dec), &info))
 244     goto nego_failed;
 245
 246   return GST_FLOW_OK;
 247
 248   /* ERRORS */
 249 no_header:
 250   {
 251     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 252         (NULL), ("couldn't read header"));
 253     return GST_FLOW_ERROR;
 254   }
 255 mode_too_old:
 256   {
 257     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 258         (NULL),
 259         ("Mode number %d does not (yet/any longer) exist in this version",
 260             dec->header->mode));
 261     return GST_FLOW_ERROR;
 262   }
 263 init_failed:
 264   {
 265     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 266         (NULL), ("couldn't initialize decoder"));
 267     return GST_FLOW_ERROR;
 268   }
 269 nego_failed:
 270   {
 271     GST_ELEMENT_ERROR (GST_ELEMENT (dec), STREAM, DECODE,
 272         (NULL), ("couldn't negotiate format"));
 273     return GST_FLOW_NOT_NEGOTIATED;
 274   }
 275 }
 276
 277 static GstFlowReturn
 278 gst_speex_dec_parse_comments (GstSpeexDec * dec, GstBuffer * buf)
 279 {
 280   GstTagList *list;
 281   gchar *ver, *encoder = NULL;
 282
 283   list = gst_tag_list_from_vorbiscomment_buffer (buf, NULL, 0, &encoder);
 284
 285   if (!list) {
 286     GST_WARNING_OBJECT (dec, "couldn't decode comments");
 287     list = gst_tag_list_new_empty ();
 288   }
 289
 290   if (encoder) {
 291     gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 292         GST_TAG_ENCODER, encoder, NULL);
 293   }
 294
 295   gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 296       GST_TAG_AUDIO_CODEC, "Speex", NULL);
 297
 298   ver = g_strndup (dec->header->speex_version, SPEEX_HEADER_VERSION_LENGTH);
 299   g_strstrip (ver);
 300
 301   if (ver != NULL && *ver != '\0') {
 302     gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 303         GST_TAG_ENCODER_VERSION, ver, NULL);
 304   }
 305
 306   if (dec->header->bitrate > 0) {
 307     gst_tag_list_add (list, GST_TAG_MERGE_REPLACE,
 308         GST_TAG_BITRATE, (guint) dec->header->bitrate, NULL);
 309   }
 310
 311   GST_INFO_OBJECT (dec, "tags: %" GST_PTR_FORMAT, list);
 312
 313   gst_audio_decoder_merge_tags (GST_AUDIO_DECODER (dec), list,
 314       GST_TAG_MERGE_REPLACE);
 315   gst_tag_list_unref (list);
 316
 317   g_free (encoder);
 318   g_free (ver);
 319
 320   return GST_FLOW_OK;
 321 }
 322
 323 static gboolean
 324 gst_speex_dec_set_format (GstAudioDecoder * bdec, GstCaps * caps)
 325 {
 326   GstSpeexDec *dec = GST_SPEEX_DEC (bdec);
 327   gboolean ret = TRUE;
 328   GstStructure *s;
 329   const GValue *streamheader;
 330
 331   s = gst_caps_get_structure (caps, 0);
 332   if ((streamheader = gst_structure_get_value (s, "streamheader")) &&
 333       G_VALUE_HOLDS (streamheader, GST_TYPE_ARRAY) &&
 334       gst_value_array_get_size (streamheader) >= 2) {
 335     const GValue *header, *vorbiscomment;
 336     GstBuffer *buf;
 337     GstFlowReturn res = GST_FLOW_OK;
 338
 339     header = gst_value_array_get_value (streamheader, 0);
 340     if (header && G_VALUE_HOLDS (header, GST_TYPE_BUFFER)) {
 341       buf = gst_value_get_buffer (header);
 342       res = gst_speex_dec_parse_header (dec, buf);
 343       if (res != GST_FLOW_OK)
 344         goto done;
 345       gst_buffer_replace (&dec->streamheader, buf);
 346     }
 347
 348     vorbiscomment = gst_value_array_get_value (streamheader, 1);
 349     if (vorbiscomment && G_VALUE_HOLDS (vorbiscomment, GST_TYPE_BUFFER)) {
 350       buf = gst_value_get_buffer (vorbiscomment);
 351       res = gst_speex_dec_parse_comments (dec, buf);
 352       if (res != GST_FLOW_OK)
 353         goto done;
 354       gst_buffer_replace (&dec->vorbiscomment, buf);
 355     }
 356   }
 357
 358 done:
 359   return ret;
 360 }
 361
 362 static GstFlowReturn
 363 gst_speex_dec_parse_data (GstSpeexDec * dec, GstBuffer * buf)
 364 {
 365   GstFlowReturn res = GST_FLOW_OK;
 366   gint i, fpp;
 367   SpeexBits *bits;
 368   GstMapInfo map;
 369
 370   if (!dec->frame_duration)
 371     goto not_negotiated;
 372
 373   if (G_LIKELY (gst_buffer_get_size (buf))) {
 374     /* send data to the bitstream */
 375     gst_buffer_map (buf, &map, GST_MAP_READ);
 376     speex_bits_read_from (&dec->bits, (gchar *) map.data, map.size);
 377     gst_buffer_unmap (buf, &map);
 378
 379     fpp = dec->header->frames_per_packet;
 380     bits = &dec->bits;
 381
 382     GST_DEBUG_OBJECT (dec, "received buffer of size %" G_GSIZE_FORMAT
 383         ", fpp %d, %d bits", map.size, fpp, speex_bits_remaining (bits));
 384   } else {
 385     /* FIXME ? actually consider how much concealment is needed */
 386     /* concealment data, pass NULL as the bits parameters */
 387     GST_DEBUG_OBJECT (dec, "creating concealment data");
 388     fpp = dec->header->frames_per_packet;
 389     bits = NULL;
 390   }
 391
 392   /* now decode each frame, catering for unknown number of them (e.g. rtp) */
 393   for (i = 0; i < fpp; i++) {
 394     GstBuffer *outbuf;
 395     gboolean corrupted = FALSE;
 396     gint ret;
 397
 398     GST_LOG_OBJECT (dec, "decoding frame %d/%d, %d bits remaining", i, fpp,
 399         bits ? speex_bits_remaining (bits) : -1);
 400 #if 0
 401     res =
 402         gst_pad_alloc_buffer_and_set_caps (GST_AUDIO_DECODER_SRC_PAD (dec),
 403         GST_BUFFER_OFFSET_NONE, dec->frame_size * dec->header->nb_channels * 2,
 404         GST_PAD_CAPS (GST_AUDIO_DECODER_SRC_PAD (dec)), &outbuf);
 405
 406     if (res != GST_FLOW_OK) {
 407       GST_DEBUG_OBJECT (dec, "buf alloc flow: %s", gst_flow_get_name (res));
 408       return res;
 409     }
 410 #endif
 411     /* FIXME, we can use a bufferpool because we have fixed size buffers. We
 412      * could also use an allocator */
 413     outbuf =
 414         gst_buffer_new_allocate (NULL,
 415         dec->frame_size * dec->header->nb_channels * 2, NULL);
 416
 417     gst_buffer_map (outbuf, &map, GST_MAP_WRITE);
 418     ret = speex_decode_int (dec->state, bits, (spx_int16_t *) map.data);
 419
 420     if (ret == -1) {
 421       /* uh? end of stream */
 422       if (fpp == 0 && speex_bits_remaining (bits) < 8) {
 423         /* if we did not know how many frames to expect, then we get this
 424            at the end if there are leftover bits to pad to the next byte */
 425         GST_DEBUG_OBJECT (dec, "Discarding leftover bits");
 426       } else {
 427         GST_WARNING_OBJECT (dec, "Unexpected end of stream found");
 428       }
 429       corrupted = TRUE;
 430     } else if (ret == -2) {
 431       GST_WARNING_OBJECT (dec, "Decoding error: corrupted stream?");
 432       corrupted = TRUE;
 433     }
 434
 435     if (bits && speex_bits_remaining (bits) < 0) {
 436       GST_WARNING_OBJECT (dec, "Decoding overflow: corrupted stream?");
 437       corrupted = TRUE;
 438     }
 439     if (dec->header->nb_channels == 2)
 440       speex_decode_stereo_int ((spx_int16_t *) map.data, dec->frame_size,
 441           dec->stereo);
 442
 443     gst_buffer_unmap (outbuf, &map);
 444
 445     if (!corrupted) {
 446       res = gst_audio_decoder_finish_frame (GST_AUDIO_DECODER (dec), outbuf, 1);
 447     } else {
 448       res = gst_audio_decoder_finish_frame (GST_AUDIO_DECODER (dec), NULL, 1);
 449       gst_buffer_unref (outbuf);
 450     }
 451
 452     if (res != GST_FLOW_OK) {
 453       GST_DEBUG_OBJECT (dec, "flow: %s", gst_flow_get_name (res));
 454       break;
 455     }
 456   }
 457
 458   return res;
 459
 460   /* ERRORS */
 461 not_negotiated:
 462   {
 463     GST_ELEMENT_ERROR (dec, CORE, NEGOTIATION, (NULL),
 464         ("decoder not initialized"));
 465     return GST_FLOW_NOT_NEGOTIATED;
 466   }
 467 }
 468
 469 static gboolean
 470 memcmp_buffers (GstBuffer * buf1, GstBuffer * buf2)
 471 {
 472   GstMapInfo map;
 473   gsize size1, size2;
 474   gboolean res;
 475
 476   size1 = gst_buffer_get_size (buf1);
 477   size2 = gst_buffer_get_size (buf2);
 478
 479   if (size1 != size2)
 480     return FALSE;
 481
 482   gst_buffer_map (buf1, &map, GST_MAP_READ);
 483   res = gst_buffer_memcmp (buf2, 0, map.data, map.size) == 0;
 484   gst_buffer_unmap (buf1, &map);
 485
 486   return res;
 487 }
 488
 489 static GstFlowReturn
 490 gst_speex_dec_handle_frame (GstAudioDecoder * bdec, GstBuffer * buf)
 491 {
 492   GstFlowReturn res;
 493   GstSpeexDec *dec;
 494
 495   /* no fancy draining */
 496   if (G_UNLIKELY (!buf))
 497     return GST_FLOW_OK;
 498
 499   dec = GST_SPEEX_DEC (bdec);
 500
 501   /* If we have the streamheader and vorbiscomment from the caps already
 502    * ignore them here */
 503   if (dec->streamheader && dec->vorbiscomment) {
 504     if (memcmp_buffers (dec->streamheader, buf)) {
 505       GST_DEBUG_OBJECT (dec, "found streamheader");
 506       gst_audio_decoder_finish_frame (bdec, NULL, 1);
 507       res = GST_FLOW_OK;
 508     } else if (memcmp_buffers (dec->vorbiscomment, buf)) {
 509       GST_DEBUG_OBJECT (dec, "found vorbiscomments");
 510       gst_audio_decoder_finish_frame (bdec, NULL, 1);
 511       res = GST_FLOW_OK;
 512     } else {
 513       res = gst_speex_dec_parse_data (dec, buf);
 514     }
 515   } else {
 516     /* Otherwise fall back to packet counting and assume that the
 517      * first two packets are the headers. */
 518     switch (dec->packetno) {
 519       case 0:
 520         GST_DEBUG_OBJECT (dec, "counted streamheader");
 521         res = gst_speex_dec_parse_header (dec, buf);
 522         gst_audio_decoder_finish_frame (bdec, NULL, 1);
 523         break;
 524       case 1:
 525         GST_DEBUG_OBJECT (dec, "counted vorbiscomments");
 526         res = gst_speex_dec_parse_comments (dec, buf);
 527         gst_audio_decoder_finish_frame (bdec, NULL, 1);
 528         break;
 529       default:
 530       {
 531         res = gst_speex_dec_parse_data (dec, buf);
 532         break;
 533       }
 534     }
 535   }
 536
 537   dec->packetno++;
 538
 539   return res;
 540 }
 541
 542 static void
 543 gst_speex_dec_get_property (GObject * object, guint prop_id,
 544     GValue * value, GParamSpec * pspec)
 545 {
 546   GstSpeexDec *speexdec;
 547
 548   speexdec = GST_SPEEX_DEC (object);
 549
 550   switch (prop_id) {
 551     case ARG_ENH:
 552       g_value_set_boolean (value, speexdec->enh);
 553       break;
 554     default:
 555       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
 556       break;
 557   }
 558 }
 559
 560 static void
 561 gst_speex_dec_set_property (GObject * object, guint prop_id,
 562     const GValue * value, GParamSpec * pspec)
 563 {
 564   GstSpeexDec *speexdec;
 565
 566   speexdec = GST_SPEEX_DEC (object);
 567
 568   switch (prop_id) {
 569     case ARG_ENH:
 570       speexdec->enh = g_value_get_boolean (value);
 571       break;
 572     default:
 573       G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
 574       break;
 575   }
 576 }