gst/audioparsers/gstaacparse.c

   1 /* GStreamer AAC parser plugin
   2  * Copyright (C) 2008 Nokia Corporation. All rights reserved.
   3  *
   4  * Contact: Stefan Kost <stefan.kost@nokia.com>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Library General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Library General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Library General Public
  17  * License along with this library; if not, write to the
  18  * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
  19  * Boston, MA 02110-1301, USA.
  20  */
  21
  22 /**
  23  * SECTION:element-aacparse
  24  * @short_description: AAC parser
  25  * @see_also: #GstAmrParse
  26  *
  27  * This is an AAC parser which handles both ADIF and ADTS stream formats.
  28  *
 * As the ADIF format is not framed, ADIF streams are not seekable and their
 * duration cannot be determined. ADTS streams, however, are seekable, and the
 * parser can also estimate their playback position and total duration.
  32  *
  33  * <refsect2>
  34  * <title>Example launch line</title>
  35  * |[
  36  * gst-launch-1.0 filesrc location=abc.aac ! aacparse ! faad ! audioresample ! audioconvert ! alsasink
  37  * ]|
  38  * </refsect2>
  39  */
  40
  41 #ifdef HAVE_CONFIG_H
  42 #include "config.h"
  43 #endif
  44
  45 #include <string.h>
  46
  47 #include <gst/base/gstbitreader.h>
  48 #include <gst/pbutils/pbutils.h>
  49 #include "gstaacparse.h"
  50
  51
/* Always-present source pad: framed MPEG-2/MPEG-4 audio in one of the raw,
 * adts, adif or loas stream formats. */
static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("audio/mpeg, "
        "framed = (boolean) true, " "mpegversion = (int) { 2, 4 }, "
        "stream-format = (string) { raw, adts, adif, loas };"));

/* Always-present sink pad: any MPEG-2/MPEG-4 audio; neither framing nor a
 * stream-format is required on the input caps. */
static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("audio/mpeg, mpegversion = (int) { 2, 4 };"));
  63
  64 GST_DEBUG_CATEGORY_STATIC (aacparse_debug);
  65 #define GST_CAT_DEFAULT aacparse_debug
  66
  67
  68 #define ADIF_MAX_SIZE 40        /* Should be enough */
  69 #define ADTS_MAX_SIZE 10        /* Should be enough */
  70 #define LOAS_MAX_SIZE 3         /* Should be enough */
  71 #define RAW_MAX_SIZE  1         /* Correct framing is required */
  72
  73 #define ADTS_HEADERS_LENGTH 7UL /* Total byte-length of fixed and variable
  74                                    headers prepended during raw to ADTS
  75                                    conversion */
  76
  77 #define AAC_FRAME_DURATION(parse) (GST_SECOND/parse->frames_per_sec)
  78
  79 static const gint loas_sample_rate_table[16] = {
  80   96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
  81   16000, 12000, 11025, 8000, 7350, 0, 0, 0
  82 };
  83
  84 static const gint loas_channels_table[16] = {
  85   0, 1, 2, 3, 4, 5, 6, 8,
  86   0, 0, 0, 7, 8, 0, 8, 0
  87 };
  88
  89 static gboolean gst_aac_parse_start (GstBaseParse * parse);
  90 static gboolean gst_aac_parse_stop (GstBaseParse * parse);
  91
  92 static gboolean gst_aac_parse_sink_setcaps (GstBaseParse * parse,
  93     GstCaps * caps);
  94 static GstCaps *gst_aac_parse_sink_getcaps (GstBaseParse * parse,
  95     GstCaps * filter);
  96
  97 static GstFlowReturn gst_aac_parse_handle_frame (GstBaseParse * parse,
  98     GstBaseParseFrame * frame, gint * skipsize);
  99 static GstFlowReturn gst_aac_parse_pre_push_frame (GstBaseParse * parse,
 100     GstBaseParseFrame * frame);
 101 static gboolean gst_aac_parse_src_event (GstBaseParse * parse,
 102     GstEvent * event);
 103
 104 static gboolean gst_aac_parse_read_audio_specific_config (GstAacParse *
 105     aacparse, GstBitReader * br, gint * object_type, gint * sample_rate,
 106     gint * channels, gint * frame_samples);
 107
 108
 109 #define gst_aac_parse_parent_class parent_class
 110 G_DEFINE_TYPE (GstAacParse, gst_aac_parse, GST_TYPE_BASE_PARSE);
 111
 112 /**
 113  * gst_aac_parse_class_init:
 114  * @klass: #GstAacParseClass.
 115  *
 116  */
 117 static void
 118 gst_aac_parse_class_init (GstAacParseClass * klass)
 119 {
 120   GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
 121   GstBaseParseClass *parse_class = GST_BASE_PARSE_CLASS (klass);
 122
 123   GST_DEBUG_CATEGORY_INIT (aacparse_debug, "aacparse", 0,
 124       "AAC audio stream parser");
 125
 126   gst_element_class_add_static_pad_template (element_class, &sink_template);
 127   gst_element_class_add_static_pad_template (element_class, &src_template);
 128
 129   gst_element_class_set_static_metadata (element_class,
 130       "AAC audio stream parser", "Codec/Parser/Audio",
 131       "Advanced Audio Coding parser", "Stefan Kost <stefan.kost@nokia.com>");
 132
 133   parse_class->start = GST_DEBUG_FUNCPTR (gst_aac_parse_start);
 134   parse_class->stop = GST_DEBUG_FUNCPTR (gst_aac_parse_stop);
 135   parse_class->set_sink_caps = GST_DEBUG_FUNCPTR (gst_aac_parse_sink_setcaps);
 136   parse_class->get_sink_caps = GST_DEBUG_FUNCPTR (gst_aac_parse_sink_getcaps);
 137   parse_class->handle_frame = GST_DEBUG_FUNCPTR (gst_aac_parse_handle_frame);
 138   parse_class->pre_push_frame =
 139       GST_DEBUG_FUNCPTR (gst_aac_parse_pre_push_frame);
 140   parse_class->src_event = GST_DEBUG_FUNCPTR (gst_aac_parse_src_event);
 141 }
 142
 143
 144 /**
 145  * gst_aac_parse_init:
 146  * @aacparse: #GstAacParse.
 147  * @klass: #GstAacParseClass.
 148  *
 149  */
 150 static void
 151 gst_aac_parse_init (GstAacParse * aacparse)
 152 {
 153   GST_DEBUG ("initialized");
 154   GST_PAD_SET_ACCEPT_INTERSECT (GST_BASE_PARSE_SINK_PAD (aacparse));
 155   GST_PAD_SET_ACCEPT_TEMPLATE (GST_BASE_PARSE_SINK_PAD (aacparse));
 156
 157   aacparse->last_parsed_sample_rate = 0;
 158   aacparse->last_parsed_channels = 0;
 159 }
 160
 161
 162 /**
 163  * gst_aac_parse_set_src_caps:
 164  * @aacparse: #GstAacParse.
 165  * @sink_caps: (proposed) caps of sink pad
 166  *
 167  * Set source pad caps according to current knowledge about the
 168  * audio stream.
 169  *
 170  * Returns: TRUE if caps were successfully set.
 171  */
 172 static gboolean
 173 gst_aac_parse_set_src_caps (GstAacParse * aacparse, GstCaps * sink_caps)
 174 {
 175   GstStructure *s;
 176   GstCaps *src_caps = NULL, *allowed;
 177   gboolean res = FALSE;
 178   const gchar *stream_format;
 179   guint8 codec_data[2];
 180   guint16 codec_data_data;
 181   gint sample_rate_idx;
 182
 183   GST_DEBUG_OBJECT (aacparse, "sink caps: %" GST_PTR_FORMAT, sink_caps);
 184   if (sink_caps)
 185     src_caps = gst_caps_copy (sink_caps);
 186   else
 187     src_caps = gst_caps_new_empty_simple ("audio/mpeg");
 188
 189   gst_caps_set_simple (src_caps, "framed", G_TYPE_BOOLEAN, TRUE,
 190       "mpegversion", G_TYPE_INT, aacparse->mpegversion, NULL);
 191
 192   aacparse->output_header_type = aacparse->header_type;
 193   switch (aacparse->header_type) {
 194     case DSPAAC_HEADER_NONE:
 195       stream_format = "raw";
 196       break;
 197     case DSPAAC_HEADER_ADTS:
 198       stream_format = "adts";
 199       break;
 200     case DSPAAC_HEADER_ADIF:
 201       stream_format = "adif";
 202       break;
 203     case DSPAAC_HEADER_LOAS:
 204       stream_format = "loas";
 205       break;
 206     default:
 207       stream_format = NULL;
 208   }
 209
 210   /* Generate codec data to be able to set profile/level on the caps */
 211   sample_rate_idx =
 212       gst_codec_utils_aac_get_index_from_sample_rate (aacparse->sample_rate);
 213   if (sample_rate_idx < 0)
 214     goto not_a_known_rate;
 215   codec_data_data =
 216       (aacparse->object_type << 11) |
 217       (sample_rate_idx << 7) | (aacparse->channels << 3);
 218   GST_WRITE_UINT16_BE (codec_data, codec_data_data);
 219   gst_codec_utils_aac_caps_set_level_and_profile (src_caps, codec_data, 2);
 220
 221   s = gst_caps_get_structure (src_caps, 0);
 222   if (aacparse->sample_rate > 0)
 223     gst_structure_set (s, "rate", G_TYPE_INT, aacparse->sample_rate, NULL);
 224   if (aacparse->channels > 0)
 225     gst_structure_set (s, "channels", G_TYPE_INT, aacparse->channels, NULL);
 226   if (stream_format)
 227     gst_structure_set (s, "stream-format", G_TYPE_STRING, stream_format, NULL);
 228
 229   allowed = gst_pad_get_allowed_caps (GST_BASE_PARSE (aacparse)->srcpad);
 230   if (allowed && !gst_caps_can_intersect (src_caps, allowed)) {
 231     GST_DEBUG_OBJECT (GST_BASE_PARSE (aacparse)->srcpad,
 232         "Caps can not intersect");
 233     if (aacparse->header_type == DSPAAC_HEADER_ADTS) {
 234       GST_DEBUG_OBJECT (GST_BASE_PARSE (aacparse)->srcpad,
 235           "Input is ADTS, trying raw");
 236       gst_caps_set_simple (src_caps, "stream-format", G_TYPE_STRING, "raw",
 237           NULL);
 238       if (gst_caps_can_intersect (src_caps, allowed)) {
 239         GstBuffer *codec_data_buffer;
 240
 241         GST_DEBUG_OBJECT (GST_BASE_PARSE (aacparse)->srcpad,
 242             "Caps can intersect, we will drop the ADTS layer");
 243         aacparse->output_header_type = DSPAAC_HEADER_NONE;
 244
 245         /* The codec_data data is according to AudioSpecificConfig,
 246            ISO/IEC 14496-3, 1.6.2.1 */
 247         codec_data_buffer = gst_buffer_new_and_alloc (2);
 248         gst_buffer_fill (codec_data_buffer, 0, codec_data, 2);
 249         gst_caps_set_simple (src_caps, "codec_data", GST_TYPE_BUFFER,
 250             codec_data_buffer, NULL);
 251         gst_buffer_unref (codec_data_buffer);
 252       }
 253     } else if (aacparse->header_type == DSPAAC_HEADER_NONE) {
 254       GST_DEBUG_OBJECT (GST_BASE_PARSE (aacparse)->srcpad,
 255           "Input is raw, trying ADTS");
 256       gst_caps_set_simple (src_caps, "stream-format", G_TYPE_STRING, "adts",
 257           NULL);
 258       if (gst_caps_can_intersect (src_caps, allowed)) {
 259         GST_DEBUG_OBJECT (GST_BASE_PARSE (aacparse)->srcpad,
 260             "Caps can intersect, we will prepend ADTS headers");
 261         aacparse->output_header_type = DSPAAC_HEADER_ADTS;
 262       }
 263     }
 264   }
 265   if (allowed)
 266     gst_caps_unref (allowed);
 267
 268   aacparse->last_parsed_channels = 0;
 269   aacparse->last_parsed_sample_rate = 0;
 270
 271   GST_DEBUG_OBJECT (aacparse, "setting src caps: %" GST_PTR_FORMAT, src_caps);
 272
 273   res = gst_pad_set_caps (GST_BASE_PARSE (aacparse)->srcpad, src_caps);
 274   gst_caps_unref (src_caps);
 275   return res;
 276
 277 not_a_known_rate:
 278   GST_ERROR_OBJECT (aacparse, "Not a known sample rate: %d",
 279       aacparse->sample_rate);
 280   gst_caps_unref (src_caps);
 281   return FALSE;
 282 }
 283
 284
 285 /**
 286  * gst_aac_parse_sink_setcaps:
 287  * @sinkpad: GstPad
 288  * @caps: GstCaps
 289  *
 290  * Implementation of "set_sink_caps" vmethod in #GstBaseParse class.
 291  *
 292  * Returns: TRUE on success.
 293  */
 294 static gboolean
 295 gst_aac_parse_sink_setcaps (GstBaseParse * parse, GstCaps * caps)
 296 {
 297   GstAacParse *aacparse;
 298   GstStructure *structure;
 299   gchar *caps_str;
 300   const GValue *value;
 301
 302   aacparse = GST_AAC_PARSE (parse);
 303   structure = gst_caps_get_structure (caps, 0);
 304   caps_str = gst_caps_to_string (caps);
 305
 306   GST_DEBUG_OBJECT (aacparse, "setcaps: %s", caps_str);
 307   g_free (caps_str);
 308
 309   /* This is needed at least in case of RTP
 310    * Parses the codec_data information to get ObjectType,
 311    * number of channels and samplerate */
 312   value = gst_structure_get_value (structure, "codec_data");
 313   if (value) {
 314     GstBuffer *buf = gst_value_get_buffer (value);
 315
 316     if (buf && gst_buffer_get_size (buf) >= 2) {
 317       GstMapInfo map;
 318       GstBitReader br;
 319
 320       if (!gst_buffer_map (buf, &map, GST_MAP_READ))
 321         return FALSE;
 322       gst_bit_reader_init (&br, map.data, map.size);
 323       gst_aac_parse_read_audio_specific_config (aacparse, &br,
 324           &aacparse->object_type, &aacparse->sample_rate, &aacparse->channels,
 325           &aacparse->frame_samples);
 326
 327       aacparse->header_type = DSPAAC_HEADER_NONE;
 328       aacparse->mpegversion = 4;
 329       gst_buffer_unmap (buf, &map);
 330
 331       GST_DEBUG ("codec_data: object_type=%d, sample_rate=%d, channels=%d, "
 332           "samples=%d", aacparse->object_type, aacparse->sample_rate,
 333           aacparse->channels, aacparse->frame_samples);
 334
 335       /* arrange for metadata and get out of the way */
 336       gst_aac_parse_set_src_caps (aacparse, caps);
 337       if (aacparse->header_type == aacparse->output_header_type)
 338         gst_base_parse_set_passthrough (parse, TRUE);
 339
 340       /* input is already correctly framed */
 341       gst_base_parse_set_min_frame_size (parse, RAW_MAX_SIZE);
 342     } else {
 343       return FALSE;
 344     }
 345
 346     /* caps info overrides */
 347     gst_structure_get_int (structure, "rate", &aacparse->sample_rate);
 348     gst_structure_get_int (structure, "channels", &aacparse->channels);
 349   } else {
 350     const gchar *stream_format =
 351         gst_structure_get_string (structure, "stream-format");
 352
 353     if (g_strcmp0 (stream_format, "raw") == 0) {
 354       GST_ERROR_OBJECT (parse, "Need codec_data for raw AAC");
 355       return FALSE;
 356     } else {
 357       aacparse->sample_rate = 0;
 358       aacparse->channels = 0;
 359       aacparse->header_type = DSPAAC_HEADER_NOT_PARSED;
 360       gst_base_parse_set_passthrough (parse, FALSE);
 361     }
 362   }
 363   return TRUE;
 364 }
 365
 366
 367 /**
 368  * gst_aac_parse_adts_get_frame_len:
 369  * @data: block of data containing an ADTS header.
 370  *
 371  * This function calculates ADTS frame length from the given header.
 372  *
 373  * Returns: size of the ADTS frame.
 374  */
 375 static inline guint
 376 gst_aac_parse_adts_get_frame_len (const guint8 * data)
 377 {
 378   return ((data[3] & 0x03) << 11) | (data[4] << 3) | ((data[5] & 0xe0) >> 5);
 379 }
 380
 381
 382 /**
 383  * gst_aac_parse_check_adts_frame:
 384  * @aacparse: #GstAacParse.
 385  * @data: Data to be checked.
 386  * @avail: Amount of data passed.
 387  * @framesize: If valid ADTS frame was found, this will be set to tell the
 388  *             found frame size in bytes.
 389  * @needed_data: If frame was not found, this may be set to tell how much
 390  *               more data is needed in the next round to detect the frame
 391  *               reliably. This may happen when a frame header candidate
 392  *               is found but it cannot be guaranteed to be the header without
 393  *               peeking the following data.
 394  *
 395  * Check if the given data contains contains ADTS frame. The algorithm
 396  * will examine ADTS frame header and calculate the frame size. Also, another
 397  * consecutive ADTS frame header need to be present after the found frame.
 398  * Otherwise the data is not considered as a valid ADTS frame. However, this
 399  * "extra check" is omitted when EOS has been received. In this case it is
 400  * enough when data[0] contains a valid ADTS header.
 401  *
 402  * This function may set the #needed_data to indicate that a possible frame
 403  * candidate has been found, but more data (#needed_data bytes) is needed to
 404  * be absolutely sure. When this situation occurs, FALSE will be returned.
 405  *
 406  * When a valid frame is detected, this function will use
 407  * gst_base_parse_set_min_frame_size() function from #GstBaseParse class
 408  * to set the needed bytes for next frame.This way next data chunk is already
 409  * of correct size.
 410  *
 411  * Returns: TRUE if the given data contains a valid ADTS header.
 412  */
 413 static gboolean
 414 gst_aac_parse_check_adts_frame (GstAacParse * aacparse,
 415     const guint8 * data, const guint avail, gboolean drain,
 416     guint * framesize, guint * needed_data)
 417 {
 418   guint crc_size;
 419
 420   *needed_data = 0;
 421
 422   /* Absolute minimum to perform the ADTS syncword,
 423      layer and sampling frequency tests */
 424   if (G_UNLIKELY (avail < 3)) {
 425     *needed_data = 3;
 426     return FALSE;
 427   }
 428
 429   /* Syncword and layer tests */
 430   if ((data[0] == 0xff) && ((data[1] & 0xf6) == 0xf0)) {
 431
 432     /* Sampling frequency test */
 433     if (G_UNLIKELY ((data[2] & 0x3C) >> 2 == 15))
 434       return FALSE;
 435
 436     /* This looks like an ADTS frame header but
 437        we need at least 6 bytes to proceed */
 438     if (G_UNLIKELY (avail < 6)) {
 439       *needed_data = 6;
 440       return FALSE;
 441     }
 442
 443     *framesize = gst_aac_parse_adts_get_frame_len (data);
 444
 445     /* If frame has CRC, it needs 2 bytes
 446        for it at the end of the header */
 447     crc_size = (data[1] & 0x01) ? 0 : 2;
 448
 449     /* CRC size test */
 450     if (*framesize < 7 + crc_size) {
 451       *needed_data = 7 + crc_size;
 452       return FALSE;
 453     }
 454
 455     /* In EOS mode this is enough. No need to examine the data further.
 456        We also relax the check when we have sync, on the assumption that
 457        if we're not looking at random data, we have a much higher chance
 458        to get the correct sync, and this avoids losing two frames when
 459        a single bit corruption happens. */
 460     if (drain || !GST_BASE_PARSE_LOST_SYNC (aacparse)) {
 461       return TRUE;
 462     }
 463
 464     if (*framesize + ADTS_MAX_SIZE > avail) {
 465       /* We have found a possible frame header candidate, but can't be
 466          sure since we don't have enough data to check the next frame */
 467       GST_DEBUG ("NEED MORE DATA: we need %d, available %d",
 468           *framesize + ADTS_MAX_SIZE, avail);
 469       *needed_data = *framesize + ADTS_MAX_SIZE;
 470       gst_base_parse_set_min_frame_size (GST_BASE_PARSE (aacparse),
 471           *framesize + ADTS_MAX_SIZE);
 472       return FALSE;
 473     }
 474
 475     if ((data[*framesize] == 0xff) && ((data[*framesize + 1] & 0xf6) == 0xf0)) {
 476       guint nextlen = gst_aac_parse_adts_get_frame_len (data + (*framesize));
 477
 478       GST_LOG ("ADTS frame found, len: %d bytes", *framesize);
 479       gst_base_parse_set_min_frame_size (GST_BASE_PARSE (aacparse),
 480           nextlen + ADTS_MAX_SIZE);
 481       return TRUE;
 482     }
 483   }
 484   return FALSE;
 485 }
 486
 487 static gboolean
 488 gst_aac_parse_latm_get_value (GstAacParse * aacparse, GstBitReader * br,
 489     guint32 * value)
 490 {
 491   guint8 bytes, i, byte;
 492
 493   *value = 0;
 494   if (!gst_bit_reader_get_bits_uint8 (br, &bytes, 2))
 495     return FALSE;
 496   for (i = 0; i <= bytes; ++i) {
 497     *value <<= 8;
 498     if (!gst_bit_reader_get_bits_uint8 (br, &byte, 8))
 499       return FALSE;
 500     *value += byte;
 501   }
 502   return TRUE;
 503 }
 504
 505 static gboolean
 506 gst_aac_parse_get_audio_object_type (GstAacParse * aacparse, GstBitReader * br,
 507     guint8 * audio_object_type)
 508 {
 509   if (!gst_bit_reader_get_bits_uint8 (br, audio_object_type, 5))
 510     return FALSE;
 511   if (*audio_object_type == 31) {
 512     if (!gst_bit_reader_get_bits_uint8 (br, audio_object_type, 6))
 513       return FALSE;
 514     *audio_object_type += 32;
 515   }
 516   GST_LOG_OBJECT (aacparse, "audio object type %u", *audio_object_type);
 517   return TRUE;
 518 }
 519
 520 static gboolean
 521 gst_aac_parse_get_audio_sample_rate (GstAacParse * aacparse, GstBitReader * br,
 522     gint * sample_rate)
 523 {
 524   guint8 sampling_frequency_index;
 525   if (!gst_bit_reader_get_bits_uint8 (br, &sampling_frequency_index, 4))
 526     return FALSE;
 527   GST_LOG_OBJECT (aacparse, "sampling_frequency_index: %u",
 528       sampling_frequency_index);
 529   if (sampling_frequency_index == 0xf) {
 530     guint32 sampling_rate;
 531     if (!gst_bit_reader_get_bits_uint32 (br, &sampling_rate, 24))
 532       return FALSE;
 533     *sample_rate = sampling_rate;
 534   } else {
 535     *sample_rate = loas_sample_rate_table[sampling_frequency_index];
 536     if (!*sample_rate)
 537       return FALSE;
 538   }
 539   aacparse->last_parsed_sample_rate = *sample_rate;
 540   return TRUE;
 541 }
 542
 543 /* See table 1.13 in ISO/IEC 14496-3 */
 544 static gboolean
 545 gst_aac_parse_read_audio_specific_config (GstAacParse * aacparse,
 546     GstBitReader * br, gint * object_type, gint * sample_rate, gint * channels,
 547     gint * frame_samples)
 548 {
 549   guint8 audio_object_type;
 550   guint8 G_GNUC_UNUSED extension_audio_object_type;
 551   guint8 channel_configuration, extension_channel_configuration;
 552   gboolean G_GNUC_UNUSED sbr = FALSE, ps = FALSE;
 553
 554   if (!gst_aac_parse_get_audio_object_type (aacparse, br, &audio_object_type))
 555     return FALSE;
 556   if (object_type)
 557     *object_type = audio_object_type;
 558
 559   if (!gst_aac_parse_get_audio_sample_rate (aacparse, br, sample_rate))
 560     return FALSE;
 561
 562   if (!gst_bit_reader_get_bits_uint8 (br, &channel_configuration, 4))
 563     return FALSE;
 564   *channels = loas_channels_table[channel_configuration];
 565   GST_LOG_OBJECT (aacparse, "channel_configuration: %d", channel_configuration);
 566   if (!*channels)
 567     return FALSE;
 568
 569   if (audio_object_type == 5 || audio_object_type == 29) {
 570     extension_audio_object_type = 5;
 571     sbr = TRUE;
 572     if (audio_object_type == 29) {
 573       ps = TRUE;
 574       /* Parametric stereo. If we have a one-channel configuration, we can
 575        * override it to stereo */
 576       if (*channels == 1)
 577         *channels = 2;
 578     }
 579
 580     GST_LOG_OBJECT (aacparse,
 581         "Audio object type 5 or 29, so rereading sampling rate (was %d)...",
 582         *sample_rate);
 583     if (!gst_aac_parse_get_audio_sample_rate (aacparse, br, sample_rate))
 584       return FALSE;
 585
 586     if (!gst_aac_parse_get_audio_object_type (aacparse, br, &audio_object_type))
 587       return FALSE;
 588
 589     if (audio_object_type == 22) {
 590       /* extension channel configuration */
 591       if (!gst_bit_reader_get_bits_uint8 (br, &extension_channel_configuration,
 592               4))
 593         return FALSE;
 594       GST_LOG_OBJECT (aacparse, "extension channel_configuration: %d",
 595           extension_channel_configuration);
 596       *channels = loas_channels_table[extension_channel_configuration];
 597       if (!*channels)
 598         return FALSE;
 599     }
 600   } else {
 601     extension_audio_object_type = 0;
 602   }
 603
 604   GST_INFO_OBJECT (aacparse, "Parsed AudioSpecificConfig: %d Hz, %d channels",
 605       *sample_rate, *channels);
 606
 607   if (frame_samples && audio_object_type == 23) {
 608     guint8 frame_flag;
 609     /* Read the Decoder Configuration (GASpecificConfig) if present */
 610     /* We only care about the first bit to know what the number of samples
 611      * in a frame is */
 612     if (!gst_bit_reader_get_bits_uint8 (br, &frame_flag, 1))
 613       return FALSE;
 614     *frame_samples = frame_flag ? 960 : 1024;
 615   }
 616
 617   /* There's LOTS of stuff next, but we ignore it for now as we have
 618      what we want (sample rate and number of channels */
 619   GST_DEBUG_OBJECT (aacparse,
 620       "Need more code to parse humongous LOAS data, currently ignored");
 621   aacparse->last_parsed_channels = *channels;
 622   return TRUE;
 623 }
 624
 625
 626 static gboolean
 627 gst_aac_parse_read_loas_config (GstAacParse * aacparse, const guint8 * data,
 628     guint avail, gint * sample_rate, gint * channels, gint * version)
 629 {
 630   GstBitReader br;
 631   guint8 u8, v, vA;
 632
 633   /* No version in the bitstream, but the spec has LOAS in the MPEG-4 section */
 634   if (version)
 635     *version = 4;
 636
 637   gst_bit_reader_init (&br, data, avail);
 638
 639   /* skip sync word (11 bits) and size (13 bits) */
 640   if (!gst_bit_reader_skip (&br, 11 + 13))
 641     return FALSE;
 642
 643   /* First bit is "use last config" */
 644   if (!gst_bit_reader_get_bits_uint8 (&br, &u8, 1))
 645     return FALSE;
 646   if (u8) {
 647     GST_LOG_OBJECT (aacparse, "Frame uses previous config");
 648     if (!aacparse->last_parsed_sample_rate || !aacparse->last_parsed_channels) {
 649       GST_DEBUG_OBJECT (aacparse,
 650           "No previous config to use. We'll look for more data.");
 651       return FALSE;
 652     }
 653     *sample_rate = aacparse->last_parsed_sample_rate;
 654     *channels = aacparse->last_parsed_channels;
 655     return TRUE;
 656   }
 657
 658   GST_DEBUG_OBJECT (aacparse, "Frame contains new config");
 659
 660   /* audioMuxVersion */
 661   if (!gst_bit_reader_get_bits_uint8 (&br, &v, 1))
 662     return FALSE;
 663   if (v) {
 664     /* audioMuxVersionA */
 665     if (!gst_bit_reader_get_bits_uint8 (&br, &vA, 1))
 666       return FALSE;
 667   } else
 668     vA = 0;
 669
 670   GST_LOG_OBJECT (aacparse, "v %d, vA %d", v, vA);
 671   if (vA == 0) {
 672     guint8 same_time, subframes, num_program, prog;
 673     if (v == 1) {
 674       guint32 value;
 675       /* taraBufferFullness */
 676       if (!gst_aac_parse_latm_get_value (aacparse, &br, &value))
 677         return FALSE;
 678     }
 679     if (!gst_bit_reader_get_bits_uint8 (&br, &same_time, 1))
 680       return FALSE;
 681     if (!gst_bit_reader_get_bits_uint8 (&br, &subframes, 6))
 682       return FALSE;
 683     if (!gst_bit_reader_get_bits_uint8 (&br, &num_program, 4))
 684       return FALSE;
 685     GST_LOG_OBJECT (aacparse, "same_time %d, subframes %d, num_program %d",
 686         same_time, subframes, num_program);
 687
 688     for (prog = 0; prog <= num_program; ++prog) {
 689       guint8 num_layer, layer;
 690       if (!gst_bit_reader_get_bits_uint8 (&br, &num_layer, 3))
 691         return FALSE;
 692       GST_LOG_OBJECT (aacparse, "Program %d: %d layers", prog, num_layer);
 693
 694       for (layer = 0; layer <= num_layer; ++layer) {
 695         guint8 use_same_config;
 696         if (prog == 0 && layer == 0) {
 697           use_same_config = 0;
 698         } else {
 699           if (!gst_bit_reader_get_bits_uint8 (&br, &use_same_config, 1))
 700             return FALSE;
 701         }
 702         if (!use_same_config) {
 703           if (v == 0) {
 704             if (!gst_aac_parse_read_audio_specific_config (aacparse, &br, NULL,
 705                     sample_rate, channels, NULL))
 706               return FALSE;
 707           } else {
 708             guint32 asc_len;
 709             if (!gst_aac_parse_latm_get_value (aacparse, &br, &asc_len))
 710               return FALSE;
 711             if (!gst_aac_parse_read_audio_specific_config (aacparse, &br, NULL,
 712                     sample_rate, channels, NULL))
 713               return FALSE;
 714             if (!gst_bit_reader_skip (&br, asc_len))
 715               return FALSE;
 716           }
 717         }
 718       }
 719     }
 720     GST_LOG_OBJECT (aacparse, "More data ignored");
 721   } else {
 722     GST_WARNING_OBJECT (aacparse, "Spec says \"TBD\"...");
 723     return FALSE;
 724   }
 725   return TRUE;
 726 }
 727
 728 /**
 729  * gst_aac_parse_loas_get_frame_len:
 730  * @data: block of data containing a LOAS header.
 731  *
 732  * This function calculates LOAS frame length from the given header.
 733  *
 734  * Returns: size of the LOAS frame.
 735  */
 736 static inline guint
 737 gst_aac_parse_loas_get_frame_len (const guint8 * data)
 738 {
 739   return (((data[1] & 0x1f) << 8) | data[2]) + 3;
 740 }
 741
 742
 743 /**
 744  * gst_aac_parse_check_loas_frame:
 745  * @aacparse: #GstAacParse.
 746  * @data: Data to be checked.
 747  * @avail: Amount of data passed.
 748  * @framesize: If valid LOAS frame was found, this will be set to tell the
 749  *             found frame size in bytes.
 750  * @needed_data: If frame was not found, this may be set to tell how much
 751  *               more data is needed in the next round to detect the frame
 752  *               reliably. This may happen when a frame header candidate
 753  *               is found but it cannot be guaranteed to be the header without
 754  *               peeking the following data.
 755  *
 756  * Check if the given data contains contains LOAS frame. The algorithm
 757  * will examine LOAS frame header and calculate the frame size. Also, another
 758  * consecutive LOAS frame header need to be present after the found frame.
 759  * Otherwise the data is not considered as a valid LOAS frame. However, this
 760  * "extra check" is omitted when EOS has been received. In this case it is
 761  * enough when data[0] contains a valid LOAS header.
 762  *
 763  * This function may set the #needed_data to indicate that a possible frame
 764  * candidate has been found, but more data (#needed_data bytes) is needed to
 765  * be absolutely sure. When this situation occurs, FALSE will be returned.
 766  *
 767  * When a valid frame is detected, this function will use
 768  * gst_base_parse_set_min_frame_size() function from #GstBaseParse class
 769  * to set the needed bytes for next frame.This way next data chunk is already
 770  * of correct size.
 771  *
 772  * LOAS can have three different formats, if I read the spec correctly. Only
 773  * one of them is supported here, as the two samples I have use this one.
 774  *
 775  * Returns: TRUE if the given data contains a valid LOAS header.
 776  */
 777 static gboolean
 778 gst_aac_parse_check_loas_frame (GstAacParse * aacparse,
 779     const guint8 * data, const guint avail, gboolean drain,
 780     guint * framesize, guint * needed_data)
 781 {
 782   *needed_data = 0;
 783
 784   /* 3 byte header */
 785   if (G_UNLIKELY (avail < 3)) {
 786     *needed_data = 3;
 787     return FALSE;
 788   }
 789
 790   if ((data[0] == 0x56) && ((data[1] & 0xe0) == 0xe0)) {
 791     *framesize = gst_aac_parse_loas_get_frame_len (data);
 792     GST_DEBUG_OBJECT (aacparse, "Found possible %u byte LOAS frame",
 793         *framesize);
 794
 795     /* In EOS mode this is enough. No need to examine the data further.
 796        We also relax the check when we have sync, on the assumption that
 797        if we're not looking at random data, we have a much higher chance
 798        to get the correct sync, and this avoids losing two frames when
 799        a single bit corruption happens. */
 800     if (drain || !GST_BASE_PARSE_LOST_SYNC (aacparse)) {
 801       return TRUE;
 802     }
 803
 804     if (*framesize + LOAS_MAX_SIZE > avail) {
 805       /* We have found a possible frame header candidate, but can't be
 806          sure since we don't have enough data to check the next frame */
 807       GST_DEBUG ("NEED MORE DATA: we need %d, available %d",
 808           *framesize + LOAS_MAX_SIZE, avail);
 809       *needed_data = *framesize + LOAS_MAX_SIZE;
 810       gst_base_parse_set_min_frame_size (GST_BASE_PARSE (aacparse),
 811           *framesize + LOAS_MAX_SIZE);
 812       return FALSE;
 813     }
 814
 815     if ((data[*framesize] == 0x56) && ((data[*framesize + 1] & 0xe0) == 0xe0)) {
 816       guint nextlen = gst_aac_parse_loas_get_frame_len (data + (*framesize));
 817
 818       GST_LOG ("LOAS frame found, len: %d bytes", *framesize);
 819       gst_base_parse_set_min_frame_size (GST_BASE_PARSE (aacparse),
 820           nextlen + LOAS_MAX_SIZE);
 821       return TRUE;
 822     } else {
 823       GST_DEBUG_OBJECT (aacparse, "That was a false positive");
 824     }
 825   }
 826   return FALSE;
 827 }
 828
 829 /* caller ensure sufficient data */
 830 static inline void
 831 gst_aac_parse_parse_adts_header (GstAacParse * aacparse, const guint8 * data,
 832     gint * rate, gint * channels, gint * object, gint * version)
 833 {
 834
 835   if (rate) {
 836     gint sr_idx = (data[2] & 0x3c) >> 2;
 837
 838     *rate = gst_codec_utils_aac_get_sample_rate_from_index (sr_idx);
 839   }
 840   if (channels) {
 841     *channels = ((data[2] & 0x01) << 2) | ((data[3] & 0xc0) >> 6);
 842     if (*channels == 7)
 843       *channels = 8;
 844   }
 845
 846   if (version)
 847     *version = (data[1] & 0x08) ? 2 : 4;
 848   if (object)
 849     *object = ((data[2] & 0xc0) >> 6) + 1;
 850 }
 851
 852 /**
 853  * gst_aac_parse_detect_stream:
 854  * @aacparse: #GstAacParse.
 855  * @data: A block of data that needs to be examined for stream characteristics.
 856  * @avail: Size of the given datablock.
 857  * @framesize: If valid stream was found, this will be set to tell the
 858  *             first frame size in bytes.
 859  * @skipsize: If valid stream was found, this will be set to tell the first
 860  *            audio frame position within the given data.
 861  *
 862  * Examines the given piece of data and try to detect the format of it. It
 863  * checks for "ADIF" header (in the beginning of the clip) and ADTS frame
 864  * header. If the stream is detected, TRUE will be returned and #framesize
 865  * is set to indicate the found frame size. Additionally, #skipsize might
 866  * be set to indicate the number of bytes that need to be skipped, a.k.a. the
 867  * position of the frame inside given data chunk.
 868  *
 869  * Returns: TRUE on success.
 870  */
 871 static gboolean
 872 gst_aac_parse_detect_stream (GstAacParse * aacparse,
 873     const guint8 * data, const guint avail, gboolean drain,
 874     guint * framesize, gint * skipsize)
 875 {
 876   gboolean found = FALSE;
 877   guint need_data_adts = 0, need_data_loas;
 878   guint i = 0;
 879
 880   GST_DEBUG_OBJECT (aacparse, "Parsing header data");
 881
 882   /* FIXME: No need to check for ADIF if we are not in the beginning of the
 883      stream */
 884
 885   /* Can we even parse the header? */
 886   if (avail < MAX (ADTS_MAX_SIZE, LOAS_MAX_SIZE)) {
 887     GST_DEBUG_OBJECT (aacparse, "Not enough data to check");
 888     return FALSE;
 889   }
 890
 891   for (i = 0; i < avail - 4; i++) {
 892     if (((data[i] == 0xff) && ((data[i + 1] & 0xf6) == 0xf0)) ||
 893         ((data[i] == 0x56) && ((data[i + 1] & 0xe0) == 0xe0)) ||
 894         strncmp ((char *) data + i, "ADIF", 4) == 0) {
 895       GST_DEBUG_OBJECT (aacparse, "Found signature at offset %u", i);
 896       found = TRUE;
 897
 898       if (i) {
 899         /* Trick: tell the parent class that we didn't find the frame yet,
 900            but make it skip 'i' amount of bytes. Next time we arrive
 901            here we have full frame in the beginning of the data. */
 902         *skipsize = i;
 903         return FALSE;
 904       }
 905       break;
 906     }
 907   }
 908   if (!found) {
 909     if (i)
 910       *skipsize = i;
 911     return FALSE;
 912   }
 913
 914   if (gst_aac_parse_check_adts_frame (aacparse, data, avail, drain,
 915           framesize, &need_data_adts)) {
 916     gint rate, channels;
 917
 918     GST_INFO ("ADTS ID: %d, framesize: %d", (data[1] & 0x08) >> 3, *framesize);
 919
 920     gst_aac_parse_parse_adts_header (aacparse, data, &rate, &channels,
 921         &aacparse->object_type, &aacparse->mpegversion);
 922
 923     if (!channels || !framesize) {
 924       GST_DEBUG_OBJECT (aacparse, "impossible ADTS configuration");
 925       return FALSE;
 926     }
 927
 928     aacparse->header_type = DSPAAC_HEADER_ADTS;
 929     gst_base_parse_set_frame_rate (GST_BASE_PARSE (aacparse), rate,
 930         aacparse->frame_samples, 2, 2);
 931
 932     GST_DEBUG ("ADTS: samplerate %d, channels %d, objtype %d, version %d",
 933         rate, channels, aacparse->object_type, aacparse->mpegversion);
 934
 935     gst_base_parse_set_syncable (GST_BASE_PARSE (aacparse), TRUE);
 936
 937     return TRUE;
 938   }
 939
 940   if (gst_aac_parse_check_loas_frame (aacparse, data, avail, drain,
 941           framesize, &need_data_loas)) {
 942     gint rate = 0, channels = 0;
 943
 944     GST_INFO ("LOAS, framesize: %d", *framesize);
 945
 946     aacparse->header_type = DSPAAC_HEADER_LOAS;
 947
 948     if (!gst_aac_parse_read_loas_config (aacparse, data, avail, &rate,
 949             &channels, &aacparse->mpegversion)) {
 950       /* This is pretty normal when skipping data at the start of
 951        * random stream (MPEG-TS capture for example) */
 952       GST_LOG_OBJECT (aacparse, "Error reading LOAS config");
 953       return FALSE;
 954     }
 955
 956     if (rate && channels) {
 957       gst_base_parse_set_frame_rate (GST_BASE_PARSE (aacparse), rate,
 958           aacparse->frame_samples, 2, 2);
 959
 960       /* Don't store the sample rate and channels yet -
 961        * this is just format detection. */
 962       GST_DEBUG ("LOAS: samplerate %d, channels %d, objtype %d, version %d",
 963           rate, channels, aacparse->object_type, aacparse->mpegversion);
 964     }
 965
 966     gst_base_parse_set_syncable (GST_BASE_PARSE (aacparse), TRUE);
 967
 968     return TRUE;
 969   }
 970
 971   if (need_data_adts || need_data_loas) {
 972     /* This tells the parent class not to skip any data */
 973     *skipsize = 0;
 974     return FALSE;
 975   }
 976
 977   if (avail < ADIF_MAX_SIZE)
 978     return FALSE;
 979
 980   if (memcmp (data + i, "ADIF", 4) == 0) {
 981     const guint8 *adif;
 982     int skip_size = 0;
 983     int bitstream_type;
 984     int sr_idx;
 985     GstCaps *sinkcaps;
 986
 987     aacparse->header_type = DSPAAC_HEADER_ADIF;
 988     aacparse->mpegversion = 4;
 989
 990     /* Skip the "ADIF" bytes */
 991     adif = data + i + 4;
 992
 993     /* copyright string */
 994     if (adif[0] & 0x80)
 995       skip_size += 9;           /* skip 9 bytes */
 996
 997     bitstream_type = adif[0 + skip_size] & 0x10;
 998     aacparse->bitrate =
 999         ((unsigned int) (adif[0 + skip_size] & 0x0f) << 19) |
1000         ((unsigned int) adif[1 + skip_size] << 11) |
1001         ((unsigned int) adif[2 + skip_size] << 3) |
1002         ((unsigned int) adif[3 + skip_size] & 0xe0);
1003
1004     /* CBR */
1005     if (bitstream_type == 0) {
1006 #if 0
1007       /* Buffer fullness parsing. Currently not needed... */
1008       guint num_elems = 0;
1009       guint fullness = 0;
1010
1011       num_elems = (adif[3 + skip_size] & 0x1e);
1012       GST_INFO ("ADIF num_config_elems: %d", num_elems);
1013
1014       fullness = ((unsigned int) (adif[3 + skip_size] & 0x01) << 19) |
1015           ((unsigned int) adif[4 + skip_size] << 11) |
1016           ((unsigned int) adif[5 + skip_size] << 3) |
1017           ((unsigned int) (adif[6 + skip_size] & 0xe0) >> 5);
1018
1019       GST_INFO ("ADIF buffer fullness: %d", fullness);
1020 #endif
1021       aacparse->object_type = ((adif[6 + skip_size] & 0x01) << 1) |
1022           ((adif[7 + skip_size] & 0x80) >> 7);
1023       sr_idx = (adif[7 + skip_size] & 0x78) >> 3;
1024     }
1025     /* VBR */
1026     else {
1027       aacparse->object_type = (adif[4 + skip_size] & 0x18) >> 3;
1028       sr_idx = ((adif[4 + skip_size] & 0x07) << 1) |
1029           ((adif[5 + skip_size] & 0x80) >> 7);
1030     }
1031
1032     /* FIXME: This gives totally wrong results. Duration calculation cannot
1033        be based on this */
1034     aacparse->sample_rate =
1035         gst_codec_utils_aac_get_sample_rate_from_index (sr_idx);
1036
1037     /* baseparse is not given any fps,
1038      * so it will give up on timestamps, seeking, etc */
1039
1040     /* FIXME: Can we assume this? */
1041     aacparse->channels = 2;
1042
1043     GST_INFO ("ADIF: br=%d, samplerate=%d, objtype=%d",
1044         aacparse->bitrate, aacparse->sample_rate, aacparse->object_type);
1045
1046     gst_base_parse_set_min_frame_size (GST_BASE_PARSE (aacparse), 512);
1047
1048     /* arrange for metadata and get out of the way */
1049     sinkcaps = gst_pad_get_current_caps (GST_BASE_PARSE_SINK_PAD (aacparse));
1050     gst_aac_parse_set_src_caps (aacparse, sinkcaps);
1051     if (sinkcaps)
1052       gst_caps_unref (sinkcaps);
1053
1054     /* not syncable, not easily seekable (unless we push data from start */
1055     gst_base_parse_set_syncable (GST_BASE_PARSE_CAST (aacparse), FALSE);
1056     gst_base_parse_set_passthrough (GST_BASE_PARSE_CAST (aacparse), TRUE);
1057     gst_base_parse_set_average_bitrate (GST_BASE_PARSE_CAST (aacparse), 0);
1058
1059     *framesize = avail;
1060     return TRUE;
1061   }
1062
1063   /* This should never happen */
1064   return FALSE;
1065 }
1066
1067 /**
1068  * gst_aac_parse_get_audio_profile_object_type
1069  * @aacparse: #GstAacParse.
1070  *
1071  * Gets the MPEG-2 profile or the MPEG-4 object type value corresponding to the
1072  * mpegversion and profile of @aacparse's src pad caps, according to the
1073  * values defined by table 1.A.11 in ISO/IEC 14496-3.
1074  *
1075  * Returns: the profile or object type value corresponding to @aacparse's src
1076  * pad caps, if such a value exists; otherwise G_MAXUINT8.
1077  */
1078 static guint8
1079 gst_aac_parse_get_audio_profile_object_type (GstAacParse * aacparse)
1080 {
1081   GstCaps *srccaps;
1082   GstStructure *srcstruct;
1083   const gchar *profile;
1084   guint8 ret;
1085
1086   srccaps = gst_pad_get_current_caps (GST_BASE_PARSE_SRC_PAD (aacparse));
1087   if (G_UNLIKELY (srccaps == NULL)) {
1088     return G_MAXUINT8;
1089   }
1090
1091   srcstruct = gst_caps_get_structure (srccaps, 0);
1092   profile = gst_structure_get_string (srcstruct, "profile");
1093   if (G_UNLIKELY (profile == NULL)) {
1094     gst_caps_unref (srccaps);
1095     return G_MAXUINT8;
1096   }
1097
1098   if (g_strcmp0 (profile, "main") == 0) {
1099     ret = (guint8) 0U;
1100   } else if (g_strcmp0 (profile, "lc") == 0) {
1101     ret = (guint8) 1U;
1102   } else if (g_strcmp0 (profile, "ssr") == 0) {
1103     ret = (guint8) 2U;
1104   } else if (g_strcmp0 (profile, "ltp") == 0) {
1105     if (G_LIKELY (aacparse->mpegversion == 4))
1106       ret = (guint8) 3U;
1107     else
1108       ret = G_MAXUINT8;         /* LTP Object Type allowed only for MPEG-4 */
1109   } else {
1110     ret = G_MAXUINT8;
1111   }
1112
1113   gst_caps_unref (srccaps);
1114   return ret;
1115 }
1116
1117 /**
1118  * gst_aac_parse_get_audio_channel_configuration
1119  * @num_channels: number of audio channels.
1120  *
1121  * Gets the Channel Configuration value, as defined by table 1.19 in ISO/IEC
1122  * 14496-3, for a given number of audio channels.
1123  *
1124  * Returns: the Channel Configuration value corresponding to @num_channels, if
1125  * such a value exists; otherwise G_MAXUINT8.
1126  */
1127 static guint8
1128 gst_aac_parse_get_audio_channel_configuration (gint num_channels)
1129 {
1130   if (num_channels >= 1 && num_channels <= 6)   /* Mono up to & including 5.1 */
1131     return (guint8) num_channels;
1132   else if (num_channels == 8)   /* 7.1 */
1133     return (guint8) 7U;
1134   else
1135     return G_MAXUINT8;
1136
1137   /* FIXME: Add support for configurations 11, 12 and 14 from
1138    * ISO/IEC 14496-3:2009/PDAM 4 based on the actual channel layout
1139    */
1140 }
1141
1142 /**
1143  * gst_aac_parse_get_audio_sampling_frequency_index:
1144  * @sample_rate: audio sampling rate.
1145  *
1146  * Gets the Sampling Frequency Index value, as defined by table 1.18 in ISO/IEC
1147  * 14496-3, for a given sampling rate.
1148  *
1149  * Returns: the Sampling Frequency Index value corresponding to @sample_rate,
1150  * if such a value exists; otherwise G_MAXUINT8.
1151  */
1152 static guint8
1153 gst_aac_parse_get_audio_sampling_frequency_index (gint sample_rate)
1154 {
1155   switch (sample_rate) {
1156     case 96000:
1157       return 0x0U;
1158     case 88200:
1159       return 0x1U;
1160     case 64000:
1161       return 0x2U;
1162     case 48000:
1163       return 0x3U;
1164     case 44100:
1165       return 0x4U;
1166     case 32000:
1167       return 0x5U;
1168     case 24000:
1169       return 0x6U;
1170     case 22050:
1171       return 0x7U;
1172     case 16000:
1173       return 0x8U;
1174     case 12000:
1175       return 0x9U;
1176     case 11025:
1177       return 0xAU;
1178     case 8000:
1179       return 0xBU;
1180     case 7350:
1181       return 0xCU;
1182     default:
1183       return G_MAXUINT8;
1184   }
1185 }
1186
1187 /**
1188  * gst_aac_parse_prepend_adts_headers:
1189  * @aacparse: #GstAacParse.
1190  * @frame: raw AAC frame to which ADTS headers shall be prepended.
1191  *
1192  * Prepends ADTS headers to a raw AAC audio frame.
1193  *
1194  * Returns: TRUE if ADTS headers were successfully prepended; FALSE otherwise.
1195  */
1196 static gboolean
1197 gst_aac_parse_prepend_adts_headers (GstAacParse * aacparse,
1198     GstBaseParseFrame * frame)
1199 {
1200   GstMemory *mem;
1201   guint8 *adts_headers;
1202   gsize buf_size;
1203   gsize frame_size;
1204   guint8 id, profile, channel_configuration, sampling_frequency_index;
1205
1206   id = (aacparse->mpegversion == 4) ? 0x0U : 0x1U;
1207   profile = gst_aac_parse_get_audio_profile_object_type (aacparse);
1208   if (profile == G_MAXUINT8) {
1209     GST_ERROR_OBJECT (aacparse, "Unsupported audio profile or object type");
1210     return FALSE;
1211   }
1212   channel_configuration =
1213       gst_aac_parse_get_audio_channel_configuration (aacparse->channels);
1214   if (channel_configuration == G_MAXUINT8) {
1215     GST_ERROR_OBJECT (aacparse, "Unsupported number of channels");
1216     return FALSE;
1217   }
1218   sampling_frequency_index =
1219       gst_aac_parse_get_audio_sampling_frequency_index (aacparse->sample_rate);
1220   if (sampling_frequency_index == G_MAXUINT8) {
1221     GST_ERROR_OBJECT (aacparse, "Unsupported sampling frequency");
1222     return FALSE;
1223   }
1224
1225   frame->out_buffer = gst_buffer_copy (frame->buffer);
1226   buf_size = gst_buffer_get_size (frame->out_buffer);
1227   frame_size = buf_size + ADTS_HEADERS_LENGTH;
1228
1229   if (G_UNLIKELY (frame_size >= 0x4000)) {
1230     GST_ERROR_OBJECT (aacparse, "Frame size is too big for ADTS");
1231     return FALSE;
1232   }
1233
1234   adts_headers = (guint8 *) g_malloc0 (ADTS_HEADERS_LENGTH);
1235
1236   /* Note: no error correction bits are added to the resulting ADTS frames */
1237   adts_headers[0] = 0xFFU;
1238   adts_headers[1] = 0xF0U | (id << 3) | 0x1U;
1239   adts_headers[2] = (profile << 6) | (sampling_frequency_index << 2) | 0x2U |
1240       ((channel_configuration & 0x4U) >> 2);
1241   adts_headers[3] = ((channel_configuration & 0x3U) << 6) | 0x30U |
1242       (guint8) (frame_size >> 11);
1243   adts_headers[4] = (guint8) ((frame_size >> 3) & 0x00FF);
1244   adts_headers[5] = (guint8) (((frame_size & 0x0007) << 5) + 0x1FU);
1245   adts_headers[6] = 0xFCU;
1246
1247   mem = gst_memory_new_wrapped (0, adts_headers, ADTS_HEADERS_LENGTH, 0,
1248       ADTS_HEADERS_LENGTH, adts_headers, g_free);
1249   gst_buffer_prepend_memory (frame->out_buffer, mem);
1250
1251   return TRUE;
1252 }
1253
1254 /**
1255  * gst_aac_parse_check_valid_frame:
1256  * @parse: #GstBaseParse.
1257  * @frame: #GstBaseParseFrame.
1258  * @skipsize: How much data parent class should skip in order to find the
1259  *            frame header.
1260  *
1261  * Implementation of "handle_frame" vmethod in #GstBaseParse class.
1262  *
1263  * Also determines frame overhead.
1264  * ADTS streams have a 7 byte header in each frame. MP4 and ADIF streams don't have
1265  * a per-frame header. LOAS has 3 bytes.
1266  *
1267  * We're making a couple of simplifying assumptions:
1268  *
1269  * 1. We count Program Configuration Elements rather than searching for them
1270  *    in the streams to discount them - the overhead is negligible.
1271  *
1272  * 2. We ignore CRC. This has a worst-case impact of (num_raw_blocks + 1)*16
1273  *    bits, which should still not be significant enough to warrant the
1274  *    additional parsing through the headers
1275  *
1276  * Returns: a #GstFlowReturn.
1277  */
1278 static GstFlowReturn
1279 gst_aac_parse_handle_frame (GstBaseParse * parse,
1280     GstBaseParseFrame * frame, gint * skipsize)
1281 {
1282   GstMapInfo map;
1283   GstAacParse *aacparse;
1284   gboolean ret = FALSE;
1285   gboolean lost_sync;
1286   GstBuffer *buffer;
1287   guint framesize;
1288   gint rate = 0, channels = 0;
1289
1290   aacparse = GST_AAC_PARSE (parse);
1291   buffer = frame->buffer;
1292
1293   gst_buffer_map (buffer, &map, GST_MAP_READ);
1294
1295   *skipsize = -1;
1296   lost_sync = GST_BASE_PARSE_LOST_SYNC (parse);
1297
1298   if (aacparse->header_type == DSPAAC_HEADER_ADIF ||
1299       aacparse->header_type == DSPAAC_HEADER_NONE) {
1300     /* There is nothing to parse */
1301     framesize = map.size;
1302     ret = TRUE;
1303
1304   } else if (aacparse->header_type == DSPAAC_HEADER_NOT_PARSED || lost_sync) {
1305
1306     ret = gst_aac_parse_detect_stream (aacparse, map.data, map.size,
1307         GST_BASE_PARSE_DRAINING (parse), &framesize, skipsize);
1308
1309   } else if (aacparse->header_type == DSPAAC_HEADER_ADTS) {
1310     guint needed_data = 1024;
1311
1312     ret = gst_aac_parse_check_adts_frame (aacparse, map.data, map.size,
1313         GST_BASE_PARSE_DRAINING (parse), &framesize, &needed_data);
1314
1315     if (!ret && needed_data) {
1316       GST_DEBUG ("buffer didn't contain valid frame");
1317       *skipsize = 0;
1318       gst_base_parse_set_min_frame_size (GST_BASE_PARSE (aacparse),
1319           needed_data);
1320     }
1321
1322   } else if (aacparse->header_type == DSPAAC_HEADER_LOAS) {
1323     guint needed_data = 1024;
1324
1325     ret = gst_aac_parse_check_loas_frame (aacparse, map.data,
1326         map.size, GST_BASE_PARSE_DRAINING (parse), &framesize, &needed_data);
1327
1328     if (!ret && needed_data) {
1329       GST_DEBUG ("buffer didn't contain valid frame");
1330       *skipsize = 0;
1331       gst_base_parse_set_min_frame_size (GST_BASE_PARSE (aacparse),
1332           needed_data);
1333     }
1334
1335   } else {
1336     GST_DEBUG ("buffer didn't contain valid frame");
1337     gst_base_parse_set_min_frame_size (GST_BASE_PARSE (aacparse),
1338         ADTS_MAX_SIZE);
1339   }
1340
1341   if (G_UNLIKELY (!ret))
1342     goto exit;
1343
1344   if (aacparse->header_type == DSPAAC_HEADER_ADTS) {
1345     /* see above */
1346     frame->overhead = 7;
1347
1348     gst_aac_parse_parse_adts_header (aacparse, map.data,
1349         &rate, &channels, NULL, NULL);
1350
1351     GST_LOG_OBJECT (aacparse, "rate: %d, chans: %d", rate, channels);
1352
1353     if (G_UNLIKELY (rate != aacparse->sample_rate
1354             || channels != aacparse->channels)) {
1355       aacparse->sample_rate = rate;
1356       aacparse->channels = channels;
1357
1358       if (!gst_aac_parse_set_src_caps (aacparse, NULL)) {
1359         /* If linking fails, we need to return appropriate error */
1360         ret = GST_FLOW_NOT_LINKED;
1361       }
1362
1363       gst_base_parse_set_frame_rate (GST_BASE_PARSE (aacparse),
1364           aacparse->sample_rate, aacparse->frame_samples, 2, 2);
1365     }
1366   } else if (aacparse->header_type == DSPAAC_HEADER_LOAS) {
1367     gboolean setcaps = FALSE;
1368
1369     /* see above */
1370     frame->overhead = 3;
1371
1372     if (!gst_aac_parse_read_loas_config (aacparse, map.data, map.size, &rate,
1373             &channels, NULL) || !rate || !channels) {
1374       /* This is pretty normal when skipping data at the start of
1375        * random stream (MPEG-TS capture for example) */
1376       GST_DEBUG_OBJECT (aacparse, "Error reading LOAS config. Skipping.");
1377       /* Since we don't fully parse the LOAS config, we don't know for sure
1378        * how much to skip. Just skip 1 to end up to the next marker and
1379        * resume parsing from there */
1380       *skipsize = 1;
1381       goto exit;
1382     }
1383
1384     if (G_UNLIKELY (rate != aacparse->sample_rate
1385             || channels != aacparse->channels)) {
1386       aacparse->sample_rate = rate;
1387       aacparse->channels = channels;
1388       setcaps = TRUE;
1389       GST_INFO_OBJECT (aacparse, "New LOAS config: %d Hz, %d channels", rate,
1390           channels);
1391     }
1392
1393     /* We want to set caps both at start, and when rate/channels change.
1394        Since only some LOAS frames have that info, we may receive frames
1395        before knowing about rate/channels. */
1396     if (setcaps
1397         || !gst_pad_has_current_caps (GST_BASE_PARSE_SRC_PAD (aacparse))) {
1398       if (!gst_aac_parse_set_src_caps (aacparse, NULL)) {
1399         /* If linking fails, we need to return appropriate error */
1400         ret = GST_FLOW_NOT_LINKED;
1401       }
1402
1403       gst_base_parse_set_frame_rate (GST_BASE_PARSE (aacparse),
1404           aacparse->sample_rate, aacparse->frame_samples, 2, 2);
1405     }
1406   }
1407
1408   if (aacparse->header_type == DSPAAC_HEADER_NONE
1409       && aacparse->output_header_type == DSPAAC_HEADER_ADTS) {
1410     if (!gst_aac_parse_prepend_adts_headers (aacparse, frame)) {
1411       GST_ERROR_OBJECT (aacparse, "Failed to prepend ADTS headers to frame");
1412       ret = GST_FLOW_ERROR;
1413     }
1414   }
1415
1416 exit:
1417   gst_buffer_unmap (buffer, &map);
1418
1419   if (ret) {
1420     /* found, skip if needed */
1421     if (*skipsize > 0)
1422       return GST_FLOW_OK;
1423     *skipsize = 0;
1424   } else {
1425     if (*skipsize < 0)
1426       *skipsize = 1;
1427   }
1428
1429   if (ret && framesize <= map.size) {
1430     return gst_base_parse_finish_frame (parse, frame, framesize);
1431   }
1432
1433   return GST_FLOW_OK;
1434 }
1435
1436 static GstFlowReturn
1437 gst_aac_parse_pre_push_frame (GstBaseParse * parse, GstBaseParseFrame * frame)
1438 {
1439   GstAacParse *aacparse = GST_AAC_PARSE (parse);
1440
1441   if (!aacparse->sent_codec_tag) {
1442     GstTagList *taglist;
1443     GstCaps *caps;
1444
1445     /* codec tag */
1446     caps = gst_pad_get_current_caps (GST_BASE_PARSE_SRC_PAD (parse));
1447     if (caps == NULL) {
1448       if (GST_PAD_IS_FLUSHING (GST_BASE_PARSE_SRC_PAD (parse))) {
1449         GST_INFO_OBJECT (parse, "Src pad is flushing");
1450         return GST_FLOW_FLUSHING;
1451       } else {
1452         GST_INFO_OBJECT (parse, "Src pad is not negotiated!");
1453         return GST_FLOW_NOT_NEGOTIATED;
1454       }
1455     }
1456
1457     taglist = gst_tag_list_new_empty ();
1458     gst_pb_utils_add_codec_description_to_tag_list (taglist,
1459         GST_TAG_AUDIO_CODEC, caps);
1460     gst_caps_unref (caps);
1461
1462     gst_base_parse_merge_tags (parse, taglist, GST_TAG_MERGE_REPLACE);
1463     gst_tag_list_unref (taglist);
1464
1465     /* also signals the end of first-frame processing */
1466     aacparse->sent_codec_tag = TRUE;
1467   }
1468
1469   /* As a special case, we can remove the ADTS framing and output raw AAC. */
1470   if (aacparse->header_type == DSPAAC_HEADER_ADTS
1471       && aacparse->output_header_type == DSPAAC_HEADER_NONE) {
1472     guint header_size;
1473     GstMapInfo map;
1474     frame->out_buffer = gst_buffer_make_writable (frame->buffer);
1475     frame->buffer = NULL;
1476     gst_buffer_map (frame->out_buffer, &map, GST_MAP_READ);
1477     header_size = (map.data[1] & 1) ? 7 : 9;    /* optional CRC */
1478     gst_buffer_unmap (frame->out_buffer, &map);
1479     gst_buffer_resize (frame->out_buffer, header_size,
1480         gst_buffer_get_size (frame->out_buffer) - header_size);
1481   }
1482
1483   return GST_FLOW_OK;
1484 }
1485
1486
1487 /**
1488  * gst_aac_parse_start:
1489  * @parse: #GstBaseParse.
1490  *
1491  * Implementation of "start" vmethod in #GstBaseParse class.
1492  *
1493  * Returns: TRUE if startup succeeded.
1494  */
1495 static gboolean
1496 gst_aac_parse_start (GstBaseParse * parse)
1497 {
1498   GstAacParse *aacparse;
1499
1500   aacparse = GST_AAC_PARSE (parse);
1501   GST_DEBUG ("start");
1502   aacparse->frame_samples = 1024;
1503   gst_base_parse_set_min_frame_size (GST_BASE_PARSE (aacparse), ADTS_MAX_SIZE);
1504   aacparse->sent_codec_tag = FALSE;
1505   aacparse->last_parsed_channels = 0;
1506   aacparse->last_parsed_sample_rate = 0;
1507   aacparse->object_type = 0;
1508   aacparse->bitrate = 0;
1509   aacparse->header_type = DSPAAC_HEADER_NOT_PARSED;
1510   aacparse->output_header_type = DSPAAC_HEADER_NOT_PARSED;
1511   aacparse->channels = 0;
1512   aacparse->sample_rate = 0;
1513   return TRUE;
1514 }
1515
1516
1517 /**
1518  * gst_aac_parse_stop:
1519  * @parse: #GstBaseParse.
1520  *
1521  * Implementation of "stop" vmethod in #GstBaseParse class.
1522  *
1523  * Returns: TRUE is stopping succeeded.
1524  */
1525 static gboolean
1526 gst_aac_parse_stop (GstBaseParse * parse)
1527 {
1528   GST_DEBUG ("stop");
1529   return TRUE;
1530 }
1531
1532 static void
1533 remove_fields (GstCaps * caps)
1534 {
1535   guint i, n;
1536
1537   n = gst_caps_get_size (caps);
1538   for (i = 0; i < n; i++) {
1539     GstStructure *s = gst_caps_get_structure (caps, i);
1540
1541     gst_structure_remove_field (s, "framed");
1542   }
1543 }
1544
1545 static void
1546 add_conversion_fields (GstCaps * caps)
1547 {
1548   guint i, n;
1549
1550   n = gst_caps_get_size (caps);
1551   for (i = 0; i < n; i++) {
1552     GstStructure *s = gst_caps_get_structure (caps, i);
1553
1554     if (gst_structure_has_field (s, "stream-format")) {
1555       const GValue *v = gst_structure_get_value (s, "stream-format");
1556
1557       if (G_VALUE_HOLDS_STRING (v)) {
1558         const gchar *str = g_value_get_string (v);
1559
1560         if (strcmp (str, "adts") == 0 || strcmp (str, "raw") == 0) {
1561           GValue va = G_VALUE_INIT;
1562           GValue vs = G_VALUE_INIT;
1563
1564           g_value_init (&va, GST_TYPE_LIST);
1565           g_value_init (&vs, G_TYPE_STRING);
1566           g_value_set_string (&vs, "adts");
1567           gst_value_list_append_value (&va, &vs);
1568           g_value_set_string (&vs, "raw");
1569           gst_value_list_append_value (&va, &vs);
1570           gst_structure_set_value (s, "stream-format", &va);
1571           g_value_unset (&va);
1572           g_value_unset (&vs);
1573         }
1574       } else if (GST_VALUE_HOLDS_LIST (v)) {
1575         gboolean contains_raw = FALSE;
1576         gboolean contains_adts = FALSE;
1577         guint m = gst_value_list_get_size (v), j;
1578
1579         for (j = 0; j < m; j++) {
1580           const GValue *ve = gst_value_list_get_value (v, j);
1581           const gchar *str;
1582
1583           if (G_VALUE_HOLDS_STRING (ve) && (str = g_value_get_string (ve))) {
1584             if (strcmp (str, "adts") == 0)
1585               contains_adts = TRUE;
1586             else if (strcmp (str, "raw") == 0)
1587               contains_raw = TRUE;
1588           }
1589         }
1590
1591         if (contains_adts || contains_raw) {
1592           GValue va = G_VALUE_INIT;
1593           GValue vs = G_VALUE_INIT;
1594
1595           g_value_init (&va, GST_TYPE_LIST);
1596           g_value_init (&vs, G_TYPE_STRING);
1597           g_value_copy (v, &va);
1598
1599           if (!contains_raw) {
1600             g_value_set_string (&vs, "raw");
1601             gst_value_list_append_value (&va, &vs);
1602           }
1603           if (!contains_adts) {
1604             g_value_set_string (&vs, "adts");
1605             gst_value_list_append_value (&va, &vs);
1606           }
1607
1608           gst_structure_set_value (s, "stream-format", &va);
1609
1610           g_value_unset (&vs);
1611           g_value_unset (&va);
1612         }
1613       }
1614     }
1615   }
1616 }
1617
1618 static GstCaps *
1619 gst_aac_parse_sink_getcaps (GstBaseParse * parse, GstCaps * filter)
1620 {
1621   GstCaps *peercaps, *templ;
1622   GstCaps *res;
1623
1624   templ = gst_pad_get_pad_template_caps (GST_BASE_PARSE_SINK_PAD (parse));
1625
1626   if (filter) {
1627     GstCaps *fcopy = gst_caps_copy (filter);
1628     /* Remove the fields we convert */
1629     remove_fields (fcopy);
1630     add_conversion_fields (fcopy);
1631     peercaps = gst_pad_peer_query_caps (GST_BASE_PARSE_SRC_PAD (parse), fcopy);
1632     gst_caps_unref (fcopy);
1633   } else
1634     peercaps = gst_pad_peer_query_caps (GST_BASE_PARSE_SRC_PAD (parse), NULL);
1635
1636   if (peercaps) {
1637     peercaps = gst_caps_make_writable (peercaps);
1638     /* Remove the fields we convert */
1639     remove_fields (peercaps);
1640     add_conversion_fields (peercaps);
1641
1642     res = gst_caps_intersect_full (peercaps, templ, GST_CAPS_INTERSECT_FIRST);
1643     gst_caps_unref (peercaps);
1644     gst_caps_unref (templ);
1645   } else {
1646     res = templ;
1647   }
1648
1649   if (filter) {
1650     GstCaps *intersection;
1651
1652     intersection =
1653         gst_caps_intersect_full (filter, res, GST_CAPS_INTERSECT_FIRST);
1654     gst_caps_unref (res);
1655     res = intersection;
1656   }
1657
1658   return res;
1659 }
1660
1661 static gboolean
1662 gst_aac_parse_src_event (GstBaseParse * parse, GstEvent * event)
1663 {
1664   GstAacParse *aacparse = GST_AAC_PARSE (parse);
1665
1666   if (GST_EVENT_TYPE (event) == GST_EVENT_FLUSH_STOP) {
1667     aacparse->last_parsed_channels = 0;
1668     aacparse->last_parsed_sample_rate = 0;
1669   }
1670
1671   return GST_BASE_PARSE_CLASS (parent_class)->src_event (parse, event);
1672 }