src/media/mp3/mp3_stream_parser.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/mp3/mp3_stream_parser.h"
   6
   7 #include "base/bind.h"
   8 #include "base/callback_helpers.h"
   9 #include "base/message_loop/message_loop.h"
  10 #include "media/base/bit_reader.h"
  11 #include "media/base/buffers.h"
  12 #include "media/base/stream_parser_buffer.h"
  13 #include "media/base/video_decoder_config.h"
  14 #include "net/http/http_util.h"
  15
  16 namespace media {
  17
  18 static const uint32 kMP3StartCodeMask = 0xffe00000;
  19 static const uint32 kICYStartCode = 0x49435920; // 'ICY '
  20
  21 // Arbitrary upper bound on the size of an IceCast header before it
  22 // triggers an error.
  23 static const int kMaxIcecastHeaderSize = 4096;
  24
  25 static const uint32 kID3StartCodeMask = 0xffffff00;
  26 static const uint32 kID3v1StartCode = 0x54414700; // 'TAG\0'
  27 static const int kID3v1Size = 128;
  28 static const int kID3v1ExtendedSize = 227;
  29 static const uint32 kID3v2StartCode = 0x49443300; // 'ID3\0'
  30
  31 // Map that determines which bitrate_index & channel_mode combinations
  32 // are allowed.
  33 // Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html
  34 static const bool kIsAllowed[17][4] = {
  35   { true, true, true, true },      // free
  36   { true, false, false, false },   // 32
  37   { true, false, false, false },   // 48
  38   { true, false, false, false },   // 56
  39   { true, true, true, true },      // 64
  40   { true, false, false, false },   // 80
  41   { true, true, true, true },      // 96
  42   { true, true, true, true },      // 112
  43   { true, true, true, true },      // 128
  44   { true, true, true, true },      // 160
  45   { true, true, true, true },      // 192
  46   { false, true, true, true },     // 224
  47   { false, true, true, true },     // 256
  48   { false, true, true, true },     // 320
  49   { false, true, true, true },     // 384
  50   { false, false, false, false }   // bad
  51 };
  52
  53 // Maps version and layer information in the frame header
  54 // into an index for the |kBitrateMap|.
  55 // Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html
  56 static const int kVersionLayerMap[4][4] = {
  57   // { reserved, L3, L2, L1 }
  58   { 5, 4, 4, 3 },  // MPEG 2.5
  59   { 5, 5, 5, 5 },  // reserved
  60   { 5, 4, 4, 3 },  // MPEG 2
  61   { 5, 2, 1, 0 }   // MPEG 1
  62 };
  63
  64 // Maps the bitrate index field in the header and an index
  65 // from |kVersionLayerMap| to a frame bitrate.
  66 // Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html
  67 static const int kBitrateMap[16][6] = {
  68   // { V1L1, V1L2, V1L3, V2L1, V2L2 & V2L3, reserved }
  69   { 0, 0, 0, 0, 0, 0 },
  70   { 32, 32, 32, 32, 8, 0 },
  71   { 64, 48, 40, 48, 16, 0 },
  72   { 96, 56, 48, 56, 24, 0 },
  73   { 128, 64, 56, 64, 32, 0 },
  74   { 160, 80, 64, 80, 40, 0 },
  75   { 192, 96, 80, 96, 48, 0 },
  76   { 224, 112, 96, 112, 56, 0 },
  77   { 256, 128, 112, 128, 64, 0 },
  78   { 288, 160, 128, 144, 80, 0 },
  79   { 320, 192, 160, 160, 96, 0 },
  80   { 352, 224, 192, 176, 112, 0 },
  81   { 384, 256, 224, 192, 128, 0 },
  82   { 416, 320, 256, 224, 144, 0 },
  83   { 448, 384, 320, 256, 160, 0 },
  84   { 0, 0, 0, 0, 0}
  85 };
  86
  87 // Maps the sample rate index and version fields from the frame header
  88 // to a sample rate.
  89 // Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html
  90 static const int kSampleRateMap[4][4] = {
  91   // { V2.5, reserved, V2, V1 }
  92   { 11025, 0, 22050, 44100 },
  93   { 12000, 0, 24000, 48000 },
  94   { 8000, 0, 16000, 32000 },
  95   { 0, 0, 0, 0 }
  96 };
  97
  98 // Frame header field constants.
  99 static const int kVersion2 = 2;
 100 static const int kVersionReserved = 1;
 101 static const int kVersion2_5 = 0;
 102 static const int kLayerReserved = 0;
 103 static const int kLayer1 = 3;
 104 static const int kLayer2 = 2;
 105 static const int kLayer3 = 1;
 106 static const int kBitrateFree = 0;
 107 static const int kBitrateBad = 0xf;
 108 static const int kSampleRateReserved = 3;
 109
 110 MP3StreamParser::MP3StreamParser()
 111     : state_(UNINITIALIZED),
 112       in_media_segment_(false) {
 113 }
 114
 115 MP3StreamParser::~MP3StreamParser() {}
 116
 117 void MP3StreamParser::Init(const InitCB& init_cb,
 118                            const NewConfigCB& config_cb,
 119                            const NewBuffersCB& new_buffers_cb,
 120                            const NewTextBuffersCB& text_cb,
 121                            const NeedKeyCB& need_key_cb,
 122                            const AddTextTrackCB& add_text_track_cb,
 123                            const NewMediaSegmentCB& new_segment_cb,
 124                            const base::Closure& end_of_segment_cb,
 125                            const LogCB& log_cb) {
 126   DVLOG(1) << __FUNCTION__;
 127   DCHECK_EQ(state_, UNINITIALIZED);
 128   init_cb_ = init_cb;
 129   config_cb_ = config_cb;
 130   new_buffers_cb_ = new_buffers_cb;
 131   new_segment_cb_ = new_segment_cb;
 132   end_of_segment_cb_ = end_of_segment_cb;
 133   log_cb_ = log_cb;
 134
 135   ChangeState(INITIALIZED);
 136 }
 137
 138 void MP3StreamParser::Flush() {
 139   DVLOG(1) << __FUNCTION__;
 140   DCHECK_NE(state_, UNINITIALIZED);
 141   queue_.Reset();
 142   timestamp_helper_->SetBaseTimestamp(base::TimeDelta());
 143   in_media_segment_ = false;
 144 }
 145
 146 bool MP3StreamParser::Parse(const uint8* buf, int size) {
 147   DVLOG(1) << __FUNCTION__ << "(" << size << ")";
 148   DCHECK(buf);
 149   DCHECK_GT(size, 0);
 150   DCHECK_NE(state_, UNINITIALIZED);
 151
 152   if (state_ == PARSE_ERROR)
 153     return false;
 154
 155   DCHECK_EQ(state_, INITIALIZED);
 156
 157   queue_.Push(buf, size);
 158
 159   bool end_of_segment = true;
 160   BufferQueue buffers;
 161   for (;;) {
 162     const uint8* data;
 163     int data_size;
 164     queue_.Peek(&data, &data_size);
 165
 166     if (data_size < 4)
 167       break;
 168
 169     uint32 start_code = data[0] << 24 | data[1] << 16 | data[2] << 8 | data[3];
 170     int bytes_read = 0;
 171     bool parsed_metadata = true;
 172     if ((start_code & kMP3StartCodeMask) == kMP3StartCodeMask) {
 173       bytes_read = ParseMP3Frame(data, data_size, &buffers);
 174
 175       // Only allow the current segment to end if a full frame has been parsed.
 176       end_of_segment = bytes_read > 0;
 177       parsed_metadata = false;
 178     } else if (start_code == kICYStartCode) {
 179       bytes_read = ParseIcecastHeader(data, data_size);
 180     } else if ((start_code & kID3StartCodeMask) == kID3v1StartCode) {
 181       bytes_read = ParseID3v1(data, data_size);
 182     } else if ((start_code & kID3StartCodeMask) == kID3v2StartCode) {
 183       bytes_read = ParseID3v2(data, data_size);
 184     } else {
 185       bytes_read = FindNextValidStartCode(data, data_size);
 186
 187       if (bytes_read > 0) {
 188         DVLOG(1) << "Unexpected start code 0x" << std::hex << start_code;
 189         DVLOG(1) << "SKIPPING " << bytes_read << " bytes of garbage.";
 190       }
 191     }
 192
 193     CHECK_LE(bytes_read, data_size);
 194
 195     if (bytes_read < 0) {
 196       ChangeState(PARSE_ERROR);
 197       return false;
 198     } else if (bytes_read == 0) {
 199       // Need more data.
 200       break;
 201     }
 202
 203     // Send pending buffers if we have encountered metadata.
 204     if (parsed_metadata && !buffers.empty() && !SendBuffers(&buffers, true))
 205       return false;
 206
 207     queue_.Pop(bytes_read);
 208     end_of_segment = true;
 209   }
 210
 211   if (buffers.empty())
 212     return true;
 213
 214   // Send buffers collected in this append that haven't been sent yet.
 215   return SendBuffers(&buffers, end_of_segment);
 216 }
 217
 218 void MP3StreamParser::ChangeState(State state) {
 219   DVLOG(1) << __FUNCTION__ << "() : " << state_ << " -> " << state;
 220   state_ = state;
 221 }
 222
 223 int MP3StreamParser::ParseFrameHeader(const uint8* data, int size,
 224                                       int* frame_size,
 225                                       int* sample_rate,
 226                                       ChannelLayout* channel_layout,
 227                                       int* sample_count) const {
 228   DCHECK(data);
 229   DCHECK_GE(size, 0);
 230   DCHECK(frame_size);
 231
 232   if (size < 4)
 233     return 0;
 234
 235   BitReader reader(data, size);
 236   int sync;
 237   int version;
 238   int layer;
 239   int is_protected;
 240   int bitrate_index;
 241   int sample_rate_index;
 242   int has_padding;
 243   int is_private;
 244   int channel_mode;
 245   int other_flags;
 246
 247   if (!reader.ReadBits(11, &sync) ||
 248       !reader.ReadBits(2, &version) ||
 249       !reader.ReadBits(2, &layer) ||
 250       !reader.ReadBits(1, &is_protected) ||
 251       !reader.ReadBits(4, &bitrate_index) ||
 252       !reader.ReadBits(2, &sample_rate_index) ||
 253       !reader.ReadBits(1, &has_padding) ||
 254       !reader.ReadBits(1, &is_private) ||
 255       !reader.ReadBits(2, &channel_mode) ||
 256       !reader.ReadBits(6, &other_flags)) {
 257     return -1;
 258   }
 259
 260   DVLOG(2) << "Header data :" << std::hex
 261            << " sync 0x" << sync
 262            << " version 0x" << version
 263            << " layer 0x" << layer
 264            << " bitrate_index 0x" << bitrate_index
 265            << " sample_rate_index 0x" << sample_rate_index
 266            << " channel_mode 0x" << channel_mode;
 267
 268   if (sync != 0x7ff ||
 269       version == kVersionReserved ||
 270       layer == kLayerReserved ||
 271       bitrate_index == kBitrateFree || bitrate_index == kBitrateBad ||
 272       sample_rate_index == kSampleRateReserved) {
 273     MEDIA_LOG(log_cb_) << "Invalid header data :" << std::hex
 274                        << " sync 0x" << sync
 275                        << " version 0x" << version
 276                        << " layer 0x" << layer
 277                        << " bitrate_index 0x" << bitrate_index
 278                        << " sample_rate_index 0x" << sample_rate_index
 279                        << " channel_mode 0x" << channel_mode;
 280     return -1;
 281   }
 282
 283   if (layer == kLayer2 && kIsAllowed[bitrate_index][channel_mode]) {
 284     MEDIA_LOG(log_cb_) << "Invalid (bitrate_index, channel_mode) combination :"
 285                        << std::hex
 286                        << " bitrate_index " << bitrate_index
 287                        << " channel_mode " << channel_mode;
 288     return -1;
 289   }
 290
 291   int bitrate = kBitrateMap[bitrate_index][kVersionLayerMap[version][layer]];
 292
 293   if (bitrate == 0) {
 294     MEDIA_LOG(log_cb_) << "Invalid bitrate :" << std::hex
 295                        << " version " << version
 296                        << " layer " << layer
 297                        << " bitrate_index " << bitrate_index;
 298     return -1;
 299   }
 300
 301   DVLOG(2) << " bitrate " << bitrate;
 302
 303   int frame_sample_rate = kSampleRateMap[sample_rate_index][version];
 304   if (frame_sample_rate == 0) {
 305     MEDIA_LOG(log_cb_) << "Invalid sample rate :" << std::hex
 306                        << " version " << version
 307                        << " sample_rate_index " << sample_rate_index;
 308     return -1;
 309   }
 310
 311   if (sample_rate)
 312     *sample_rate = frame_sample_rate;
 313
 314   // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf
 315   // Table 2.1.5
 316   int samples_per_frame;
 317   switch (layer) {
 318     case kLayer1:
 319       samples_per_frame = 384;
 320       break;
 321
 322     case kLayer2:
 323       samples_per_frame = 1152;
 324       break;
 325
 326     case kLayer3:
 327       if (version == kVersion2 || version == kVersion2_5)
 328         samples_per_frame = 576;
 329       else
 330         samples_per_frame = 1152;
 331       break;
 332
 333     default:
 334       return -1;
 335   }
 336
 337   if (sample_count)
 338     *sample_count = samples_per_frame;
 339
 340   // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf
 341   // Text just below Table 2.1.5.
 342   if (layer == kLayer1) {
 343     // This formulation is a slight variation on the equation below,
 344     // but has slightly different truncation characteristics to deal
 345     // with the fact that Layer 1 has 4 byte "slots" instead of single
 346     // byte ones.
 347     *frame_size = 4 * (12 * bitrate * 1000 / frame_sample_rate);
 348   } else {
 349     *frame_size =
 350         ((samples_per_frame / 8) * bitrate * 1000) / frame_sample_rate;
 351   }
 352
 353   if (has_padding)
 354     *frame_size += (layer == kLayer1) ? 4 : 1;
 355
 356   if (channel_layout) {
 357     // Map Stereo(0), Joint Stereo(1), and Dual Channel (2) to
 358     // CHANNEL_LAYOUT_STEREO and Single Channel (3) to CHANNEL_LAYOUT_MONO.
 359     *channel_layout =
 360         (channel_mode == 3) ? CHANNEL_LAYOUT_MONO : CHANNEL_LAYOUT_STEREO;
 361   }
 362
 363   return 4;
 364 }
 365
 366 int MP3StreamParser::ParseMP3Frame(const uint8* data,
 367                                    int size,
 368                                    BufferQueue* buffers) {
 369   DVLOG(2) << __FUNCTION__ << "(" << size << ")";
 370
 371   int sample_rate;
 372   ChannelLayout channel_layout;
 373   int frame_size;
 374   int sample_count;
 375   int bytes_read = ParseFrameHeader(
 376       data, size, &frame_size, &sample_rate, &channel_layout, &sample_count);
 377
 378   if (bytes_read <= 0)
 379     return bytes_read;
 380
 381   // Make sure data contains the entire frame.
 382   if (size < frame_size)
 383     return 0;
 384
 385   DVLOG(2) << " sample_rate " << sample_rate
 386            << " channel_layout " << channel_layout
 387            << " frame_size " << frame_size;
 388
 389   if (config_.IsValidConfig() &&
 390       (config_.samples_per_second() != sample_rate ||
 391        config_.channel_layout() != channel_layout)) {
 392     // Clear config data so that a config change is initiated.
 393     config_ = AudioDecoderConfig();
 394
 395     // Send all buffers associated with the previous config.
 396     if (!buffers->empty() && !SendBuffers(buffers, true))
 397       return -1;
 398   }
 399
 400   if (!config_.IsValidConfig()) {
 401     config_.Initialize(kCodecMP3, kSampleFormatF32, channel_layout,
 402                        sample_rate, NULL, 0, false, false,
 403                        base::TimeDelta(), base::TimeDelta());
 404
 405     base::TimeDelta base_timestamp;
 406     if (timestamp_helper_)
 407       base_timestamp = timestamp_helper_->GetTimestamp();
 408
 409     timestamp_helper_.reset(new AudioTimestampHelper(sample_rate));
 410     timestamp_helper_->SetBaseTimestamp(base_timestamp);
 411
 412     VideoDecoderConfig video_config;
 413     bool success = config_cb_.Run(config_, video_config);
 414
 415     if (!init_cb_.is_null())
 416       base::ResetAndReturn(&init_cb_).Run(success, kInfiniteDuration());
 417
 418     if (!success)
 419       return -1;
 420   }
 421
 422   scoped_refptr<StreamParserBuffer> buffer =
 423       StreamParserBuffer::CopyFrom(data, frame_size, true);
 424   buffer->set_timestamp(timestamp_helper_->GetTimestamp());
 425   buffer->set_duration(timestamp_helper_->GetFrameDuration(sample_count));
 426   buffers->push_back(buffer);
 427
 428   timestamp_helper_->AddFrames(sample_count);
 429
 430   return frame_size;
 431 }
 432
 433 int MP3StreamParser::ParseIcecastHeader(const uint8* data, int size) {
 434   DVLOG(1) << __FUNCTION__ << "(" << size << ")";
 435
 436   if (size < 4)
 437     return 0;
 438
 439   if (memcmp("ICY ", data, 4))
 440     return -1;
 441
 442   int locate_size = std::min(size, kMaxIcecastHeaderSize);
 443   int offset = net::HttpUtil::LocateEndOfHeaders(
 444       reinterpret_cast<const char*>(data), locate_size, 4);
 445   if (offset < 0) {
 446     if (locate_size == kMaxIcecastHeaderSize) {
 447       MEDIA_LOG(log_cb_) << "Icecast header is too large.";
 448       return -1;
 449     }
 450
 451     return 0;
 452   }
 453
 454   return offset;
 455 }
 456
 457 int MP3StreamParser::ParseID3v1(const uint8* data, int size) {
 458   DVLOG(1) << __FUNCTION__ << "(" << size << ")";
 459
 460   if (size < kID3v1Size)
 461     return 0;
 462
 463   // TODO(acolwell): Add code to actually validate ID3v1 data and
 464   // expose it as a metadata text track.
 465   return !memcmp(data, "TAG+", 4) ? kID3v1ExtendedSize : kID3v1Size;
 466 }
 467
 468 int MP3StreamParser::ParseID3v2(const uint8* data, int size) {
 469   DVLOG(1) << __FUNCTION__ << "(" << size << ")";
 470
 471   if (size < 10)
 472     return 0;
 473
 474   BitReader reader(data, size);
 475   int32 id;
 476   int version;
 477   uint8 flags;
 478   int32 id3_size;
 479
 480   if (!reader.ReadBits(24, &id) ||
 481       !reader.ReadBits(16, &version) ||
 482       !reader.ReadBits(8, &flags) ||
 483       !ParseSyncSafeInt(&reader, &id3_size)) {
 484     return -1;
 485   }
 486
 487   int32 actual_tag_size = 10 + id3_size;
 488
 489   // Increment size if 'Footer present' flag is set.
 490   if (flags & 0x10)
 491     actual_tag_size += 10;
 492
 493   // Make sure we have the entire tag.
 494   if (size < actual_tag_size)
 495     return 0;
 496
 497   // TODO(acolwell): Add code to actually validate ID3v2 data and
 498   // expose it as a metadata text track.
 499   return actual_tag_size;
 500 }
 501
 502 bool MP3StreamParser::ParseSyncSafeInt(BitReader* reader, int32* value) {
 503   *value = 0;
 504   for (int i = 0; i < 4; ++i) {
 505     uint8 tmp;
 506     if (!reader->ReadBits(1, &tmp) || tmp != 0) {
 507       MEDIA_LOG(log_cb_) << "ID3 syncsafe integer byte MSb is not 0!";
 508       return false;
 509     }
 510
 511     if (!reader->ReadBits(7, &tmp))
 512       return false;
 513
 514     *value <<= 7;
 515     *value += tmp;
 516   }
 517
 518   return true;
 519 }
 520
 521 int MP3StreamParser::FindNextValidStartCode(const uint8* data, int size) const {
 522   const uint8* start = data;
 523   const uint8* end = data + size;
 524
 525   while (start < end) {
 526     int bytes_left = end - start;
 527     const uint8* candidate_start_code =
 528         static_cast<const uint8*>(memchr(start, 0xff, bytes_left));
 529
 530     if (!candidate_start_code)
 531       return 0;
 532
 533     bool parse_header_failed = false;
 534     const uint8* sync = candidate_start_code;
 535     // Try to find 3 valid frames in a row. 3 was selected to decrease
 536     // the probability of false positives.
 537     for (int i = 0; i < 3; ++i) {
 538       int sync_size = end - sync;
 539       int frame_size;
 540       int sync_bytes = ParseFrameHeader(
 541           sync, sync_size, &frame_size, NULL, NULL, NULL);
 542
 543       if (sync_bytes == 0)
 544         return 0;
 545
 546       if (sync_bytes > 0) {
 547         DCHECK_LT(sync_bytes, sync_size);
 548
 549         // Skip over this frame so we can check the next one.
 550         sync += frame_size;
 551
 552         // Make sure the next frame starts inside the buffer.
 553         if (sync >= end)
 554           return 0;
 555       } else {
 556         DVLOG(1) << "ParseFrameHeader() " << i << " failed @" << (sync - data);
 557         parse_header_failed = true;
 558         break;
 559       }
 560     }
 561
 562     if (parse_header_failed) {
 563       // One of the frame header parses failed so |candidate_start_code|
 564       // did not point to the start of a real frame. Move |start| forward
 565       // so we can find the next candidate.
 566       start = candidate_start_code + 1;
 567       continue;
 568     }
 569
 570     return candidate_start_code - data;
 571   }
 572
 573   return 0;
 574 }
 575
 576 bool MP3StreamParser::SendBuffers(BufferQueue* buffers, bool end_of_segment) {
 577   DCHECK(!buffers->empty());
 578
 579   if (!in_media_segment_) {
 580     in_media_segment_ = true;
 581     new_segment_cb_.Run();
 582   }
 583
 584   BufferQueue empty_video_buffers;
 585   if (!new_buffers_cb_.Run(*buffers, empty_video_buffers))
 586     return false;
 587   buffers->clear();
 588
 589   if (end_of_segment) {
 590     in_media_segment_ = false;
 591     end_of_segment_cb_.Run();
 592   }
 593
 594   return true;
 595 }
 596
 597 }  // namespace media