1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/formats/webm/webm_cluster_parser.h"
9 #include "base/logging.h"
10 #include "base/sys_byteorder.h"
11 #include "media/base/buffers.h"
12 #include "media/base/decrypt_config.h"
13 #include "media/filters/webvtt_util.h"
14 #include "media/formats/webm/webm_constants.h"
15 #include "media/formats/webm/webm_crypto_helpers.h"
16 #include "media/formats/webm/webm_webvtt_parser.h"
// Constructs a cluster parser.
//
// From the visible initializers: derives |timecode_multiplier_| from
// |timecode_scale|, initializes the ignored-track set and the audio/video
// encryption key IDs from the corresponding arguments, builds |parser_| for
// the Cluster element ID with this object as its client, sets the per-block
// and per-cluster state to "not set" sentinels, and creates the audio and
// video Tracks. The body then inserts one Track per entry of |text_tracks|
// into |text_track_map_|.
//
// NOTE(review): this listing omits several original lines (the embedded
// source numbers skip). The declarations of |timecode_scale|,
// |audio_track_num|, |video_track_num| and |log_cb|, plus some member
// initializers, are not visible here.
20 WebMClusterParser::WebMClusterParser(
23 base::TimeDelta audio_default_duration,
25 base::TimeDelta video_default_duration,
26 const WebMTracksParser::TextTracks& text_tracks,
27 const std::set<int64>& ignored_tracks,
28 const std::string& audio_encryption_key_id,
29 const std::string& video_encryption_key_id,
// Timecodes multiplied by this value are passed to
// base::TimeDelta::FromMicroseconds() elsewhere in this file.
31 : timecode_multiplier_(timecode_scale / 1000.0),
32 ignored_tracks_(ignored_tracks),
33 audio_encryption_key_id_(audio_encryption_key_id),
34 video_encryption_key_id_(video_encryption_key_id),
// |this| supplies the On*() callbacks defined later in this file.
35 parser_(kWebMIdCluster, this),
// -1, kNoTimestamp() and kNoDecodeTimestamp() are used as "not set yet"
// sentinels; later code compares against them before using these members.
36 last_block_timecode_(-1),
40 block_additional_data_size_(-1),
42 cluster_timecode_(-1),
43 cluster_start_time_(kNoTimestamp()),
44 cluster_ended_(false),
// The audio and video Tracks differ only in track number, default duration
// and the second argument (false for audio, true for video).
// NOTE(review): presumably that argument is an "is video" flag - confirm
// against the Track constructor declaration.
45 audio_(audio_track_num, false, audio_default_duration, log_cb),
46 video_(video_track_num, true, video_default_duration, log_cb),
47 ready_buffer_upper_bound_(kNoDecodeTimestamp()),
// Text tracks are keyed by track number and are created with no default
// duration (kNoTimestamp()).
49 for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
50 it != text_tracks.end();
52 text_track_map_.insert(std::make_pair(
53 it->first, Track(it->first, false, kNoTimestamp(), log_cb_)));
// Destructor; has an empty body.
57 WebMClusterParser::~WebMClusterParser() {}
// Puts the per-cluster state back to its "not set" values: the last block
// timecode, the cluster timecode, the cluster start time, the cluster-ended
// flag and the ready-buffer upper bound.
// NOTE(review): source lines 64-67 are not present in this listing, so any
// additional resets performed there cannot be confirmed from here.
59 void WebMClusterParser::Reset() {
60 last_block_timecode_ = -1;
61 cluster_timecode_ = -1;
62 cluster_start_time_ = kNoTimestamp();
63 cluster_ended_ = false;
68 ready_buffer_upper_bound_ = kNoDecodeTimestamp();
// Parses |size| bytes starting at |buf| with the underlying |parser_|.
//
// Before parsing, the ready buffers of every track are cleared and
// |ready_buffer_upper_bound_| is invalidated, so the Get*Buffers() accessors
// recompute which buffers are ready after this call.
//
// NOTE(review): the return statements and the conditions guarding several
// statements below are not present in this listing; the return value is
// presumably derived from |result| - confirm against the full source.
71 int WebMClusterParser::Parse(const uint8* buf, int size) {
// Discard the ready buffers left over from the previous call.
72 audio_.ClearReadyBuffers();
73 video_.ClearReadyBuffers();
74 ClearTextTrackReadyBuffers();
75 ready_buffer_upper_bound_ = kNoDecodeTimestamp();
77 int result = parser_.Parse(buf, size);
// NOTE(review): the guard around the next assignment is not visible;
// presumably it is the parse-failure path - confirm.
80 cluster_ended_ = false;
// The cluster has ended once the list parser reports it is complete.
84 cluster_ended_ = parser_.IsParsingComplete();
86 // If there were no buffers in this cluster, set the cluster start time to
87 // be the |cluster_timecode_|.
88 if (cluster_start_time_ == kNoTimestamp()) {
89 // If the cluster did not even have a |cluster_timecode_|, signal parse
// NOTE(review): the end of the comment above and the statement guarded by the
// check below are not in this listing.
91 if (cluster_timecode_ < 0)
// Convert the cluster timecode to microseconds using |timecode_multiplier_|.
94 cluster_start_time_ = base::TimeDelta::FromMicroseconds(
95 cluster_timecode_ * timecode_multiplier_);
98 // Reset the parser if we're done parsing so that
99 // it is ready to accept another cluster on the next
// NOTE(review): the end of the comment above and the reset call itself are
// not in this listing; only the two sentinel resets below are visible.
103 last_block_timecode_ = -1;
104 cluster_timecode_ = -1;
// Returns the audio track's queue of ready buffers.
// If |ready_buffer_upper_bound_| is still kNoDecodeTimestamp() (Parse() sets
// it to that value), UpdateReadyBuffers() is run first to compute the bound
// and extract the ready buffers of every track.
110 const WebMClusterParser::BufferQueue& WebMClusterParser::GetAudioBuffers() {
111 if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
112 UpdateReadyBuffers();
114 return audio_.ready_buffers();
// Returns the video track's queue of ready buffers.
// If |ready_buffer_upper_bound_| is still kNoDecodeTimestamp() (Parse() sets
// it to that value), UpdateReadyBuffers() is run first to compute the bound
// and extract the ready buffers of every track.
117 const WebMClusterParser::BufferQueue& WebMClusterParser::GetVideoBuffers() {
118 if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
119 UpdateReadyBuffers();
121 return video_.ready_buffers();
// Returns a map from text track number to that track's ready buffers.
// Like the audio/video accessors, this runs UpdateReadyBuffers() first when
// the ready-buffer upper bound has not been computed yet. The returned
// reference is to the member |text_buffers_map_|, which is rebuilt on every
// call.
124 const WebMClusterParser::TextBufferQueueMap&
125 WebMClusterParser::GetTextBuffers() {
126 if (ready_buffer_upper_bound_ == kNoDecodeTimestamp())
127 UpdateReadyBuffers();
129 // Translate our |text_track_map_| into |text_buffers_map_|, inserting rows in
130 // the output only for non-empty ready_buffer() queues in |text_track_map_|.
131 text_buffers_map_.clear();
132 for (TextTrackMap::const_iterator itr = text_track_map_.begin();
133 itr != text_track_map_.end();
135 const BufferQueue& text_buffers = itr->second.ready_buffers();
// Tracks with nothing ready are left out of the result entirely.
136 if (!text_buffers.empty())
137 text_buffers_map_.insert(std::make_pair(itr->first, text_buffers));
140 return text_buffers_map_;
// Callback for the start of a list element with ID |id|. Clears the state
// that belongs to the element being opened so that values from a previous
// element of the same kind are not reused.
// NOTE(review): the return statement is not in this listing; presumably the
// client that should receive the list's children is returned - confirm.
143 WebMParserClient* WebMClusterParser::OnListStart(int id) {
144 if (id == kWebMIdCluster) {
// New cluster: forget the previous cluster's timecode and start time.
145 cluster_timecode_ = -1;
146 cluster_start_time_ = kNoTimestamp();
147 } else if (id == kWebMIdBlockGroup) {
// New BlockGroup: no Block, duration or DiscardPadding seen yet.
149 block_data_size_ = -1;
150 block_duration_ = -1;
151 discard_padding_ = -1;
152 discard_padding_set_ = false;
153 } else if (id == kWebMIdBlockAdditions) {
// New BlockAdditions: drop any previously stored BlockAdditional payload.
155 block_additional_data_.reset();
156 block_additional_data_size_ = -1;
// Callback for the end of a list element with ID |id|. At the end of a
// BlockGroup, the Block stored by OnBinary() is parsed together with the
// BlockAdditional data, BlockDuration and DiscardPadding collected while the
// group was open, and the per-BlockGroup state is then cleared.
// NOTE(review): the return statements are not in this listing; presumably
// non-BlockGroup lists return early and the ParseBlock() result is returned
// at the end - confirm.
162 bool WebMClusterParser::OnListEnd(int id) {
163 if (id != kWebMIdBlockGroup)
166 // Make sure the BlockGroup actually had a Block.
167 if (block_data_size_ == -1) {
168 MEDIA_LOG(log_cb_) << "Block missing from BlockGroup.";
// is_simple_block is false because this Block came from a BlockGroup.
// A DiscardPadding of 0 is passed when the element was absent.
172 bool result = ParseBlock(false, block_data_.get(), block_data_size_,
173 block_additional_data_.get(),
174 block_additional_data_size_, block_duration_,
175 discard_padding_set_ ? discard_padding_ : 0);
// Clear the per-BlockGroup state so it cannot leak into the next group.
177 block_data_size_ = -1;
178 block_duration_ = -1;
180 block_additional_data_.reset();
181 block_additional_data_size_ = -1;
182 discard_padding_ = -1;
183 discard_padding_set_ = false;
// Callback for an unsigned-integer element with ID |id| and value |val|.
// The visible cases select which member corresponds to the element: the
// cluster timecode, the block duration, or the BlockAddID.
// NOTE(review): the switch header, the declaration of |dst|, the store of
// |val| through |dst| and the return statements are not in this listing.
187 bool WebMClusterParser::OnUInt(int id, int64 val) {
190 case kWebMIdTimecode:
191 dst = &cluster_timecode_;
193 case kWebMIdBlockDuration:
194 dst = &block_duration_;
196 case kWebMIdBlockAddID:
197 dst = &block_add_id_;
// Decodes the 4-byte header at the start of a Block/SimpleBlock payload and
// forwards the rest to OnBlock().
//
//   buf[0]    track number (single byte, marker bit 0x80 must be set)
//   buf[1..2] 16-bit big-endian timecode, treated as signed
//   buf[3]    flags; bits 1-2 hold the lacing mode
//   buf + 4   frame data
//
// |additional| / |additional_size|, |duration| and |discard_padding| are
// passed through to OnBlock().
// NOTE(review): buf[0..3] are read unconditionally in the visible lines; a
// "size >= 4" check is not present in this listing (source lines 212-214 are
// missing) - confirm it exists in the full source.
208 bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf,
209 int size, const uint8* additional,
210 int additional_size, int duration,
211 int64 discard_padding) {
215 // Return an error if the trackNum > 127. We just aren't
216 // going to support large track numbers right now.
217 if (!(buf[0] & 0x80)) {
218 MEDIA_LOG(log_cb_) << "TrackNumber over 127 not supported";
222 int track_num = buf[0] & 0x7f;
223 int timecode = buf[1] << 8 | buf[2];
224 int flags = buf[3] & 0xff;
225 int lacing = (flags >> 1) & 0x3;
// NOTE(review): the condition guarding this message is not in this listing.
228 MEDIA_LOG(log_cb_) << "Lacing " << lacing << " is not supported yet.";
232 // Sign extend negative timecode offsets.
// NOTE(review): the statement performing the sign extension is not in this
// listing.
233 if (timecode & 0x8000)
// Everything after the 4-byte header is the frame payload.
236 const uint8* frame_data = buf + 4;
237 int frame_size = size - (frame_data - buf);
238 return OnBlock(is_simple_block, track_num, timecode, duration, flags,
239 frame_data, frame_size, additional, additional_size,
// Callback for a binary element with ID |id| whose payload is |size| bytes at
// |data|. Visible handling:
//   - SimpleBlock: parsed immediately.
//   - Block (inside a BlockGroup): copied and kept until OnListEnd().
//   - BlockAdditional: stored with the BlockAddID prepended.
//   - DiscardPadding: decoded as a signed big-endian integer.
// NOTE(review): the switch header, some case labels, and the break/return
// statements are not in this listing.
243 bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
245 case kWebMIdSimpleBlock:
// A SimpleBlock has no additional data (NULL, -1), no explicit duration (-1)
// and no discard padding (0).
246 return ParseBlock(true, data, size, NULL, -1, -1, 0);
// NOTE(review): the case label and the condition for the message below are
// not in this listing.
250 MEDIA_LOG(log_cb_) << "More than 1 Block in a BlockGroup is not "
// Keep a private copy of the Block; it is parsed in OnListEnd() once the
// whole BlockGroup has been seen.
254 block_data_.reset(new uint8[size]);
255 memcpy(block_data_.get(), data, size);
256 block_data_size_ = size;
259 case kWebMIdBlockAdditional: {
// Convert the BlockAddID to network (big-endian) byte order.
260 uint64 block_add_id = base::HostToNet64(block_add_id_);
261 if (block_additional_data_) {
262 // TODO(vigneshv): Technically, more than 1 BlockAdditional is allowed
263 // as per matroska spec. But for now we don't have a use case to
264 // support parsing of such files. Take a look at this again when such a
266 MEDIA_LOG(log_cb_) << "More than 1 BlockAdditional in a BlockGroup is "
270 // First 8 bytes of side_data in DecoderBuffer is the BlockAddID
271 // element's value in Big Endian format. This is done to mimic ffmpeg
272 // demuxer's behavior.
273 block_additional_data_size_ = size + sizeof(block_add_id);
274 block_additional_data_.reset(new uint8[block_additional_data_size_]);
275 memcpy(block_additional_data_.get(), &block_add_id,
276 sizeof(block_add_id));
// The offset 8 is sizeof(block_add_id), since |block_add_id| is a uint64.
277 memcpy(block_additional_data_.get() + 8, data, size);
280 case kWebMIdDiscardPadding: {
// Checks for a repeated element and for a payload outside 1..8 bytes.
// NOTE(review): the guarded statement is not in this listing; presumably it
// reports a parse error - confirm.
281 if (discard_padding_set_ || size <= 0 || size > 8)
283 discard_padding_set_ = true;
285 // Read in the big-endian integer.
// The first byte is cast to int8 so that the sign bit is extended into
// |discard_padding_|; the remaining bytes are shifted in below it.
286 discard_padding_ = static_cast<int8>(data[0]);
287 for (int i = 1; i < size; ++i)
288 discard_padding_ = (discard_padding_ << 8) | data[i];
// Turns one decoded block into a StreamParserBuffer and hands it to the Track
// it belongs to.
//
// Visible steps:
//   1. Validate the block against the cluster state (cluster timecode known,
//      timecode not before the previous block's).
//   2. Route by |track_num| to the audio track, the video track, an ignored
//      track, or a text track; anything else is logged as unexpected.
//   3. Compute the timestamp from the cluster timecode plus the block's
//      relative |timecode|.
//   4. Build the buffer: audio/video payloads go through optional decryption
//      config setup; text payloads are parsed as WebVTT.
//   5. Set timestamp, duration and discard padding, then call
//      Track::AddBuffer().
//
// NOTE(review): several parameter lines, the |track| assignments, the
// declarations of |is_keyframe| and |data_offset|, and the early-return
// statements are not in this listing.
297 bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
301 const uint8* data, int size,
302 const uint8* additional, int additional_size,
303 int64 discard_padding) {
// A block cannot be timestamped until the cluster's Timecode has been seen.
305 if (cluster_timecode_ == -1) {
306 MEDIA_LOG(log_cb_) << "Got a block before cluster timecode.";
310 // TODO(acolwell): Should relative negative timecode offsets be rejected? Or
311 // only when the absolute timecode is negative? See http://crbug.com/271794
// NOTE(review): the condition guarding this message is not in this listing.
313 MEDIA_LOG(log_cb_) << "Got a block with negative timecode offset "
// Block timecodes within a cluster must not decrease.
318 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
320 << "Got a block with a timecode before the previous block.";
// |buffer_type| defaults to AUDIO and is overridden for video and text below.
325 StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
326 std::string encryption_key_id;
327 if (track_num == audio_.track_num()) {
329 encryption_key_id = audio_encryption_key_id_;
330 } else if (track_num == video_.track_num()) {
332 encryption_key_id = video_encryption_key_id_;
333 buffer_type = DemuxerStream::VIDEO;
334 } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
336 } else if (Track* const text_track = FindTextTrack(track_num)) {
337 if (is_simple_block) // BlockGroup is required for WebVTT cues
339 if (block_duration < 0) // not specified
342 buffer_type = DemuxerStream::TEXT;
344 MEDIA_LOG(log_cb_) << "Unexpected track number " << track_num;
// Remember this timecode for the monotonicity check on the next block.
348 last_block_timecode_ = timecode;
// |timecode| is relative to the cluster timecode; the sum is scaled to
// microseconds.
350 base::TimeDelta timestamp = base::TimeDelta::FromMicroseconds(
351 (cluster_timecode_ + timecode) * timecode_multiplier_);
353 scoped_refptr<StreamParserBuffer> buffer;
354 if (buffer_type != DemuxerStream::TEXT) {
355 // The first bit of the flags is set when a SimpleBlock contains only
356 // keyframes. If this is a Block, then inspection of the payload is
357 // necessary to determine whether it contains a keyframe or not.
358 // http://www.matroska.org/technical/specs/index.html
360 is_simple_block ? (flags & 0x80) != 0 : track->IsKeyframe(data, size);
362 // Every encrypted Block has a signal byte and IV prepended to it. Current
363 // encrypted WebM request for comments specification is here
364 // http://wiki.webmproject.org/encryption/webm-encryption-rfc
365 scoped_ptr<DecryptConfig> decrypt_config;
// Only tracks with a non-empty key ID get a DecryptConfig; the helper also
// writes |data_offset|, which is used below to skip bytes at the start of
// |data| when copying the payload.
367 if (!encryption_key_id.empty() &&
368 !WebMCreateDecryptConfig(
370 reinterpret_cast<const uint8*>(encryption_key_id.data()),
371 encryption_key_id.size(),
372 &decrypt_config, &data_offset)) {
376 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
377 // type with remapped bytestream track numbers and allow multiple tracks as
378 // applicable. See https://crbug.com/341581.
379 buffer = StreamParserBuffer::CopyFrom(
380 data + data_offset, size - data_offset,
381 additional, additional_size,
382 is_keyframe, buffer_type, track_num);
// NOTE(review): any guard around this call is not in this listing.
385 buffer->set_decrypt_config(decrypt_config.Pass());
// Text path: split the WebVTT cue into id, settings and content. The id and
// settings are packed into side data; the content becomes the buffer payload.
387 std::string id, settings, content;
388 WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
390 std::vector<uint8> side_data;
391 MakeSideData(id.begin(), id.end(),
392 settings.begin(), settings.end(),
395 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
396 // type with remapped bytestream track numbers and allow multiple tracks as
397 // applicable. See https://crbug.com/341581.
398 buffer = StreamParserBuffer::CopyFrom(
399 reinterpret_cast<const uint8*>(content.data()),
// Text buffers are always created with the keyframe argument set to true.
403 true, buffer_type, track_num);
406 buffer->set_timestamp(timestamp);
// The first buffer of a cluster defines the cluster start time.
407 if (cluster_start_time_ == kNoTimestamp())
408 cluster_start_time_ = timestamp;
// Use the explicit BlockDuration when one was given; otherwise fall back to
// the track's default duration. Text blocks without a duration are checked
// for earlier, hence the DCHECK.
410 if (block_duration >= 0) {
411 buffer->set_duration(base::TimeDelta::FromMicroseconds(
412 block_duration * timecode_multiplier_));
414 DCHECK_NE(buffer_type, DemuxerStream::TEXT);
415 buffer->set_duration(track->default_duration());
418 if (discard_padding != 0) {
419 buffer->set_discard_padding(std::make_pair(
// NOTE(review): presumably |discard_padding| is in nanoseconds, making the
// division by 1000 a conversion to microseconds - confirm against the spec.
421 base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
424 return track->AddBuffer(buffer);
// Constructs the per-track state: the track number, the default frame
// duration, and an initially unknown |estimated_next_frame_duration_|.
// |default_duration| must be either kNoTimestamp() ("none") or strictly
// positive, as enforced by the DCHECK.
// NOTE(review): two parameter lines and two member initializers are not in
// this listing.
427 WebMClusterParser::Track::Track(int track_num,
429 base::TimeDelta default_duration,
431 : track_num_(track_num),
433 default_duration_(default_duration),
434 estimated_next_frame_duration_(kNoTimestamp()),
436 DCHECK(default_duration_ == kNoTimestamp() ||
437 default_duration_ > base::TimeDelta());
// Destructor; has an empty body.
440 WebMClusterParser::Track::~Track() {}
// Returns the exclusive upper bound on decode timestamps that this track can
// currently release. While a buffer is held back waiting for its duration,
// the bound is that buffer's decode timestamp; otherwise the track imposes no
// limit and the maximum representable timestamp is returned.
// Must be called while |ready_buffers_| is empty.
442 DecodeTimestamp WebMClusterParser::Track::GetReadyUpperBound() {
443 DCHECK(ready_buffers_.empty());
444 if (last_added_buffer_missing_duration_)
445 return last_added_buffer_missing_duration_->GetDecodeTimestamp();
447 return DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
// Moves every queued buffer whose decode timestamp is strictly before
// |before_timestamp| from |buffers_| to |ready_buffers_|.
// Preconditions (DCHECKed): |ready_buffers_| is empty and |before_timestamp|
// is a valid, non-negative decode timestamp.
// NOTE(review): the early returns and the loop header are not in this
// listing.
450 void WebMClusterParser::Track::ExtractReadyBuffers(
451 const DecodeTimestamp before_timestamp) {
452 DCHECK(ready_buffers_.empty());
453 DCHECK(DecodeTimestamp() <= before_timestamp);
454 DCHECK(kNoDecodeTimestamp() != before_timestamp);
456 if (buffers_.empty())
// Fast path: if the last buffer is before the bound, the whole queue is
// swapped into |ready_buffers_| at once.
459 if (buffers_.back()->GetDecodeTimestamp() < before_timestamp) {
460 // All of |buffers_| are ready.
461 ready_buffers_.swap(buffers_);
462 DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " All "
463 << ready_buffers_.size() << " are ready: before upper bound ts "
464 << before_timestamp.InSecondsF();
468 // Not all of |buffers_| are ready yet. Move any that are ready to
// NOTE(review): the end of the comment above and the loop header are not in
// this listing.
471 const scoped_refptr<StreamParserBuffer>& buffer = buffers_.front();
472 if (buffer->GetDecodeTimestamp() >= before_timestamp)
474 ready_buffers_.push_back(buffer);
475 buffers_.pop_front();
// The last buffer is known to be at or after the bound (fast path not taken),
// so the queue can never be drained completely here.
476 DCHECK(!buffers_.empty());
479 DVLOG(3) << __FUNCTION__ << " : " << track_num_ << " Only "
480 << ready_buffers_.size() << " ready, " << buffers_.size()
481 << " at or after upper bound ts " << before_timestamp.InSecondsF();
// Adds |buffer| to this track.
//
// If an earlier buffer is being held back because it had no duration, its
// duration is first derived as the difference between |buffer|'s timestamp
// and its own, and it is queued. Then |buffer| itself is either held back (if
// its duration is kNoTimestamp()) or queued.
// The final return value is the result of QueueBuffer().
// NOTE(review): the return statements on the early-exit paths are not in this
// listing.
484 bool WebMClusterParser::Track::AddBuffer(
485 const scoped_refptr<StreamParserBuffer>& buffer) {
486 DVLOG(2) << "AddBuffer() : " << track_num_
487 << " ts " << buffer->timestamp().InSecondsF()
488 << " dur " << buffer->duration().InSecondsF()
489 << " kf " << buffer->IsKeyframe()
490 << " size " << buffer->data_size();
492 if (last_added_buffer_missing_duration_) {
493 base::TimeDelta derived_duration =
494 buffer->timestamp() - last_added_buffer_missing_duration_->timestamp();
495 last_added_buffer_missing_duration_->set_duration(derived_duration);
497 DVLOG(2) << "AddBuffer() : applied derived duration to held-back buffer : "
499 << last_added_buffer_missing_duration_->timestamp().InSecondsF()
501 << last_added_buffer_missing_duration_->duration().InSecondsF()
502 << " kf " << last_added_buffer_missing_duration_->IsKeyframe()
503 << " size " << last_added_buffer_missing_duration_->data_size();
// Clear the held-back slot before queueing: QueueBuffer() DCHECKs that no
// buffer is being held back.
504 scoped_refptr<StreamParserBuffer> updated_buffer =
505 last_added_buffer_missing_duration_;
506 last_added_buffer_missing_duration_ = NULL;
507 if (!QueueBuffer(updated_buffer))
// A buffer without a duration waits here until the next buffer arrives (or
// until ApplyDurationEstimateIfNeeded() assigns an estimate).
511 if (buffer->duration() == kNoTimestamp()) {
512 last_added_buffer_missing_duration_ = buffer;
513 DVLOG(2) << "AddBuffer() : holding back buffer that is missing duration";
517 return QueueBuffer(buffer);
// If a buffer is being held back for lack of a duration, assigns it the
// value from GetDurationEstimate() and appends it to |buffers_|. It does
// nothing when no buffer is held back.
// NOTE(review): the early return for that case is not in this listing.
520 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
521 if (!last_added_buffer_missing_duration_)
524 last_added_buffer_missing_duration_->set_duration(GetDurationEstimate());
526 DVLOG(2) << "ApplyDurationEstimateIfNeeded() : new dur : "
528 << last_added_buffer_missing_duration_->timestamp().InSecondsF()
530 << last_added_buffer_missing_duration_->duration().InSecondsF()
531 << " kf " << last_added_buffer_missing_duration_->IsKeyframe()
532 << " size " << last_added_buffer_missing_duration_->data_size();
534 // Don't use the applied duration as a future estimation (don't use
535 // QueueBuffer() here.)
536 buffers_.push_back(last_added_buffer_missing_duration_);
537 last_added_buffer_missing_duration_ = NULL;
// Empties |ready_buffers_| only; buffers still waiting in |buffers_| are not
// touched.
540 void WebMClusterParser::Track::ClearReadyBuffers() {
541 // Note that |buffers_| are kept and |estimated_next_frame_duration_| is not
// NOTE(review): the end of the comment above is not in this listing.
543 ready_buffers_.clear();
// Resets the track. The only visible effect is dropping the buffer that was
// held back waiting for a duration.
// NOTE(review): source lines 547-548 are not in this listing, so any other
// state cleared there cannot be confirmed from here.
546 void WebMClusterParser::Track::Reset() {
549 last_added_buffer_missing_duration_ = NULL;
// Reports whether the |size|-byte payload at |data| is a keyframe.
// The visible checks look at the lowest bit of byte 0 and at the three-byte
// start code 0x9d 0x01 0x2a in bytes 3..5, per the VP8 references cited
// below.
// NOTE(review): the non-video early return, the minimum-size check and all
// return statements are not in this listing.
552 bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const {
553 // For now, assume that all blocks are keyframes for datatypes other than
554 // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
558 // Make sure the block is big enough for the minimal keyframe header size.
562 // The LSb of the first byte must be a 0 for a keyframe.
563 // http://tools.ietf.org/html/rfc6386 Section 19.1
564 if ((data[0] & 0x01) != 0)
567 // Verify VP8 keyframe startcode.
568 // http://tools.ietf.org/html/rfc6386 Section 19.1
569 if (data[3] != 0x9d || data[4] != 0x01 || data[5] != 0x2a)
// Appends |buffer| to |buffers_| after validating it, and folds its duration
// into |estimated_next_frame_duration_|.
// Requirements visible here:
//   - no buffer may be held back (DCHECK);
//   - the decode timestamp must not be lower than the last queued buffer's
//     (CHECK, fatal);
//   - a negative or kNoTimestamp() duration is logged as invalid.
// NOTE(review): the return statements are not in this listing.
575 bool WebMClusterParser::Track::QueueBuffer(
576 const scoped_refptr<StreamParserBuffer>& buffer) {
577 DCHECK(!last_added_buffer_missing_duration_);
579 // WebMClusterParser::OnBlock() gives MEDIA_LOG and parse error on decreasing
580 // block timecode detection within a cluster. Therefore, we should not see
// NOTE(review): the end of the comment above is not in this listing.
// With an empty queue, a default-constructed DecodeTimestamp is the baseline.
582 DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ?
583 DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();
584 CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());
586 base::TimeDelta duration = buffer->duration();
587 if (duration < base::TimeDelta() || duration == kNoTimestamp()) {
588 MEDIA_LOG(log_cb_) << "Invalid buffer duration: " << duration.InSecondsF();
592 // The estimated frame duration is the minimum non-zero duration since the
593 // last initialization segment. The minimum is used to ensure frame durations
594 // aren't overestimated.
// Zero durations are valid but excluded from the estimate.
595 if (duration > base::TimeDelta()) {
596 if (estimated_next_frame_duration_ == kNoTimestamp()) {
597 estimated_next_frame_duration_ = duration;
599 estimated_next_frame_duration_ =
600 std::min(duration, estimated_next_frame_duration_);
604 buffers_.push_back(buffer);
// Returns the duration to assign to a buffer that has none.
// |estimated_next_frame_duration_| is used when it has been set; otherwise a
// hardcoded default built from kDefaultVideoBufferDurationInMs or
// kDefaultAudioBufferDurationInMs is used. The DCHECKs require a strictly
// positive, valid duration.
// NOTE(review): the else branch, the condition choosing between the video and
// audio defaults, and the return statement are not in this listing.
608 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {
609 base::TimeDelta duration = estimated_next_frame_duration_;
610 if (duration != kNoTimestamp()) {
611 DVLOG(3) << __FUNCTION__ << " : using estimated duration";
613 DVLOG(3) << __FUNCTION__ << " : using hardcoded default duration";
615 duration = base::TimeDelta::FromMilliseconds(
616 kDefaultVideoBufferDurationInMs);
618 duration = base::TimeDelta::FromMilliseconds(
619 kDefaultAudioBufferDurationInMs);
623 DCHECK(duration > base::TimeDelta());
624 DCHECK(duration != kNoTimestamp());
// Clears the cached |text_buffers_map_| and the ready buffers of every text
// track in |text_track_map_|.
628 void WebMClusterParser::ClearTextTrackReadyBuffers() {
629 text_buffers_map_.clear();
630 for (TextTrackMap::iterator it = text_track_map_.begin();
631 it != text_track_map_.end();
633 it->second.ClearReadyBuffers();
// Clears the ready buffers of all text tracks, then iterates over every text
// track.
// NOTE(review): the loop body is not in this listing; presumably it resets
// each track - confirm.
637 void WebMClusterParser::ResetTextTracks() {
638 ClearTextTrackReadyBuffers();
639 for (TextTrackMap::iterator it = text_track_map_.begin();
640 it != text_track_map_.end();
// Computes |ready_buffer_upper_bound_| and moves every buffer with a decode
// timestamp below that bound into its track's ready queue.
//
// When the cluster has ended, held-back audio/video buffers get an estimated
// duration and the bound becomes the maximum timestamp, so everything is
// released. Otherwise the bound is the lower of the audio and video tracks'
// bounds.
// Must only be called while the bound is unset and |text_buffers_map_| is
// empty (DCHECKed).
// NOTE(review): the "else" separating the two cases is not in this listing.
646 void WebMClusterParser::UpdateReadyBuffers() {
647 DCHECK(ready_buffer_upper_bound_ == kNoDecodeTimestamp());
648 DCHECK(text_buffers_map_.empty());
650 if (cluster_ended_) {
651 audio_.ApplyDurationEstimateIfNeeded();
652 video_.ApplyDurationEstimateIfNeeded();
653 // Per OnBlock(), all text buffers should already have valid durations, so
654 // there is no need to call ApplyDurationEstimateIfNeeded() on text tracks
// NOTE(review): the end of the comment above is not in this listing.
656 ready_buffer_upper_bound_ =
657 DecodeTimestamp::FromPresentationTime(base::TimeDelta::Max());
// With no buffer held back, each track reports the same maximum bound.
658 DCHECK(ready_buffer_upper_bound_ == audio_.GetReadyUpperBound());
659 DCHECK(ready_buffer_upper_bound_ == video_.GetReadyUpperBound());
// Cluster still in progress: the track with the earlier bound sets the limit.
661 ready_buffer_upper_bound_ = std::min(audio_.GetReadyUpperBound(),
662 video_.GetReadyUpperBound());
663 DCHECK(DecodeTimestamp() <= ready_buffer_upper_bound_);
664 DCHECK(kNoDecodeTimestamp() != ready_buffer_upper_bound_);
667 // Prepare each track's ready buffers for retrieval.
668 audio_.ExtractReadyBuffers(ready_buffer_upper_bound_);
669 video_.ExtractReadyBuffers(ready_buffer_upper_bound_);
670 for (TextTrackMap::iterator itr = text_track_map_.begin();
671 itr != text_track_map_.end();
673 itr->second.ExtractReadyBuffers(ready_buffer_upper_bound_);
677 WebMClusterParser::Track*
678 WebMClusterParser::FindTextTrack(int track_num) {
679 const TextTrackMap::iterator it = text_track_map_.find(track_num);
681 if (it == text_track_map_.end())