1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/filters/frame_processor.h"
12 #include "base/memory/raw_ptr.h"
13 #include "media/base/stream_parser_buffer.h"
14 #include "media/base/timestamp_constants.h"
// Upper bounds on how many times each category of diagnostic is reported.
// Each is intended to be the limit argument of a LIMITED_MEDIA_LOG() call
// site, paired with a per-category counter.
constexpr int kMaxDroppedPrerollWarnings = 10;
constexpr int kMaxAudioNonKeyframeWarnings = 10;
constexpr int kMaxNumKeyframeTimeGreaterThanDependantWarnings = 1;
constexpr int kMaxMuxedSequenceModeWarnings = 1;
constexpr int kMaxSkippedEmptyFrameWarnings = 5;
constexpr int kMaxPartialDiscardWarnings = 5;
constexpr int kMaxDroppedFrameWarnings = 10;
26 // Helper class to capture per-track details needed by a frame processor. Some
27 // of this information may be duplicated in the short-term in the associated
28 // ChunkDemuxerStream and SourceBufferStream for a track.
29 // This parallels the MSE spec each of a SourceBuffer's Track Buffers at
30 // http://www.w3.org/TR/media-source/#track-buffers.
31 class MseTrackBuffer {
33 MseTrackBuffer(ChunkDemuxerStream* stream,
35 SourceBufferParseWarningCB parse_warning_cb);
37 MseTrackBuffer(const MseTrackBuffer&) = delete;
38 MseTrackBuffer& operator=(const MseTrackBuffer&) = delete;
42 // Get/set |last_decode_timestamp_|.
43 DecodeTimestamp last_decode_timestamp() const {
44 return last_decode_timestamp_;
46 void set_last_decode_timestamp(DecodeTimestamp timestamp) {
47 last_decode_timestamp_ = timestamp;
50 // Get/set |last_frame_duration_|.
51 base::TimeDelta last_frame_duration() const {
52 return last_frame_duration_;
54 void set_last_frame_duration(base::TimeDelta duration) {
55 last_frame_duration_ = duration;
58 // Gets |highest_presentation_timestamp_|.
59 base::TimeDelta highest_presentation_timestamp() const {
60 return highest_presentation_timestamp_;
63 // Get/set |needs_random_access_point_|.
64 bool needs_random_access_point() const {
65 return needs_random_access_point_;
67 void set_needs_random_access_point(bool needs_random_access_point) {
68 needs_random_access_point_ = needs_random_access_point;
71 DecodeTimestamp last_processed_decode_timestamp() const {
72 return last_processed_decode_timestamp_;
75 base::TimeDelta last_keyframe_presentation_timestamp() const {
76 return last_keyframe_presentation_timestamp_;
79 base::TimeDelta pending_group_start_pts() const {
80 return pending_group_start_pts_;
83 // Gets a pointer to this track's ChunkDemuxerStream.
84 ChunkDemuxerStream* stream() const { return stream_; }
86 // Unsets |last_decode_timestamp_|, unsets |last_frame_duration_|,
87 // unsets |highest_presentation_timestamp_|, and sets
88 // |needs_random_access_point_| to true.
91 // Unsets |highest_presentation_timestamp_|.
92 void ResetHighestPresentationTimestamp();
94 // If |highest_presentation_timestamp_| is unset or |timestamp| is greater
95 // than |highest_presentation_timestamp_|, sets
96 // |highest_presentation_timestamp_| to |timestamp|. Note that bidirectional
97 // prediction between coded frames can cause |timestamp| to not be
98 // monotonically increasing even though the decode timestamps are
99 // monotonically increasing.
100 void SetHighestPresentationTimestampIfIncreased(base::TimeDelta timestamp);
102 // Adds |frame| to the end of |processed_frames_|. In some SAP-Type-2
103 // conditions, may also flush any previously enqueued frames, which can fail.
104 // Returns the result of such flushing, or true if no flushing was done.
105 bool EnqueueProcessedFrame(scoped_refptr<StreamParserBuffer> frame);
107 // Appends |processed_frames_|, if not empty, to |stream_| and clears
108 // |processed_frames_|. Returns false if append failed, true otherwise.
109 // |processed_frames_| is cleared in both cases.
110 bool FlushProcessedFrames();
112 // Signals this track buffer's stream that a coded frame group is starting
113 // with |start_dts| and |start_pts|.
114 void NotifyStartOfCodedFrameGroup(DecodeTimestamp start_dts,
115 base::TimeDelta start_pts);
118 // The decode timestamp of the last coded frame appended in the current coded
119 // frame group. Initially kNoTimestamp, meaning "unset".
120 DecodeTimestamp last_decode_timestamp_;
122 // On signalling the stream of a new coded frame group start, this is reset to
123 // that start decode time. Any buffers subsequently enqueued for emission to
124 // the stream update this. This is managed separately from
125 // |last_decode_timestamp_| because |last_processed_decode_timestamp_| is not
126 // reset during Reset(), to especially be able to track the need to signal
127 // coded frame group start time for muxed post-discontinuity edge cases. See
128 // also FrameProcessor::ProcessFrame().
129 DecodeTimestamp last_processed_decode_timestamp_;
131 // On signalling the stream of a new coded frame group start, this is set to
132 // the group start PTS. If the first frame for this track in the coded frame
133 // group has a lower PTS, then this must be reset to that time. Once the first
134 // frame for this track has been queued, this is reset to kNoTimestamp. Like
135 // |last_processed_decode_timestamp_|, this is helpful for signalling an
136 // updated coded frame group start time for muxed post-discontinuity edge
137 // cases. See also FrameProcessor::ProcessFrame().
138 base::TimeDelta pending_group_start_pts_;
140 // This is kNoTimestamp if no frames have been enqueued ever or since the last
141 // NotifyStartOfCodedFrameGroup() or Reset(). Otherwise, this is the most
142 // recently enqueued keyframe's presentation timestamp.
144 // 1) to understand if the stream parser is producing random access
145 // points that are not SAP Type 1, whose support is likely going to be
146 // deprecated from MSE API pending real-world usage data, and
147 // 2) (by owning FrameProcessor) to determine if it's hit a decreasing
148 // keyframe PTS sequence when buffering by PTS intervals, such that a new
149 // coded frame group needs to be signalled.
150 base::TimeDelta last_keyframe_presentation_timestamp_;
152 // These are used to determine if more incremental flushing is needed to
153 // correctly buffer a SAP-Type-2 non-keyframe when buffering by PTS. They are
154 // updated (if necessary) in FlushProcessedFrames() and
155 // NotifyStartOfCodedFrameGroup(), and they are consulted (if necessary) in
156 // EnqueueProcessedFrame().
157 base::TimeDelta last_signalled_group_start_pts_;
158 bool have_flushed_since_last_group_start_;
160 // The coded frame duration of the last coded frame appended in the current
161 // coded frame group. Initially kNoTimestamp, meaning "unset".
162 base::TimeDelta last_frame_duration_;
164 // The highest presentation timestamp encountered in a coded frame appended
165 // in the current coded frame group. Initially kNoTimestamp, meaning
167 base::TimeDelta highest_presentation_timestamp_;
169 // Keeps track of whether the track buffer is waiting for a random access
170 // point coded frame. Initially set to true to indicate that a random access
171 // point coded frame is needed before anything can be added to the track
173 bool needs_random_access_point_;
175 // Pointer to the stream associated with this track. The stream is not owned
177 const raw_ptr<ChunkDemuxerStream, DanglingUntriaged> stream_;
179 // Queue of processed frames that have not yet been appended to |stream_|.
180 // EnqueueProcessedFrame() adds to this queue, and FlushProcessedFrames()
182 StreamParser::BufferQueue processed_frames_;
184 // MediaLog for reporting messages and properties to debug content and engine.
185 raw_ptr<MediaLog> media_log_;
187 // Callback for reporting problematic conditions that are not necessarily
189 SourceBufferParseWarningCB parse_warning_cb_;
191 // Counter that limits spam to |media_log_| for MseTrackBuffer warnings.
192 int num_keyframe_time_greater_than_dependant_warnings_ = 0;
195 MseTrackBuffer::MseTrackBuffer(ChunkDemuxerStream* stream,
197 SourceBufferParseWarningCB parse_warning_cb)
198 : last_decode_timestamp_(kNoDecodeTimestamp),
199 pending_group_start_pts_(kNoTimestamp),
200 last_keyframe_presentation_timestamp_(kNoTimestamp),
201 last_signalled_group_start_pts_(kNoTimestamp),
202 have_flushed_since_last_group_start_(false),
203 last_frame_duration_(kNoTimestamp),
204 highest_presentation_timestamp_(kNoTimestamp),
205 needs_random_access_point_(true),
207 media_log_(media_log),
208 parse_warning_cb_(std::move(parse_warning_cb)) {
210 DCHECK(parse_warning_cb_);
213 MseTrackBuffer::~MseTrackBuffer() {
214 DVLOG(2) << __func__ << "()";
217 void MseTrackBuffer::Reset() {
218 DVLOG(2) << __func__ << "()";
220 last_decode_timestamp_ = kNoDecodeTimestamp;
221 last_frame_duration_ = kNoTimestamp;
222 highest_presentation_timestamp_ = kNoTimestamp;
223 needs_random_access_point_ = true;
224 last_keyframe_presentation_timestamp_ = kNoTimestamp;
227 void MseTrackBuffer::ResetHighestPresentationTimestamp() {
228 highest_presentation_timestamp_ = kNoTimestamp;
231 void MseTrackBuffer::SetHighestPresentationTimestampIfIncreased(
232 base::TimeDelta timestamp) {
233 if (highest_presentation_timestamp_ == kNoTimestamp ||
234 timestamp > highest_presentation_timestamp_) {
235 highest_presentation_timestamp_ = timestamp;
239 bool MseTrackBuffer::EnqueueProcessedFrame(
240 scoped_refptr<StreamParserBuffer> frame) {
241 if (frame->is_key_frame()) {
242 last_keyframe_presentation_timestamp_ = frame->timestamp();
244 DCHECK(last_keyframe_presentation_timestamp_ != kNoTimestamp);
245 // This is just one case of potentially problematic GOP structures, though
246 // others are more clearly disallowed in at least some of the MSE bytestream
247 // specs, especially ISOBMFF. See https://crbug.com/739931 for more
249 if (frame->timestamp() < last_keyframe_presentation_timestamp_) {
250 if (!num_keyframe_time_greater_than_dependant_warnings_) {
251 // At most once per each track (but potentially multiple times per
252 // playback, if there are more than one tracks that exhibit this
253 // sequence in a playback) run the warning's callback.
254 DCHECK(parse_warning_cb_);
255 parse_warning_cb_.Run(
256 SourceBufferParseWarning::kKeyframeTimeGreaterThanDependant);
259 LIMITED_MEDIA_LOG(DEBUG, media_log_,
260 num_keyframe_time_greater_than_dependant_warnings_,
261 kMaxNumKeyframeTimeGreaterThanDependantWarnings)
262 << "Warning: presentation time of most recently processed random "
264 << last_keyframe_presentation_timestamp_
265 << ") is later than the presentation time of a non-keyframe ("
266 << frame->timestamp()
267 << ") that depends on it. This type of random access point is not "
268 "well supported by MSE; buffered range reporting may be less "
271 // SAP-Type-2 GOPs, by definition, contain at least one non-keyframe with
272 // PTS prior to the keyframe's PTS, with DTS continuous from keyframe
273 // forward to at least that non-keyframe. If such a non-keyframe overlaps
274 // the end of a previously buffered GOP sufficiently (such that, say, some
275 // previous GOP's non-keyframes depending on the overlapped
276 // non-keyframe(s) must be dropped), then a gap might need to result. But
277 // if we attempt to buffer the new GOP's keyframe through at least that
278 // first non-keyframe that does such overlapping all at once, the
279 // buffering mechanism doesn't expect such a discontinuity could occur
280 // (failing assumptions in places like SourceBufferRange).
282 // To prevent such failure, we can first flush what's previously been
283 // enqueued (if anything), but do this conservatively to not flush
284 // unnecessarily: we suppress such a flush if this nonkeyframe's PTS is
285 // still higher than the last coded frame group start time signalled for
286 // this track and no flush has yet occurred for this track since then, or
287 // if there has been a flush since then but this nonkeyframe's PTS is no
288 // lower than the PTS of the first frame pending flush currently.
289 if (!processed_frames_.empty()) {
290 DCHECK(kNoTimestamp != last_signalled_group_start_pts_);
292 if (!have_flushed_since_last_group_start_) {
293 if (frame->timestamp() < last_signalled_group_start_pts_) {
294 if (!FlushProcessedFrames())
298 if (frame->timestamp() < processed_frames_.front()->timestamp()) {
299 if (!FlushProcessedFrames())
307 DCHECK(pending_group_start_pts_ == kNoTimestamp ||
308 pending_group_start_pts_ <= frame->timestamp());
309 pending_group_start_pts_ = kNoTimestamp;
310 last_processed_decode_timestamp_ = frame->GetDecodeTimestamp();
311 processed_frames_.emplace_back(std::move(frame));
315 bool MseTrackBuffer::FlushProcessedFrames() {
316 if (processed_frames_.empty())
319 bool result = stream_->Append(processed_frames_);
320 processed_frames_.clear();
321 have_flushed_since_last_group_start_ = true;
323 DVLOG_IF(3, !result) << __func__
324 << "(): Failure appending processed frames to stream";
329 void MseTrackBuffer::NotifyStartOfCodedFrameGroup(DecodeTimestamp start_dts,
330 base::TimeDelta start_pts) {
331 last_keyframe_presentation_timestamp_ = kNoTimestamp;
332 last_processed_decode_timestamp_ = start_dts;
333 pending_group_start_pts_ = start_pts;
334 have_flushed_since_last_group_start_ = false;
335 last_signalled_group_start_pts_ = start_pts;
336 stream_->OnStartOfCodedFrameGroup(start_dts, start_pts);
339 FrameProcessor::FrameProcessor(UpdateDurationCB update_duration_cb,
341 : group_start_timestamp_(kNoTimestamp),
342 update_duration_cb_(std::move(update_duration_cb)),
343 media_log_(media_log) {
344 DVLOG(2) << __func__ << "()";
345 DCHECK(update_duration_cb_);
348 FrameProcessor::~FrameProcessor() {
349 DVLOG(2) << __func__ << "()";
352 void FrameProcessor::SetParseWarningCallback(
353 SourceBufferParseWarningCB parse_warning_cb) {
354 DCHECK(!parse_warning_cb_);
355 DCHECK(parse_warning_cb);
356 parse_warning_cb_ = std::move(parse_warning_cb);
359 void FrameProcessor::SetSequenceMode(bool sequence_mode) {
360 DVLOG(2) << __func__ << "(" << sequence_mode << ")";
361 // Per June 9, 2016 MSE spec editor's draft:
362 // https://rawgit.com/w3c/media-source/d8f901f22/
363 // index.html#widl-SourceBuffer-mode
364 // Step 7: If the new mode equals "sequence", then set the group start
365 // timestamp to the group end timestamp.
367 DCHECK(kNoTimestamp != group_end_timestamp_);
368 group_start_timestamp_ = group_end_timestamp_;
369 } else if (sequence_mode_) {
370 // We're switching from 'sequence' to 'segments' mode. Be safe and signal a
371 // new coded frame group on the next frame emitted.
372 pending_notify_all_group_start_ = true;
375 // Step 8: Update the attribute to new mode.
376 sequence_mode_ = sequence_mode;
379 bool FrameProcessor::ProcessFrames(
380 const StreamParser::BufferQueueMap& buffer_queue_map,
381 base::TimeDelta append_window_start,
382 base::TimeDelta append_window_end,
383 base::TimeDelta* timestamp_offset) {
384 StreamParser::BufferQueue frames;
385 if (!MergeBufferQueues(buffer_queue_map, &frames)) {
386 MEDIA_LOG(ERROR, media_log_) << "Parsed buffers not in DTS sequence";
390 DCHECK(!frames.empty());
392 if (sequence_mode_ && track_buffers_.size() > 1) {
393 if (!num_muxed_sequence_mode_warnings_) {
394 // At most once per SourceBuffer (but potentially multiple times per
395 // playback, if there are more than one SourceBuffers used this way in a
396 // playback) run the warning's callback.
397 DCHECK(parse_warning_cb_);
398 parse_warning_cb_.Run(SourceBufferParseWarning::kMuxedSequenceMode);
401 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_muxed_sequence_mode_warnings_,
402 kMaxMuxedSequenceModeWarnings)
403 << "Warning: using MSE 'sequence' AppendMode for a SourceBuffer with "
404 "multiple tracks may cause loss of track synchronization. In some "
405 "cases, buffered range gaps and playback stalls can occur. It is "
406 "recommended to instead use 'segments' mode for a multitrack "
410 // Monitor |group_end_timestamp_| to detect any cases where it decreases while
411 // processing |frames| (which should all be from no more than 1 media
412 // segment), to see if (outside of mediasource fuzzers) real API usage hits
413 // this case frequently enough to potentially warrant MSE spec clarification
414 // of the last step in the coded frame processing algorithm. The previous
415 // value is not used as a baseline, since the spec would already handle that
416 // case interoperably (since we may be starting the processing of frames from
417 // a new media segment.) See https://crbug.com/920853 and
418 // https://github.com/w3c/media-source/issues/203.
419 base::TimeDelta max_group_end_timestamp = kNoTimestamp;
421 // Implements the coded frame processing algorithm's outer loop for step 1.
422 // Note that ProcessFrame() implements an inner loop for a single frame that
423 // handles "jump to the Loop Top step to restart processing of the current
424 // coded frame" per June 9, 2016 MSE spec editor's draft:
425 // https://rawgit.com/w3c/media-source/d8f901f22/
426 // index.html#sourcebuffer-coded-frame-processing
427 // 1. For each coded frame in the media segment run the following steps:
428 for (const auto& frame : frames) {
429 // Skip any 0-byte audio or video buffers, since they cannot produce any
430 // valid decode output (and are rejected by FFmpeg A/V decode.)
431 if (!frame->data_size()) {
432 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_skipped_empty_frame_warnings_,
433 kMaxSkippedEmptyFrameWarnings)
434 << "Discarding empty audio or video coded frame, PTS="
435 << frame->timestamp().InMicroseconds()
436 << "us, DTS=" << frame->GetDecodeTimestamp().InMicroseconds() << "us";
440 if (!ProcessFrame(frame, append_window_start, append_window_end,
442 FlushProcessedFrames();
446 max_group_end_timestamp =
447 std::max(group_end_timestamp_, max_group_end_timestamp);
450 if (!FlushProcessedFrames())
453 // 2. - 4. Are handled by the WebMediaPlayer / Pipeline / Media Element.
455 // 5. If the media segment contains data beyond the current duration, then run
456 // the duration change algorithm with new duration set to the maximum of
457 // the current duration and the group end timestamp.
458 if (max_group_end_timestamp > group_end_timestamp_) {
459 // Log a parse warning. For now at least, we don't also log this to
461 DCHECK(parse_warning_cb_);
462 parse_warning_cb_.Run(
463 SourceBufferParseWarning::kGroupEndTimestampDecreaseWithinMediaSegment);
465 update_duration_cb_.Run(group_end_timestamp_);
470 void FrameProcessor::SetGroupStartTimestampIfInSequenceMode(
471 base::TimeDelta timestamp_offset) {
472 DVLOG(2) << __func__ << "(" << timestamp_offset.InMicroseconds() << "us)";
473 DCHECK(kNoTimestamp != timestamp_offset);
475 group_start_timestamp_ = timestamp_offset;
477 // Changes to timestampOffset should invalidate the preroll buffer.
478 audio_preroll_buffer_.reset();
481 bool FrameProcessor::AddTrack(StreamParser::TrackId id,
482 ChunkDemuxerStream* stream) {
483 DVLOG(2) << __func__ << "(): id=" << id;
485 MseTrackBuffer* existing_track = FindTrack(id);
486 DCHECK(!existing_track);
487 if (existing_track) {
488 MEDIA_LOG(ERROR, media_log_) << "Failure adding track with duplicate ID "
494 std::make_unique<MseTrackBuffer>(stream, media_log_, parse_warning_cb_);
498 bool FrameProcessor::UpdateTrackIds(const TrackIdChanges& track_id_changes) {
499 TrackBuffersMap& old_track_buffers = track_buffers_;
500 TrackBuffersMap new_track_buffers;
502 for (const auto& ids : track_id_changes) {
503 if (old_track_buffers.find(ids.first) == old_track_buffers.end() ||
504 new_track_buffers.find(ids.second) != new_track_buffers.end()) {
505 MEDIA_LOG(ERROR, media_log_) << "Failure updating track id from "
506 << ids.first << " to " << ids.second;
509 new_track_buffers[ids.second] = std::move(old_track_buffers[ids.first]);
510 CHECK_EQ(1u, old_track_buffers.erase(ids.first));
513 // Process remaining track buffers with unchanged ids.
514 for (const auto& t : old_track_buffers) {
515 if (new_track_buffers.find(t.first) != new_track_buffers.end()) {
516 MEDIA_LOG(ERROR, media_log_) << "Track id " << t.first << " conflict";
519 new_track_buffers[t.first] = std::move(old_track_buffers[t.first]);
522 std::swap(track_buffers_, new_track_buffers);
526 void FrameProcessor::SetAllTrackBuffersNeedRandomAccessPoint() {
527 for (auto itr = track_buffers_.begin(); itr != track_buffers_.end(); ++itr) {
528 itr->second->set_needs_random_access_point(true);
532 void FrameProcessor::Reset() {
533 DVLOG(2) << __func__ << "()";
534 for (auto itr = track_buffers_.begin(); itr != track_buffers_.end(); ++itr) {
535 itr->second->Reset();
538 // Maintain current |pending_notify_all_group_start_| state for Reset() during
539 // sequence mode. Reset it here only if in segments mode. In sequence mode,
540 // the current coded frame group may be continued across Reset() operations to
541 // allow the stream to coalesce what might otherwise be gaps in the buffered
542 // ranges. See also the declaration for |pending_notify_all_group_start_|.
543 if (!sequence_mode_) {
544 pending_notify_all_group_start_ = true;
549 DCHECK(kNoTimestamp != group_end_timestamp_);
550 group_start_timestamp_ = group_end_timestamp_;
553 void FrameProcessor::OnPossibleAudioConfigUpdate(
554 const AudioDecoderConfig& config) {
555 DCHECK(config.IsValidConfig());
557 // Always clear the preroll buffer when a config update is received.
558 audio_preroll_buffer_.reset();
560 if (config.Matches(current_audio_config_))
563 current_audio_config_ = config;
565 base::Seconds(1.0 / current_audio_config_.samples_per_second());
566 has_dependent_audio_frames_ =
567 current_audio_config_.profile() == AudioCodecProfile::kXHE_AAC;
568 last_audio_pts_for_nonkeyframe_monotonicity_check_ = kNoTimestamp;
571 MseTrackBuffer* FrameProcessor::FindTrack(StreamParser::TrackId id) {
572 auto itr = track_buffers_.find(id);
573 if (itr == track_buffers_.end())
576 return itr->second.get();
579 void FrameProcessor::NotifyStartOfCodedFrameGroup(DecodeTimestamp start_dts,
580 base::TimeDelta start_pts) {
581 DVLOG(2) << __func__ << "(dts " << start_dts.InMicroseconds() << "us, pts "
582 << start_pts.InMicroseconds() << "us)";
584 for (auto itr = track_buffers_.begin(); itr != track_buffers_.end(); ++itr) {
585 itr->second->NotifyStartOfCodedFrameGroup(start_dts, start_pts);
589 bool FrameProcessor::FlushProcessedFrames() {
590 DVLOG(2) << __func__ << "()";
593 for (auto itr = track_buffers_.begin(); itr != track_buffers_.end(); ++itr) {
594 if (!itr->second->FlushProcessedFrames())
601 bool FrameProcessor::HandlePartialAppendWindowTrimming(
602 base::TimeDelta append_window_start,
603 base::TimeDelta append_window_end,
604 scoped_refptr<StreamParserBuffer> buffer) {
605 DCHECK(buffer->duration() >= base::TimeDelta());
606 DCHECK_EQ(DemuxerStream::AUDIO, buffer->type());
607 DCHECK(has_dependent_audio_frames_ || buffer->is_key_frame());
609 const base::TimeDelta frame_end_timestamp =
610 buffer->timestamp() + buffer->duration();
612 // If the buffer is entirely before |append_window_start|, save it as preroll
613 // for the first buffer which overlaps |append_window_start|.
614 if (buffer->timestamp() < append_window_start &&
615 frame_end_timestamp <= append_window_start) {
616 // But if the buffer is not a keyframe, do not use it for preroll, nor use
617 // any previous preroll buffer for simplicity here.
618 if (has_dependent_audio_frames_ && !buffer->is_key_frame()) {
619 audio_preroll_buffer_.reset();
621 audio_preroll_buffer_ = std::move(buffer);
626 // If the buffer is entirely after |append_window_end| there's nothing to do.
627 if (buffer->timestamp() >= append_window_end)
630 DCHECK(buffer->timestamp() >= append_window_start ||
631 frame_end_timestamp > append_window_start);
633 bool processed_buffer = false;
635 // If we have a preroll buffer see if we can attach it to the first buffer
636 // overlapping or after |append_window_start|.
637 if (audio_preroll_buffer_) {
638 // We only want to use the preroll buffer if it directly precedes (less
639 // than one sample apart) the current buffer.
640 const int64_t delta =
641 (audio_preroll_buffer_->timestamp() +
642 audio_preroll_buffer_->duration() - buffer->timestamp())
644 if (std::abs(delta) < sample_duration_.InMicroseconds() &&
645 audio_preroll_buffer_->timestamp() <= buffer->timestamp()) {
646 DVLOG(1) << "Attaching audio preroll buffer ["
647 << audio_preroll_buffer_->timestamp().InMicroseconds() << "us, "
648 << (audio_preroll_buffer_->timestamp() +
649 audio_preroll_buffer_->duration())
651 << "us) to " << buffer->timestamp().InMicroseconds() << "us";
652 buffer->SetPrerollBuffer(std::move(audio_preroll_buffer_));
653 processed_buffer = true;
655 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_dropped_preroll_warnings_,
656 kMaxDroppedPrerollWarnings)
657 << "Partial append window trimming dropping unused audio preroll "
659 << audio_preroll_buffer_->timestamp().InMicroseconds()
660 << "us that ends too far (" << delta
661 << "us) from next buffer with PTS "
662 << buffer->timestamp().InMicroseconds() << "us";
663 audio_preroll_buffer_.reset();
667 // See if a partial discard can be done around |append_window_start|.
668 if (buffer->timestamp() < append_window_start) {
669 LIMITED_MEDIA_LOG(INFO, media_log_, num_partial_discard_warnings_,
670 kMaxPartialDiscardWarnings)
671 << "Truncating audio buffer which overlaps append window start."
672 << " PTS " << buffer->timestamp().InMicroseconds()
673 << "us frame_end_timestamp " << frame_end_timestamp.InMicroseconds()
674 << "us append_window_start " << append_window_start.InMicroseconds()
677 // Mark the overlapping portion of the buffer for discard.
678 // TODO(wolenetz): Is this correct to ignore any pre-existing discard
679 // padding (e.g. WebM discard padding)? See https://crbug.com/969195.
680 buffer->set_discard_padding(std::make_pair(
681 append_window_start - buffer->timestamp(), base::TimeDelta()));
683 // Adjust the timestamp of this buffer forward to |append_window_start| and
684 // decrease the duration to compensate. Adjust DTS by the same delta as PTS
685 // to help prevent spurious discontinuities when DTS > PTS.
686 base::TimeDelta pts_delta = append_window_start - buffer->timestamp();
687 buffer->set_timestamp(append_window_start);
688 buffer->SetDecodeTimestamp(buffer->GetDecodeTimestamp() + pts_delta);
689 buffer->set_duration(frame_end_timestamp - append_window_start);
690 processed_buffer = true;
693 // See if a partial discard can be done around |append_window_end|.
694 if (frame_end_timestamp > append_window_end) {
695 LIMITED_MEDIA_LOG(INFO, media_log_, num_partial_discard_warnings_,
696 kMaxPartialDiscardWarnings)
697 << "Truncating audio buffer which overlaps append window end."
698 << " PTS " << buffer->timestamp().InMicroseconds()
699 << "us frame_end_timestamp " << frame_end_timestamp.InMicroseconds()
700 << "us append_window_end " << append_window_end.InMicroseconds() << "us"
701 << (buffer->is_duration_estimated() ? " (frame duration is estimated)"
704 // Mark the overlapping portion of the buffer for discard.
705 // TODO(wolenetz): Is this correct to ignore any pre-existing discard
706 // padding (e.g. WebM discard padding)? See https://crbug.com/969195.
707 buffer->set_discard_padding(
708 std::make_pair(buffer->discard_padding().first,
709 frame_end_timestamp - append_window_end));
711 // Decrease the duration of the buffer to remove the discarded portion.
712 buffer->set_duration(append_window_end - buffer->timestamp());
713 processed_buffer = true;
716 return processed_buffer;
719 bool FrameProcessor::CheckAudioPresentationOrder(
720 const StreamParserBuffer& frame,
721 bool track_buffer_needs_random_access_point) {
722 DCHECK_EQ(DemuxerStream::AUDIO, frame.type());
723 DCHECK(has_dependent_audio_frames_);
724 if (frame.is_key_frame()) {
725 // Audio keyframes trivially succeed here. They start a new PTS baseline for
726 // the purpose of the checks in this method.
727 last_audio_pts_for_nonkeyframe_monotonicity_check_ = frame.timestamp();
730 if (track_buffer_needs_random_access_point) {
731 // This nonkeyframe trivially succeeds here, though it will not be buffered
732 // later in the caller since a keyframe is required first.
733 last_audio_pts_for_nonkeyframe_monotonicity_check_ = kNoTimestamp;
737 // We're not waiting for a random access point, so we must have a valid PTS
739 DCHECK_NE(kNoTimestamp, last_audio_pts_for_nonkeyframe_monotonicity_check_);
741 if (frame.timestamp() >= last_audio_pts_for_nonkeyframe_monotonicity_check_) {
742 last_audio_pts_for_nonkeyframe_monotonicity_check_ = frame.timestamp();
746 last_audio_pts_for_nonkeyframe_monotonicity_check_ = kNoTimestamp;
747 return false; // Caller should fail parse in this case.
750 bool FrameProcessor::ProcessFrame(scoped_refptr<StreamParserBuffer> frame,
751 base::TimeDelta append_window_start,
752 base::TimeDelta append_window_end,
753 base::TimeDelta* timestamp_offset) {
754 // Implements the loop within step 1 of the coded frame processing algorithm
755 // for a single input frame per June 9, 2016 MSE spec editor's draft:
756 // https://rawgit.com/w3c/media-source/d8f901f22/
757 // index.html#sourcebuffer-coded-frame-processing
760 // Otherwise case: (See also SourceBufferState::OnNewBuffer's conditional
761 // modification of timestamp_offset after frame processing returns, when
762 // generate_timestamps_flag is true).
763 // 1.1. Let presentation timestamp be a double precision floating point
764 // representation of the coded frame's presentation timestamp in
766 // 1.2. Let decode timestamp be a double precision floating point
767 // representation of the coded frame's decode timestamp in seconds.
768 // 2. Let frame duration be a double precision floating point representation
769 // of the coded frame's duration in seconds.
770 // We use base::TimeDelta and DecodeTimestamp instead of double.
771 base::TimeDelta presentation_timestamp = frame->timestamp();
772 DecodeTimestamp decode_timestamp = frame->GetDecodeTimestamp();
773 base::TimeDelta frame_duration = frame->duration();
775 DVLOG(3) << __func__ << ": Processing frame Type=" << frame->type()
776 << ", TrackID=" << frame->track_id()
777 << ", PTS=" << presentation_timestamp.InMicroseconds()
778 << "us, DTS=" << decode_timestamp.InMicroseconds()
779 << "us, DUR=" << frame_duration.InMicroseconds()
780 << "us, RAP=" << frame->is_key_frame();
782 // Buffering, splicing, append window trimming, etc., all depend on the
783 // assumption that all audio coded frames are key frames. Metadata in the
784 // bytestream may not indicate that, so we need to enforce that assumption
785 // here with a warning log.
786 if (frame->type() == DemuxerStream::AUDIO && !has_dependent_audio_frames_ &&
787 !frame->is_key_frame()) {
788 LIMITED_MEDIA_LOG(DEBUG, media_log_, num_audio_non_keyframe_warnings_,
789 kMaxAudioNonKeyframeWarnings)
790 << "Bytestream with audio frame PTS "
791 << presentation_timestamp.InMicroseconds() << "us and DTS "
792 << decode_timestamp.InMicroseconds()
793 << "us indicated the frame is not a random access point (key frame). "
794 "All audio frames are expected to be key frames for the current "
796 frame->set_is_key_frame(true);
799 // Sanity check the timestamps.
800 if (presentation_timestamp == kNoTimestamp) {
801 MEDIA_LOG(ERROR, media_log_) << "Unknown PTS for " << frame->GetTypeName()
806 // StreamParserBuffer's GetDecodeTimestamp() shouldn't return
807 // kNoDecodeTimestamp if we already found the frame's PTS was kNoTimestamp
808 // and failed processing.
809 DCHECK(decode_timestamp != kNoDecodeTimestamp);
811 if (presentation_timestamp.is_inf()) {
812 MEDIA_LOG(ERROR, media_log_)
813 << "Before adjusting by timestampOffset, PTS for "
814 << frame->GetTypeName()
815 << " frame exceeds range allowed by implementation";
819 if (decode_timestamp.is_inf()) {
820 MEDIA_LOG(ERROR, media_log_)
821 << "Before adjusting by timestampOffset, DTS for "
822 << frame->GetTypeName()
823 << " frame exceeds range allowed by implementation";
827 // TODO(wolenetz): Determine whether any DTS>PTS logging is needed. See
828 // http://crbug.com/354518.
829 DVLOG_IF(2, decode_timestamp.ToPresentationTime() > presentation_timestamp)
830 << __func__ << ": WARNING: Frame DTS("
831 << decode_timestamp.InMicroseconds() << "us) > PTS("
832 << presentation_timestamp.InMicroseconds()
833 << "us), frame type=" << frame->GetTypeName();
835 // All stream parsers should emit valid (non-negative) frame durations.
836 // Note that duration of 0 can occur for at least WebM alt-ref frames.
837 if (frame_duration == kNoTimestamp) {
838 MEDIA_LOG(ERROR, media_log_)
839 << "Unknown duration for " << frame->GetTypeName() << " frame at PTS "
840 << presentation_timestamp.InMicroseconds() << "us";
844 // See also partial protections in DecoderBuffer::set_duration().
845 // Using stronger CHECK here in case any of the parsers become fragile to
846 // fuzzer coverage gaps when calculating buffer durations.
847 CHECK(frame_duration >= base::TimeDelta() &&
848 frame_duration != kInfiniteDuration);
850 // 3. If mode equals "sequence" and group start timestamp is set, then run
851 // the following steps:
852 if (sequence_mode_ && group_start_timestamp_ != kNoTimestamp) {
853 // 3.1. Set timestampOffset equal to group start timestamp -
854 // presentation timestamp.
855 if (group_start_timestamp_.is_inf()) {
856 // +Infinity may be set when app sets timestampOffset. We emit error in
857 // such case upon next potential use of that offset here.
858 DCHECK(group_start_timestamp_ == kInfiniteDuration);
859 MEDIA_LOG(ERROR, media_log_)
860 << "Sequence mode timestampOffset update prevented by a group "
861 "start timestamp that exceeds range allowed by implementation";
865 *timestamp_offset = group_start_timestamp_ - presentation_timestamp;
866 if (timestamp_offset->is_inf()) {
867 MEDIA_LOG(ERROR, media_log_)
868 << "Sequence mode timestampOffset update resulted in an offset "
869 "that exceeds range allowed by implementation";
873 DVLOG(3) << __func__ << ": updated timestampOffset is now "
874 << timestamp_offset->InMicroseconds() << "us";
876 // 3.2. Set group end timestamp equal to group start timestamp.
877 group_end_timestamp_ = group_start_timestamp_;
879 // 3.3. Set the need random access point flag on all track buffers to
881 SetAllTrackBuffersNeedRandomAccessPoint();
883 // Remember to signal a new coded frame group. Note, this may introduce
884 // gaps on large jumps forwards in sequence mode.
885 pending_notify_all_group_start_ = true;
887 // 3.4. Unset group start timestamp.
888 group_start_timestamp_ = kNoTimestamp;
891 // 4. If timestampOffset is not 0, then run the following steps:
892 if (!timestamp_offset->is_zero()) {
893 if (timestamp_offset->is_inf()) {
894 // This condition might occur if the app set timestampOffset while in
895 // 'segments' append mode, skipping the 'sequence' mode offset update
897 MEDIA_LOG(ERROR, media_log_)
898 << "timestampOffset exceeds range allowed by implementation";
902 // 4.1. Add timestampOffset to the presentation timestamp.
903 // Note: |frame| PTS is only updated if it survives discontinuity
905 presentation_timestamp += *timestamp_offset;
906 if (presentation_timestamp.is_inf()) {
907 MEDIA_LOG(ERROR, media_log_)
908 << "After adjusting by timestampOffset, PTS for "
909 << frame->GetTypeName()
910 << " frame exceeds range allowed by implementation";
914 // 4.2. Add timestampOffset to the decode timestamp.
915 // Frame DTS is only updated if it survives discontinuity processing.
916 decode_timestamp += *timestamp_offset;
917 if (decode_timestamp.is_inf()) {
918 MEDIA_LOG(ERROR, media_log_)
919 << "After adjusting by timestampOffset, DTS for "
920 << frame->GetTypeName()
921 << " frame exceeds range allowed by implementation";
926 // 5. Let track buffer equal the track buffer that the coded frame will be
928 StreamParser::TrackId track_id = frame->track_id();
929 MseTrackBuffer* track_buffer = FindTrack(track_id);
931 MEDIA_LOG(ERROR, media_log_)
932 << "Unknown track with type " << frame->GetTypeName()
933 << ", frame processor track id " << track_id
934 << ", and parser track id " << frame->track_id();
937 if (frame->type() != track_buffer->stream()->type()) {
938 MEDIA_LOG(ERROR, media_log_) << "Frame type " << frame->GetTypeName()
939 << " doesn't match track buffer type "
940 << track_buffer->stream()->type();
944 // 6. If last decode timestamp for track buffer is set and decode timestamp
945 // is less than last decode timestamp
947 // If last decode timestamp for track buffer is set and the difference
948 // between decode timestamp and last decode timestamp is greater than 2
949 // times last frame duration:
950 DecodeTimestamp track_last_decode_timestamp =
951 track_buffer->last_decode_timestamp();
952 if (track_last_decode_timestamp != kNoDecodeTimestamp) {
953 base::TimeDelta track_dts_delta =
954 decode_timestamp - track_last_decode_timestamp;
955 if (track_dts_delta.is_negative() ||
956 track_dts_delta > 2 * track_buffer->last_frame_duration()) {
957 // 6.1. If mode equals "segments": Set group end timestamp to
958 // presentation timestamp.
959 // If mode equals "sequence": Set group start timestamp equal to
960 // the group end timestamp.
961 if (!sequence_mode_) {
962 group_end_timestamp_ = presentation_timestamp;
963 // This triggers a discontinuity so we need to treat the next frames
964 // appended within the append window as if they were the beginning of
965 // a new coded frame group. |pending_notify_all_group_start_| is reset
966 // in Reset(), below, for "segments" mode.
968 DVLOG(3) << __func__ << " : Sequence mode discontinuity, GETS: "
969 << group_end_timestamp_.InMicroseconds() << "us";
970 // Reset(), below, performs the "Set group start timestamp equal to
971 // the group end timestamp" operation for "sequence" mode.
977 // 6.6. Jump to the Loop Top step above to restart processing of the
978 // current coded frame.
979 DVLOG(3) << __func__ << ": Discontinuity: reprocessing frame";
984 // 7. Let frame end timestamp equal the sum of presentation timestamp and
986 base::TimeDelta frame_end_timestamp =
987 presentation_timestamp + frame_duration;
989 // 8. If presentation timestamp is less than appendWindowStart, then set
990 // the need random access point flag to true, drop the coded frame, and
991 // jump to the top of the loop to start processing the next coded
993 // Note: We keep the result of partial discard of a buffer that overlaps
994 // |append_window_start| and does not end after |append_window_end|,
995 // for streams which support partial trimming.
996 // 9. If frame end timestamp is greater than appendWindowEnd, then set the
997 // need random access point flag to true, drop the coded frame, and jump
998 // to the top of the loop to start processing the next coded frame.
999 // Note: We keep the result of partial discard of a buffer that overlaps
1000 // |append_window_end|, for streams which support partial trimming.
1001 frame->set_timestamp(presentation_timestamp);
1002 frame->SetDecodeTimestamp(decode_timestamp);
1004 if (has_dependent_audio_frames_ && frame->type() == DemuxerStream::AUDIO &&
1005 !CheckAudioPresentationOrder(
1006 *frame, track_buffer->needs_random_access_point())) {
1007 MEDIA_LOG(ERROR, media_log_)
1008 << "Dependent audio frame with invalid decreasing presentation "
1009 "timestamp detected.";
1013 // Attempt to trim audio exactly to fit the append window.
1014 if (frame->type() == DemuxerStream::AUDIO &&
1015 (frame->is_key_frame() || !track_buffer->needs_random_access_point()) &&
1016 HandlePartialAppendWindowTrimming(append_window_start,
1017 append_window_end, frame)) {
1018 // |frame| has been partially trimmed or had preroll added. Though
1019 // |frame|'s duration may have changed, do not update |frame_duration|
1020 // here, so |track_buffer|'s last frame duration update uses original
1021 // frame duration and reduces spurious discontinuity detection.
1022 decode_timestamp = frame->GetDecodeTimestamp();
1023 presentation_timestamp = frame->timestamp();
1024 frame_end_timestamp = frame->timestamp() + frame->duration();
1027 if (frame_end_timestamp.is_inf()) {
1028 MEDIA_LOG(ERROR, media_log_)
1029 << "Frame end timestamp for " << frame->GetTypeName()
1030 << " frame exceeds range allowed by implementation";
1034 if (presentation_timestamp < append_window_start ||
1035 frame_end_timestamp > append_window_end) {
1036 track_buffer->set_needs_random_access_point(true);
1038 LIMITED_MEDIA_LOG(INFO, media_log_, num_dropped_frame_warnings_,
1039 kMaxDroppedFrameWarnings)
1040 << "Dropping " << frame->GetTypeName() << " frame (DTS "
1041 << decode_timestamp.InMicroseconds() << "us PTS "
1042 << presentation_timestamp.InMicroseconds() << "us,"
1043 << frame_end_timestamp.InMicroseconds()
1044 << "us) that is outside append window ["
1045 << append_window_start.InMicroseconds() << "us,"
1046 << append_window_end.InMicroseconds() << "us).";
1050 DCHECK(presentation_timestamp >= base::TimeDelta());
1052 // 10. If the need random access point flag on track buffer equals true,
1053 // then run the following steps:
1054 if (track_buffer->needs_random_access_point()) {
1055 // 10.1. If the coded frame is not a random access point, then drop the
1056 // coded frame and jump to the top of the loop to start processing
1057 // the next coded frame.
1058 if (!frame->is_key_frame()) {
1059 DVLOG(3) << __func__
1060 << ": Dropping frame that is not a random access point";
1064 // 10.2. Set the need random access point flag on track buffer to false.
1065 track_buffer->set_needs_random_access_point(false);
1068 // We now have a processed buffer to append to the track buffer's stream.
1069 // If it is the first in a new coded frame group (such as following a
1070 // segments append mode discontinuity, or following a switch to segments
1071 // append mode from sequence append mode), notify all the track buffers
1072 // that a coded frame group is starting.
1073 bool signal_new_cfg = pending_notify_all_group_start_;
1075 // In muxed multi-track streams, it may occur that we already signaled a new
1076 // coded frame group (CFG) upon detecting a discontinuity in trackA, only to
1077 // now find that frames in trackB actually have an earlier timestamp. If
1078 // this is detected using last_processed_decode_timestamp() (which persists
1079 // across DTS-based discontinuity detection in sequence mode, and which
1080 // contains either the last processed DTS or last signalled CFG DTS for
1081 // trackB), re-signal trackB that a CFG is starting with its new earlier
1082 // DTS. Similarly, if this is detected using pending_group_start_pts()
1083 // (which is !kNoTimestamp only when the track hasn't yet been given the
1084 // first buffer in the CFG, and if so, it's the expected PTS start of that
1085 // CFG), re-signal trackB that a CFG is starting with its new earlier PTS.
1086 // Avoid re-signalling trackA, as it has already started processing frames
1089 track_buffer->last_processed_decode_timestamp() > decode_timestamp ||
1090 (track_buffer->pending_group_start_pts() != kNoTimestamp &&
1091 track_buffer->pending_group_start_pts() > presentation_timestamp);
1093 if (frame->is_key_frame()) {
1094 // When a keyframe is discovered to have a decreasing PTS versus the
1095 // previous highest presentation timestamp for that track in the current
1096 // coded frame group, signal a new coded frame group for that track buffer
1097 // so that it can correctly process overlap-removals for the new GOP.
1098 if (track_buffer->highest_presentation_timestamp() != kNoTimestamp &&
1099 track_buffer->highest_presentation_timestamp() >
1100 presentation_timestamp) {
1101 signal_new_cfg = true;
1102 // In case there is currently a decreasing keyframe PTS relative to the
1103 // track buffer's highest PTS, that is later followed by a jump forward
1104 // requiring overlap removal of media prior to the track buffer's
1105 // highest PTS, reset that tracking now to ensure correctness of
1106 // signalling the need for such overlap removal later.
1107 track_buffer->ResetHighestPresentationTimestamp();
1110 // When an otherwise continuous coded frame group (by DTS, and with
1111 // non-decreasing keyframe PTS) contains a keyframe with PTS in the future
1112 // significantly far enough that it may be outside of buffering fudge
1113 // room, signal a new coded frame group with start time set to the
1114 // previous highest frame end time in the coded frame group for this
1115 // track. This lets the stream coalesce a potential gap, and also pass
1116 // internal buffer adjacency checks.
1118 track_buffer->highest_presentation_timestamp() != kNoTimestamp &&
1119 track_buffer->highest_presentation_timestamp() + frame->duration() <
1120 presentation_timestamp;
1123 if (signal_new_cfg) {
1124 DCHECK(frame->is_key_frame());
1126 // First, complete the append to track buffer streams of the previous
1127 // coded frame group's frames, if any.
1128 if (!FlushProcessedFrames())
1131 if (pending_notify_all_group_start_) {
1132 NotifyStartOfCodedFrameGroup(decode_timestamp, presentation_timestamp);
1133 pending_notify_all_group_start_ = false;
1135 DecodeTimestamp updated_dts = std::min(
1136 track_buffer->last_processed_decode_timestamp(), decode_timestamp);
1137 base::TimeDelta updated_pts = track_buffer->pending_group_start_pts();
1138 if (updated_pts == kNoTimestamp &&
1139 track_buffer->highest_presentation_timestamp() != kNoTimestamp &&
1140 track_buffer->highest_presentation_timestamp() <
1141 presentation_timestamp) {
1142 updated_pts = track_buffer->highest_presentation_timestamp();
1144 if (updated_pts == kNoTimestamp || updated_pts > presentation_timestamp)
1145 updated_pts = presentation_timestamp;
1146 track_buffer->NotifyStartOfCodedFrameGroup(updated_dts, updated_pts);
1150 DVLOG(3) << __func__ << ": Enqueueing processed frame "
1151 << "PTS=" << presentation_timestamp.InMicroseconds()
1152 << "us, DTS=" << decode_timestamp.InMicroseconds() << "us";
1154 // Steps 11-16: Note, we optimize by appending groups of contiguous
1155 // processed frames for each track buffer at end of ProcessFrames() or prior
1156 // to signalling coded frame group starts.
1157 if (!track_buffer->EnqueueProcessedFrame(std::move(frame)))
1160 // 17. Set last decode timestamp for track buffer to decode timestamp.
1161 track_buffer->set_last_decode_timestamp(decode_timestamp);
1163 // 18. Set last frame duration for track buffer to frame duration.
1164 track_buffer->set_last_frame_duration(frame_duration);
1166 // 19. If highest presentation timestamp for track buffer is unset or frame
1167 // end timestamp is greater than highest presentation timestamp, then
1168 // set highest presentation timestamp for track buffer to frame end
1170 track_buffer->SetHighestPresentationTimestampIfIncreased(
1171 frame_end_timestamp);
1173 // 20. If frame end timestamp is greater than group end timestamp, then set
1174 // group end timestamp equal to frame end timestamp.
1175 if (frame_end_timestamp > group_end_timestamp_)
1176 group_end_timestamp_ = frame_end_timestamp;
1177 DCHECK(group_end_timestamp_ >= base::TimeDelta());
1179 // TODO(wolenetz): Step 21 is currently approximated by predicted
1180 // frame_end_time by SourceBufferState::OnNewBuffers(). See
1181 // https://crbug.com/850316.
1186 NOTREACHED_NORETURN();
1189 } // namespace media