media/filters/frame_processor.cc

   1 // Copyright 2014 The Chromium Authors
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/filters/frame_processor.h"
   6
   7 #include <stdint.h>
   8 #include <memory>
   9
  10 #include <cstdlib>
  11
  12 #include "base/memory/raw_ptr.h"
  13 #include "media/base/stream_parser_buffer.h"
  14 #include "media/base/timestamp_constants.h"
  15
  16 namespace media {
  17
  18 const int kMaxDroppedPrerollWarnings = 10;
  19 const int kMaxAudioNonKeyframeWarnings = 10;
  20 const int kMaxNumKeyframeTimeGreaterThanDependantWarnings = 1;
  21 const int kMaxMuxedSequenceModeWarnings = 1;
  22 const int kMaxSkippedEmptyFrameWarnings = 5;
  23 const int kMaxPartialDiscardWarnings = 5;
  24 const int kMaxDroppedFrameWarnings = 10;
  25
  26 // Helper class to capture per-track details needed by a frame processor. Some
  27 // of this information may be duplicated in the short-term in the associated
  28 // ChunkDemuxerStream and SourceBufferStream for a track.
  29 // This parallels the MSE spec each of a SourceBuffer's Track Buffers at
  30 // http://www.w3.org/TR/media-source/#track-buffers.
  31 class MseTrackBuffer {
  32  public:
  33   MseTrackBuffer(ChunkDemuxerStream* stream,
  34                  MediaLog* media_log,
  35                  SourceBufferParseWarningCB parse_warning_cb);
  36
  37   MseTrackBuffer(const MseTrackBuffer&) = delete;
  38   MseTrackBuffer& operator=(const MseTrackBuffer&) = delete;
  39
  40   ~MseTrackBuffer();
  41
  42   // Get/set |last_decode_timestamp_|.
  43   DecodeTimestamp last_decode_timestamp() const {
  44     return last_decode_timestamp_;
  45   }
  46   void set_last_decode_timestamp(DecodeTimestamp timestamp) {
  47     last_decode_timestamp_ = timestamp;
  48   }
  49
  50   // Get/set |last_frame_duration_|.
  51   base::TimeDelta last_frame_duration() const {
  52     return last_frame_duration_;
  53   }
  54   void set_last_frame_duration(base::TimeDelta duration) {
  55     last_frame_duration_ = duration;
  56   }
  57
  58   // Gets |highest_presentation_timestamp_|.
  59   base::TimeDelta highest_presentation_timestamp() const {
  60     return highest_presentation_timestamp_;
  61   }
  62
  63   // Get/set |needs_random_access_point_|.
  64   bool needs_random_access_point() const {
  65     return needs_random_access_point_;
  66   }
  67   void set_needs_random_access_point(bool needs_random_access_point) {
  68     needs_random_access_point_ = needs_random_access_point;
  69   }
  70
  71   DecodeTimestamp last_processed_decode_timestamp() const {
  72     return last_processed_decode_timestamp_;
  73   }
  74
  75   base::TimeDelta last_keyframe_presentation_timestamp() const {
  76     return last_keyframe_presentation_timestamp_;
  77   }
  78
  79   base::TimeDelta pending_group_start_pts() const {
  80     return pending_group_start_pts_;
  81   }
  82
  83   // Gets a pointer to this track's ChunkDemuxerStream.
  84   ChunkDemuxerStream* stream() const { return stream_; }
  85
  86   // Unsets |last_decode_timestamp_|, unsets |last_frame_duration_|,
  87   // unsets |highest_presentation_timestamp_|, and sets
  88   // |needs_random_access_point_| to true.
  89   void Reset();
  90
  91   // Unsets |highest_presentation_timestamp_|.
  92   void ResetHighestPresentationTimestamp();
  93
  94   // If |highest_presentation_timestamp_| is unset or |timestamp| is greater
  95   // than |highest_presentation_timestamp_|, sets
  96   // |highest_presentation_timestamp_| to |timestamp|. Note that bidirectional
  97   // prediction between coded frames can cause |timestamp| to not be
  98   // monotonically increasing even though the decode timestamps are
  99   // monotonically increasing.
 100   void SetHighestPresentationTimestampIfIncreased(base::TimeDelta timestamp);
 101
 102   // Adds |frame| to the end of |processed_frames_|. In some SAP-Type-2
 103   // conditions, may also flush any previously enqueued frames, which can fail.
 104   // Returns the result of such flushing, or true if no flushing was done.
 105   bool EnqueueProcessedFrame(scoped_refptr<StreamParserBuffer> frame);
 106
 107   // Appends |processed_frames_|, if not empty, to |stream_| and clears
 108   // |processed_frames_|. Returns false if append failed, true otherwise.
 109   // |processed_frames_| is cleared in both cases.
 110   bool FlushProcessedFrames();
 111
 112   // Signals this track buffer's stream that a coded frame group is starting
 113   // with |start_dts| and |start_pts|.
 114   void NotifyStartOfCodedFrameGroup(DecodeTimestamp start_dts,
 115                                     base::TimeDelta start_pts);
 116
 117  private:
 118   // The decode timestamp of the last coded frame appended in the current coded
 119   // frame group. Initially kNoTimestamp, meaning "unset".
 120   DecodeTimestamp last_decode_timestamp_;
 121
 122   // On signalling the stream of a new coded frame group start, this is reset to
 123   // that start decode time. Any buffers subsequently enqueued for emission to
 124   // the stream update this. This is managed separately from
 125   // |last_decode_timestamp_| because |last_processed_decode_timestamp_| is not
 126   // reset during Reset(), to especially be able to track the need to signal
 127   // coded frame group start time for muxed post-discontinuity edge cases. See
 128   // also FrameProcessor::ProcessFrame().
 129   DecodeTimestamp last_processed_decode_timestamp_;
 130
 131   // On signalling the stream of a new coded frame group start, this is set to
 132   // the group start PTS. If the first frame for this track in the coded frame
 133   // group has a lower PTS, then this must be reset to that time. Once the first
 134   // frame for this track has been queued, this is reset to kNoTimestamp. Like
 135   // |last_processed_decode_timestamp_|, this is helpful for signalling an
 136   // updated coded frame group start time for muxed post-discontinuity edge
 137   // cases. See also FrameProcessor::ProcessFrame().
 138   base::TimeDelta pending_group_start_pts_;
 139
 140   // This is kNoTimestamp if no frames have been enqueued ever or since the last
 141   // NotifyStartOfCodedFrameGroup() or Reset(). Otherwise, this is the most
 142   // recently enqueued keyframe's presentation timestamp.
 143   // This is used:
 144   // 1) to understand if the stream parser is producing random access
 145   //    points that are not SAP Type 1, whose support is likely going to be
 146   //    deprecated from MSE API pending real-world usage data, and
 147   // 2) (by owning FrameProcessor) to determine if it's hit a decreasing
 148   //    keyframe PTS sequence when buffering by PTS intervals, such that a new
 149   //    coded frame group needs to be signalled.
 150   base::TimeDelta last_keyframe_presentation_timestamp_;
 151
 152   // These are used to determine if more incremental flushing is needed to
 153   // correctly buffer a SAP-Type-2 non-keyframe when buffering by PTS.  They are
 154   // updated (if necessary) in FlushProcessedFrames() and
 155   // NotifyStartOfCodedFrameGroup(), and they are consulted (if necessary) in
 156   // EnqueueProcessedFrame().
 157   base::TimeDelta last_signalled_group_start_pts_;
 158   bool have_flushed_since_last_group_start_;
 159
 160   // The coded frame duration of the last coded frame appended in the current
 161   // coded frame group. Initially kNoTimestamp, meaning "unset".
 162   base::TimeDelta last_frame_duration_;
 163
 164   // The highest presentation timestamp encountered in a coded frame appended
 165   // in the current coded frame group. Initially kNoTimestamp, meaning
 166   // "unset".
 167   base::TimeDelta highest_presentation_timestamp_;
 168
 169   // Keeps track of whether the track buffer is waiting for a random access
 170   // point coded frame. Initially set to true to indicate that a random access
 171   // point coded frame is needed before anything can be added to the track
 172   // buffer.
 173   bool needs_random_access_point_;
 174
 175   // Pointer to the stream associated with this track. The stream is not owned
 176   // by |this|.
 177   const raw_ptr<ChunkDemuxerStream, DanglingUntriaged> stream_;
 178
 179   // Queue of processed frames that have not yet been appended to |stream_|.
 180   // EnqueueProcessedFrame() adds to this queue, and FlushProcessedFrames()
 181   // clears it.
 182   StreamParser::BufferQueue processed_frames_;
 183
 184   // MediaLog for reporting messages and properties to debug content and engine.
 185   raw_ptr<MediaLog> media_log_;
 186
 187   // Callback for reporting problematic conditions that are not necessarily
 188   // errors.
 189   SourceBufferParseWarningCB parse_warning_cb_;
 190
 191   // Counter that limits spam to |media_log_| for MseTrackBuffer warnings.
 192   int num_keyframe_time_greater_than_dependant_warnings_ = 0;
 193 };
 194
 195 MseTrackBuffer::MseTrackBuffer(ChunkDemuxerStream* stream,
 196                                MediaLog* media_log,
 197                                SourceBufferParseWarningCB parse_warning_cb)
 198     : last_decode_timestamp_(kNoDecodeTimestamp),
 199       pending_group_start_pts_(kNoTimestamp),
 200       last_keyframe_presentation_timestamp_(kNoTimestamp),
 201       last_signalled_group_start_pts_(kNoTimestamp),
 202       have_flushed_since_last_group_start_(false),
 203       last_frame_duration_(kNoTimestamp),
 204       highest_presentation_timestamp_(kNoTimestamp),
 205       needs_random_access_point_(true),
 206       stream_(stream),
 207       media_log_(media_log),
 208       parse_warning_cb_(std::move(parse_warning_cb)) {
 209   DCHECK(stream_);
 210   DCHECK(parse_warning_cb_);
 211 }
 212
 213 MseTrackBuffer::~MseTrackBuffer() {
 214   DVLOG(2) << __func__ << "()";
 215 }
 216
 217 void MseTrackBuffer::Reset() {
 218   DVLOG(2) << __func__ << "()";
 219
 220   last_decode_timestamp_ = kNoDecodeTimestamp;
 221   last_frame_duration_ = kNoTimestamp;
 222   highest_presentation_timestamp_ = kNoTimestamp;
 223   needs_random_access_point_ = true;
 224   last_keyframe_presentation_timestamp_ = kNoTimestamp;
 225 }
 226
 227 void MseTrackBuffer::ResetHighestPresentationTimestamp() {
 228   highest_presentation_timestamp_ = kNoTimestamp;
 229 }
 230
 231 void MseTrackBuffer::SetHighestPresentationTimestampIfIncreased(
 232     base::TimeDelta timestamp) {
 233   if (highest_presentation_timestamp_ == kNoTimestamp ||
 234       timestamp > highest_presentation_timestamp_) {
 235     highest_presentation_timestamp_ = timestamp;
 236   }
 237 }
 238
 239 bool MseTrackBuffer::EnqueueProcessedFrame(
 240     scoped_refptr<StreamParserBuffer> frame) {
 241   if (frame->is_key_frame()) {
 242     last_keyframe_presentation_timestamp_ = frame->timestamp();
 243   } else {
 244     DCHECK(last_keyframe_presentation_timestamp_ != kNoTimestamp);
 245     // This is just one case of potentially problematic GOP structures, though
 246     // others are more clearly disallowed in at least some of the MSE bytestream
 247     // specs, especially ISOBMFF. See https://crbug.com/739931 for more
 248     // information.
 249     if (frame->timestamp() < last_keyframe_presentation_timestamp_) {
 250       if (!num_keyframe_time_greater_than_dependant_warnings_) {
 251         // At most once per each track (but potentially multiple times per
 252         // playback, if there are more than one tracks that exhibit this
 253         // sequence in a playback) run the warning's callback.
 254         DCHECK(parse_warning_cb_);
 255         parse_warning_cb_.Run(
 256             SourceBufferParseWarning::kKeyframeTimeGreaterThanDependant);
 257       }
 258
 259       LIMITED_MEDIA_LOG(DEBUG, media_log_,
 260                         num_keyframe_time_greater_than_dependant_warnings_,
 261                         kMaxNumKeyframeTimeGreaterThanDependantWarnings)
 262           << "Warning: presentation time of most recently processed random "
 263              "access point ("
 264           << last_keyframe_presentation_timestamp_
 265           << ") is later than the presentation time of a non-keyframe ("
 266           << frame->timestamp()
 267           << ") that depends on it. This type of random access point is not "
 268              "well supported by MSE; buffered range reporting may be less "
 269              "precise.";
 270
 271       // SAP-Type-2 GOPs, by definition, contain at least one non-keyframe with
 272       // PTS prior to the keyframe's PTS, with DTS continuous from keyframe
 273       // forward to at least that non-keyframe. If such a non-keyframe overlaps
 274       // the end of a previously buffered GOP sufficiently (such that, say, some
 275       // previous GOP's non-keyframes depending on the overlapped
 276       // non-keyframe(s) must be dropped), then a gap might need to result. But
 277       // if we attempt to buffer the new GOP's keyframe through at least that
 278       // first non-keyframe that does such overlapping all at once, the
 279       // buffering mechanism doesn't expect such a discontinuity could occur
 280       // (failing assumptions in places like SourceBufferRange).
 281       //
 282       // To prevent such failure, we can first flush what's previously been
 283       // enqueued (if anything), but do this conservatively to not flush
 284       // unnecessarily: we suppress such a flush if this nonkeyframe's PTS is
 285       // still higher than the last coded frame group start time signalled for
 286       // this track and no flush has yet occurred for this track since then, or
 287       // if there has been a flush since then but this nonkeyframe's PTS is no
 288       // lower than the PTS of the first frame pending flush currently.
 289       if (!processed_frames_.empty()) {
 290         DCHECK(kNoTimestamp != last_signalled_group_start_pts_);
 291
 292         if (!have_flushed_since_last_group_start_) {
 293           if (frame->timestamp() < last_signalled_group_start_pts_) {
 294             if (!FlushProcessedFrames())
 295               return false;
 296           }
 297         } else {
 298           if (frame->timestamp() < processed_frames_.front()->timestamp()) {
 299             if (!FlushProcessedFrames())
 300               return false;
 301           }
 302         }
 303       }
 304     }
 305   }
 306
 307   DCHECK(pending_group_start_pts_ == kNoTimestamp ||
 308          pending_group_start_pts_ <= frame->timestamp());
 309   pending_group_start_pts_ = kNoTimestamp;
 310   last_processed_decode_timestamp_ = frame->GetDecodeTimestamp();
 311   processed_frames_.emplace_back(std::move(frame));
 312   return true;
 313 }
 314
 315 bool MseTrackBuffer::FlushProcessedFrames() {
 316   if (processed_frames_.empty())
 317     return true;
 318
 319   bool result = stream_->Append(processed_frames_);
 320   processed_frames_.clear();
 321   have_flushed_since_last_group_start_ = true;
 322
 323   DVLOG_IF(3, !result) << __func__
 324                        << "(): Failure appending processed frames to stream";
 325
 326   return result;
 327 }
 328
 329 void MseTrackBuffer::NotifyStartOfCodedFrameGroup(DecodeTimestamp start_dts,
 330                                                   base::TimeDelta start_pts) {
 331   last_keyframe_presentation_timestamp_ = kNoTimestamp;
 332   last_processed_decode_timestamp_ = start_dts;
 333   pending_group_start_pts_ = start_pts;
 334   have_flushed_since_last_group_start_ = false;
 335   last_signalled_group_start_pts_ = start_pts;
 336   stream_->OnStartOfCodedFrameGroup(start_dts, start_pts);
 337 }
 338
 339 FrameProcessor::FrameProcessor(UpdateDurationCB update_duration_cb,
 340                                MediaLog* media_log)
 341     : group_start_timestamp_(kNoTimestamp),
 342       update_duration_cb_(std::move(update_duration_cb)),
 343       media_log_(media_log) {
 344   DVLOG(2) << __func__ << "()";
 345   DCHECK(update_duration_cb_);
 346 }
 347
 348 FrameProcessor::~FrameProcessor() {
 349   DVLOG(2) << __func__ << "()";
 350 }
 351
 352 void FrameProcessor::SetParseWarningCallback(
 353     SourceBufferParseWarningCB parse_warning_cb) {
 354   DCHECK(!parse_warning_cb_);
 355   DCHECK(parse_warning_cb);
 356   parse_warning_cb_ = std::move(parse_warning_cb);
 357 }
 358
 359 void FrameProcessor::SetSequenceMode(bool sequence_mode) {
 360   DVLOG(2) << __func__ << "(" << sequence_mode << ")";
 361   // Per June 9, 2016 MSE spec editor's draft:
 362   // https://rawgit.com/w3c/media-source/d8f901f22/
 363   //     index.html#widl-SourceBuffer-mode
 364   // Step 7: If the new mode equals "sequence", then set the group start
 365   // timestamp to the group end timestamp.
 366   if (sequence_mode) {
 367     DCHECK(kNoTimestamp != group_end_timestamp_);
 368     group_start_timestamp_ = group_end_timestamp_;
 369   } else if (sequence_mode_) {
 370     // We're switching from 'sequence' to 'segments' mode. Be safe and signal a
 371     // new coded frame group on the next frame emitted.
 372     pending_notify_all_group_start_ = true;
 373   }
 374
 375   // Step 8: Update the attribute to new mode.
 376   sequence_mode_ = sequence_mode;
 377 }
 378
 379 bool FrameProcessor::ProcessFrames(
 380     const StreamParser::BufferQueueMap& buffer_queue_map,
 381     base::TimeDelta append_window_start,
 382     base::TimeDelta append_window_end,
 383     base::TimeDelta* timestamp_offset) {
 384   StreamParser::BufferQueue frames;
 385   if (!MergeBufferQueues(buffer_queue_map, &frames)) {
 386     MEDIA_LOG(ERROR, media_log_) << "Parsed buffers not in DTS sequence";
 387     return false;
 388   }
 389
 390   DCHECK(!frames.empty());
 391
 392   if (sequence_mode_ && track_buffers_.size() > 1) {
 393     if (!num_muxed_sequence_mode_warnings_) {
 394       // At most once per SourceBuffer (but potentially multiple times per
 395       // playback, if there are more than one SourceBuffers used this way in a
 396       // playback) run the warning's callback.
 397       DCHECK(parse_warning_cb_);
 398       parse_warning_cb_.Run(SourceBufferParseWarning::kMuxedSequenceMode);
 399     }
 400
 401     LIMITED_MEDIA_LOG(DEBUG, media_log_, num_muxed_sequence_mode_warnings_,
 402                       kMaxMuxedSequenceModeWarnings)
 403         << "Warning: using MSE 'sequence' AppendMode for a SourceBuffer with "
 404            "multiple tracks may cause loss of track synchronization. In some "
 405            "cases, buffered range gaps and playback stalls can occur. It is "
 406            "recommended to instead use 'segments' mode for a multitrack "
 407            "SourceBuffer.";
 408   }
 409
 410   // Monitor |group_end_timestamp_| to detect any cases where it decreases while
 411   // processing |frames| (which should all be from no more than 1 media
 412   // segment), to see if (outside of mediasource fuzzers) real API usage hits
 413   // this case frequently enough to potentially warrant MSE spec clarification
 414   // of the last step in the coded frame processing algorithm. The previous
 415   // value is not used as a baseline, since the spec would already handle that
 416   // case interoperably (since we may be starting the processing of frames from
 417   // a new media segment.) See https://crbug.com/920853 and
 418   // https://github.com/w3c/media-source/issues/203.
 419   base::TimeDelta max_group_end_timestamp = kNoTimestamp;
 420
 421   // Implements the coded frame processing algorithm's outer loop for step 1.
 422   // Note that ProcessFrame() implements an inner loop for a single frame that
 423   // handles "jump to the Loop Top step to restart processing of the current
 424   // coded frame" per June 9, 2016 MSE spec editor's draft:
 425   // https://rawgit.com/w3c/media-source/d8f901f22/
 426   //     index.html#sourcebuffer-coded-frame-processing
 427   // 1. For each coded frame in the media segment run the following steps:
 428   for (const auto& frame : frames) {
 429     // Skip any 0-byte audio or video buffers, since they cannot produce any
 430     // valid decode output (and are rejected by FFmpeg A/V decode.)
 431     if (!frame->data_size()) {
 432       LIMITED_MEDIA_LOG(DEBUG, media_log_, num_skipped_empty_frame_warnings_,
 433                         kMaxSkippedEmptyFrameWarnings)
 434           << "Discarding empty audio or video coded frame, PTS="
 435           << frame->timestamp().InMicroseconds()
 436           << "us, DTS=" << frame->GetDecodeTimestamp().InMicroseconds() << "us";
 437       continue;
 438     }
 439
 440     if (!ProcessFrame(frame, append_window_start, append_window_end,
 441                       timestamp_offset)) {
 442       FlushProcessedFrames();
 443       return false;
 444     }
 445
 446     max_group_end_timestamp =
 447         std::max(group_end_timestamp_, max_group_end_timestamp);
 448   }
 449
 450   if (!FlushProcessedFrames())
 451     return false;
 452
 453   // 2. - 4. Are handled by the WebMediaPlayer / Pipeline / Media Element.
 454
 455   // 5. If the media segment contains data beyond the current duration, then run
 456   //    the duration change algorithm with new duration set to the maximum of
 457   //    the current duration and the group end timestamp.
 458   if (max_group_end_timestamp > group_end_timestamp_) {
 459     // Log a parse warning. For now at least, we don't also log this to
 460     // media-internals.
 461     DCHECK(parse_warning_cb_);
 462     parse_warning_cb_.Run(
 463         SourceBufferParseWarning::kGroupEndTimestampDecreaseWithinMediaSegment);
 464   }
 465   update_duration_cb_.Run(group_end_timestamp_);
 466
 467   return true;
 468 }
 469
 470 void FrameProcessor::SetGroupStartTimestampIfInSequenceMode(
 471     base::TimeDelta timestamp_offset) {
 472   DVLOG(2) << __func__ << "(" << timestamp_offset.InMicroseconds() << "us)";
 473   DCHECK(kNoTimestamp != timestamp_offset);
 474   if (sequence_mode_)
 475     group_start_timestamp_ = timestamp_offset;
 476
 477   // Changes to timestampOffset should invalidate the preroll buffer.
 478   audio_preroll_buffer_.reset();
 479 }
 480
 481 bool FrameProcessor::AddTrack(StreamParser::TrackId id,
 482                               ChunkDemuxerStream* stream) {
 483   DVLOG(2) << __func__ << "(): id=" << id;
 484
 485   MseTrackBuffer* existing_track = FindTrack(id);
 486   DCHECK(!existing_track);
 487   if (existing_track) {
 488     MEDIA_LOG(ERROR, media_log_) << "Failure adding track with duplicate ID "
 489                                  << id;
 490     return false;
 491   }
 492
 493   track_buffers_[id] =
 494       std::make_unique<MseTrackBuffer>(stream, media_log_, parse_warning_cb_);
 495   return true;
 496 }
 497
 498 bool FrameProcessor::UpdateTrackIds(const TrackIdChanges& track_id_changes) {
 499   TrackBuffersMap& old_track_buffers = track_buffers_;
 500   TrackBuffersMap new_track_buffers;
 501
 502   for (const auto& ids : track_id_changes) {
 503     if (old_track_buffers.find(ids.first) == old_track_buffers.end() ||
 504         new_track_buffers.find(ids.second) != new_track_buffers.end()) {
 505       MEDIA_LOG(ERROR, media_log_) << "Failure updating track id from "
 506                                    << ids.first << " to " << ids.second;
 507       return false;
 508     }
 509     new_track_buffers[ids.second] = std::move(old_track_buffers[ids.first]);
 510     CHECK_EQ(1u, old_track_buffers.erase(ids.first));
 511   }
 512
 513   // Process remaining track buffers with unchanged ids.
 514   for (const auto& t : old_track_buffers) {
 515     if (new_track_buffers.find(t.first) != new_track_buffers.end()) {
 516       MEDIA_LOG(ERROR, media_log_) << "Track id " << t.first << " conflict";
 517       return false;
 518     }
 519     new_track_buffers[t.first] = std::move(old_track_buffers[t.first]);
 520   }
 521
 522   std::swap(track_buffers_, new_track_buffers);
 523   return true;
 524 }
 525
 526 void FrameProcessor::SetAllTrackBuffersNeedRandomAccessPoint() {
 527   for (auto itr = track_buffers_.begin(); itr != track_buffers_.end(); ++itr) {
 528     itr->second->set_needs_random_access_point(true);
 529   }
 530 }
 531
 532 void FrameProcessor::Reset() {
 533   DVLOG(2) << __func__ << "()";
 534   for (auto itr = track_buffers_.begin(); itr != track_buffers_.end(); ++itr) {
 535     itr->second->Reset();
 536   }
 537
 538   // Maintain current |pending_notify_all_group_start_| state for Reset() during
 539   // sequence mode. Reset it here only if in segments mode. In sequence mode,
 540   // the current coded frame group may be continued across Reset() operations to
 541   // allow the stream to coalesce what might otherwise be gaps in the buffered
 542   // ranges. See also the declaration for |pending_notify_all_group_start_|.
 543   if (!sequence_mode_) {
 544     pending_notify_all_group_start_ = true;
 545     return;
 546   }
 547
 548   // Sequence mode
 549   DCHECK(kNoTimestamp != group_end_timestamp_);
 550   group_start_timestamp_ = group_end_timestamp_;
 551 }
 552
 553 void FrameProcessor::OnPossibleAudioConfigUpdate(
 554     const AudioDecoderConfig& config) {
 555   DCHECK(config.IsValidConfig());
 556
 557   // Always clear the preroll buffer when a config update is received.
 558   audio_preroll_buffer_.reset();
 559
 560   if (config.Matches(current_audio_config_))
 561     return;
 562
 563   current_audio_config_ = config;
 564   sample_duration_ =
 565       base::Seconds(1.0 / current_audio_config_.samples_per_second());
 566   has_dependent_audio_frames_ =
 567       current_audio_config_.profile() == AudioCodecProfile::kXHE_AAC;
 568   last_audio_pts_for_nonkeyframe_monotonicity_check_ = kNoTimestamp;
 569 }
 570
 571 MseTrackBuffer* FrameProcessor::FindTrack(StreamParser::TrackId id) {
 572   auto itr = track_buffers_.find(id);
 573   if (itr == track_buffers_.end())
 574     return NULL;
 575
 576   return itr->second.get();
 577 }
 578
 579 void FrameProcessor::NotifyStartOfCodedFrameGroup(DecodeTimestamp start_dts,
 580                                                   base::TimeDelta start_pts) {
 581   DVLOG(2) << __func__ << "(dts " << start_dts.InMicroseconds() << "us, pts "
 582            << start_pts.InMicroseconds() << "us)";
 583
 584   for (auto itr = track_buffers_.begin(); itr != track_buffers_.end(); ++itr) {
 585     itr->second->NotifyStartOfCodedFrameGroup(start_dts, start_pts);
 586   }
 587 }
 588
 589 bool FrameProcessor::FlushProcessedFrames() {
 590   DVLOG(2) << __func__ << "()";
 591
 592   bool result = true;
 593   for (auto itr = track_buffers_.begin(); itr != track_buffers_.end(); ++itr) {
 594     if (!itr->second->FlushProcessedFrames())
 595       result = false;
 596   }
 597
 598   return result;
 599 }
 600
 601 bool FrameProcessor::HandlePartialAppendWindowTrimming(
 602     base::TimeDelta append_window_start,
 603     base::TimeDelta append_window_end,
 604     scoped_refptr<StreamParserBuffer> buffer) {
 605   DCHECK(buffer->duration() >= base::TimeDelta());
 606   DCHECK_EQ(DemuxerStream::AUDIO, buffer->type());
 607   DCHECK(has_dependent_audio_frames_ || buffer->is_key_frame());
 608
 609   const base::TimeDelta frame_end_timestamp =
 610       buffer->timestamp() + buffer->duration();
 611
 612   // If the buffer is entirely before |append_window_start|, save it as preroll
 613   // for the first buffer which overlaps |append_window_start|.
 614   if (buffer->timestamp() < append_window_start &&
 615       frame_end_timestamp <= append_window_start) {
 616     // But if the buffer is not a keyframe, do not use it for preroll, nor use
 617     // any previous preroll buffer for simplicity here.
 618     if (has_dependent_audio_frames_ && !buffer->is_key_frame()) {
 619       audio_preroll_buffer_.reset();
 620     } else {
 621       audio_preroll_buffer_ = std::move(buffer);
 622     }
 623     return false;
 624   }
 625
 626   // If the buffer is entirely after |append_window_end| there's nothing to do.
 627   if (buffer->timestamp() >= append_window_end)
 628     return false;
 629
 630   DCHECK(buffer->timestamp() >= append_window_start ||
 631          frame_end_timestamp > append_window_start);
 632
 633   bool processed_buffer = false;
 634
 635   // If we have a preroll buffer see if we can attach it to the first buffer
 636   // overlapping or after |append_window_start|.
 637   if (audio_preroll_buffer_) {
 638     // We only want to use the preroll buffer if it directly precedes (less
 639     // than one sample apart) the current buffer.
 640     const int64_t delta =
 641         (audio_preroll_buffer_->timestamp() +
 642          audio_preroll_buffer_->duration() - buffer->timestamp())
 643             .InMicroseconds();
 644     if (std::abs(delta) < sample_duration_.InMicroseconds() &&
 645         audio_preroll_buffer_->timestamp() <= buffer->timestamp()) {
 646       DVLOG(1) << "Attaching audio preroll buffer ["
 647                << audio_preroll_buffer_->timestamp().InMicroseconds() << "us, "
 648                << (audio_preroll_buffer_->timestamp() +
 649                    audio_preroll_buffer_->duration())
 650                       .InMicroseconds()
 651                << "us) to " << buffer->timestamp().InMicroseconds() << "us";
 652       buffer->SetPrerollBuffer(std::move(audio_preroll_buffer_));
 653       processed_buffer = true;
 654     } else {
 655       LIMITED_MEDIA_LOG(DEBUG, media_log_, num_dropped_preroll_warnings_,
 656                         kMaxDroppedPrerollWarnings)
 657           << "Partial append window trimming dropping unused audio preroll "
 658              "buffer with PTS "
 659           << audio_preroll_buffer_->timestamp().InMicroseconds()
 660           << "us that ends too far (" << delta
 661           << "us) from next buffer with PTS "
 662           << buffer->timestamp().InMicroseconds() << "us";
 663       audio_preroll_buffer_.reset();
 664     }
 665   }
 666
 667   // See if a partial discard can be done around |append_window_start|.
 668   if (buffer->timestamp() < append_window_start) {
 669     LIMITED_MEDIA_LOG(INFO, media_log_, num_partial_discard_warnings_,
 670                       kMaxPartialDiscardWarnings)
 671         << "Truncating audio buffer which overlaps append window start."
 672         << " PTS " << buffer->timestamp().InMicroseconds()
 673         << "us frame_end_timestamp " << frame_end_timestamp.InMicroseconds()
 674         << "us append_window_start " << append_window_start.InMicroseconds()
 675         << "us";
 676
 677     // Mark the overlapping portion of the buffer for discard.
 678     // TODO(wolenetz): Is this correct to ignore any pre-existing discard
 679     // padding (e.g. WebM discard padding)? See https://crbug.com/969195.
 680     buffer->set_discard_padding(std::make_pair(
 681         append_window_start - buffer->timestamp(), base::TimeDelta()));
 682
 683     // Adjust the timestamp of this buffer forward to |append_window_start| and
 684     // decrease the duration to compensate. Adjust DTS by the same delta as PTS
 685     // to help prevent spurious discontinuities when DTS > PTS.
 686     base::TimeDelta pts_delta = append_window_start - buffer->timestamp();
 687     buffer->set_timestamp(append_window_start);
 688     buffer->SetDecodeTimestamp(buffer->GetDecodeTimestamp() + pts_delta);
 689     buffer->set_duration(frame_end_timestamp - append_window_start);
 690     processed_buffer = true;
 691   }
 692
 693   // See if a partial discard can be done around |append_window_end|.
 694   if (frame_end_timestamp > append_window_end) {
 695     LIMITED_MEDIA_LOG(INFO, media_log_, num_partial_discard_warnings_,
 696                       kMaxPartialDiscardWarnings)
 697         << "Truncating audio buffer which overlaps append window end."
 698         << " PTS " << buffer->timestamp().InMicroseconds()
 699         << "us frame_end_timestamp " << frame_end_timestamp.InMicroseconds()
 700         << "us append_window_end " << append_window_end.InMicroseconds() << "us"
 701         << (buffer->is_duration_estimated() ? " (frame duration is estimated)"
 702                                             : "");
 703
 704     // Mark the overlapping portion of the buffer for discard.
 705     // TODO(wolenetz): Is this correct to ignore any pre-existing discard
 706     // padding (e.g. WebM discard padding)? See https://crbug.com/969195.
 707     buffer->set_discard_padding(
 708         std::make_pair(buffer->discard_padding().first,
 709                        frame_end_timestamp - append_window_end));
 710
 711     // Decrease the duration of the buffer to remove the discarded portion.
 712     buffer->set_duration(append_window_end - buffer->timestamp());
 713     processed_buffer = true;
 714   }
 715
 716   return processed_buffer;
 717 }
 718
 719 bool FrameProcessor::CheckAudioPresentationOrder(
 720     const StreamParserBuffer& frame,
 721     bool track_buffer_needs_random_access_point) {
 722   DCHECK_EQ(DemuxerStream::AUDIO, frame.type());
 723   DCHECK(has_dependent_audio_frames_);
 724   if (frame.is_key_frame()) {
 725     // Audio keyframes trivially succeed here. They start a new PTS baseline for
 726     // the purpose of the checks in this method.
 727     last_audio_pts_for_nonkeyframe_monotonicity_check_ = frame.timestamp();
 728     return true;
 729   }
 730   if (track_buffer_needs_random_access_point) {
 731     // This nonkeyframe trivially succeeds here, though it will not be buffered
 732     // later in the caller since a keyframe is required first.
 733     last_audio_pts_for_nonkeyframe_monotonicity_check_ = kNoTimestamp;
 734     return true;
 735   }
 736
 737   // We're not waiting for a random access point, so we must have a valid PTS
 738   // baseline.
 739   DCHECK_NE(kNoTimestamp, last_audio_pts_for_nonkeyframe_monotonicity_check_);
 740
 741   if (frame.timestamp() >= last_audio_pts_for_nonkeyframe_monotonicity_check_) {
 742     last_audio_pts_for_nonkeyframe_monotonicity_check_ = frame.timestamp();
 743     return true;
 744   }
 745
 746   last_audio_pts_for_nonkeyframe_monotonicity_check_ = kNoTimestamp;
 747   return false;  // Caller should fail parse in this case.
 748 }
 749
 750 bool FrameProcessor::ProcessFrame(scoped_refptr<StreamParserBuffer> frame,
 751                                   base::TimeDelta append_window_start,
 752                                   base::TimeDelta append_window_end,
 753                                   base::TimeDelta* timestamp_offset) {
 754   // Implements the loop within step 1 of the coded frame processing algorithm
 755   // for a single input frame per June 9, 2016 MSE spec editor's draft:
 756   // https://rawgit.com/w3c/media-source/d8f901f22/
 757   //     index.html#sourcebuffer-coded-frame-processing
 758   while (true) {
 759     // 1. Loop Top:
 760     // Otherwise case: (See also SourceBufferState::OnNewBuffer's conditional
 761     // modification of timestamp_offset after frame processing returns, when
 762     // generate_timestamps_flag is true).
 763     // 1.1. Let presentation timestamp be a double precision floating point
 764     //      representation of the coded frame's presentation timestamp in
 765     //      seconds.
 766     // 1.2. Let decode timestamp be a double precision floating point
 767     //      representation of the coded frame's decode timestamp in seconds.
 768     // 2. Let frame duration be a double precision floating point representation
 769     //    of the coded frame's duration in seconds.
 770     // We use base::TimeDelta and DecodeTimestamp instead of double.
 771     base::TimeDelta presentation_timestamp = frame->timestamp();
 772     DecodeTimestamp decode_timestamp = frame->GetDecodeTimestamp();
 773     base::TimeDelta frame_duration = frame->duration();
 774
 775     DVLOG(3) << __func__ << ": Processing frame Type=" << frame->type()
 776              << ", TrackID=" << frame->track_id()
 777              << ", PTS=" << presentation_timestamp.InMicroseconds()
 778              << "us, DTS=" << decode_timestamp.InMicroseconds()
 779              << "us, DUR=" << frame_duration.InMicroseconds()
 780              << "us, RAP=" << frame->is_key_frame();
 781
 782     // Buffering, splicing, append window trimming, etc., all depend on the
 783     // assumption that all audio coded frames are key frames. Metadata in the
 784     // bytestream may not indicate that, so we need to enforce that assumption
 785     // here with a warning log.
 786     if (frame->type() == DemuxerStream::AUDIO && !has_dependent_audio_frames_ &&
 787         !frame->is_key_frame()) {
 788       LIMITED_MEDIA_LOG(DEBUG, media_log_, num_audio_non_keyframe_warnings_,
 789                         kMaxAudioNonKeyframeWarnings)
 790           << "Bytestream with audio frame PTS "
 791           << presentation_timestamp.InMicroseconds() << "us and DTS "
 792           << decode_timestamp.InMicroseconds()
 793           << "us indicated the frame is not a random access point (key frame). "
 794              "All audio frames are expected to be key frames for the current "
 795              "audio codec.";
 796       frame->set_is_key_frame(true);
 797     }
 798
 799     // Sanity check the timestamps.
 800     if (presentation_timestamp == kNoTimestamp) {
 801       MEDIA_LOG(ERROR, media_log_) << "Unknown PTS for " << frame->GetTypeName()
 802                                    << " frame";
 803       return false;
 804     }
 805
 806     // StreamParserBuffer's GetDecodeTimestamp() shouldn't return
 807     // kNoDecodeTimestamp if we already found the frame's PTS was kNoTimestamp
 808     // and failed processing.
 809     DCHECK(decode_timestamp != kNoDecodeTimestamp);
 810
 811     if (presentation_timestamp.is_inf()) {
 812       MEDIA_LOG(ERROR, media_log_)
 813           << "Before adjusting by timestampOffset, PTS for "
 814           << frame->GetTypeName()
 815           << " frame exceeds range allowed by implementation";
 816       return false;
 817     }
 818
 819     if (decode_timestamp.is_inf()) {
 820       MEDIA_LOG(ERROR, media_log_)
 821           << "Before adjusting by timestampOffset, DTS for "
 822           << frame->GetTypeName()
 823           << " frame exceeds range allowed by implementation";
 824       return false;
 825     }
 826
 827     // TODO(wolenetz): Determine whether any DTS>PTS logging is needed. See
 828     // http://crbug.com/354518.
 829     DVLOG_IF(2, decode_timestamp.ToPresentationTime() > presentation_timestamp)
 830         << __func__ << ": WARNING: Frame DTS("
 831         << decode_timestamp.InMicroseconds() << "us) > PTS("
 832         << presentation_timestamp.InMicroseconds()
 833         << "us), frame type=" << frame->GetTypeName();
 834
 835     // All stream parsers should emit valid (non-negative) frame durations.
 836     // Note that duration of 0 can occur for at least WebM alt-ref frames.
 837     if (frame_duration == kNoTimestamp) {
 838       MEDIA_LOG(ERROR, media_log_)
 839           << "Unknown duration for " << frame->GetTypeName() << " frame at PTS "
 840           << presentation_timestamp.InMicroseconds() << "us";
 841       return false;
 842     }
 843
 844     // See also partial protections in DecoderBuffer::set_duration().
 845     // Using stronger CHECK here in case any of the parsers become fragile to
 846     // fuzzer coverage gaps when calculating buffer durations.
 847     CHECK(frame_duration >= base::TimeDelta() &&
 848           frame_duration != kInfiniteDuration);
 849
 850     // 3. If mode equals "sequence" and group start timestamp is set, then run
 851     //    the following steps:
 852     if (sequence_mode_ && group_start_timestamp_ != kNoTimestamp) {
 853       // 3.1. Set timestampOffset equal to group start timestamp -
 854       //      presentation timestamp.
 855       if (group_start_timestamp_.is_inf()) {
 856         // +Infinity may be set when app sets timestampOffset. We emit error in
 857         // such case upon next potential use of that offset here.
 858         DCHECK(group_start_timestamp_ == kInfiniteDuration);
 859         MEDIA_LOG(ERROR, media_log_)
 860             << "Sequence mode timestampOffset update prevented by a group "
 861                "start timestamp that exceeds range allowed by implementation";
 862         return false;
 863       }
 864
 865       *timestamp_offset = group_start_timestamp_ - presentation_timestamp;
 866       if (timestamp_offset->is_inf()) {
 867         MEDIA_LOG(ERROR, media_log_)
 868             << "Sequence mode timestampOffset update resulted in an offset "
 869                "that exceeds range allowed by implementation";
 870         return false;
 871       }
 872
 873       DVLOG(3) << __func__ << ": updated timestampOffset is now "
 874                << timestamp_offset->InMicroseconds() << "us";
 875
 876       // 3.2. Set group end timestamp equal to group start timestamp.
 877       group_end_timestamp_ = group_start_timestamp_;
 878
 879       // 3.3. Set the need random access point flag on all track buffers to
 880       //      true.
 881       SetAllTrackBuffersNeedRandomAccessPoint();
 882
 883       // Remember to signal a new coded frame group. Note, this may introduce
 884       // gaps on large jumps forwards in sequence mode.
 885       pending_notify_all_group_start_ = true;
 886
 887       // 3.4. Unset group start timestamp.
 888       group_start_timestamp_ = kNoTimestamp;
 889     }
 890
 891     // 4. If timestampOffset is not 0, then run the following steps:
 892     if (!timestamp_offset->is_zero()) {
 893       if (timestamp_offset->is_inf()) {
 894         // This condition might occur if the app set timestampOffset while in
 895         // 'segments' append mode, skipping the 'sequence' mode offset update
 896         // checks, above.
 897         MEDIA_LOG(ERROR, media_log_)
 898             << "timestampOffset exceeds range allowed by implementation";
 899         return false;
 900       }
 901
 902       // 4.1. Add timestampOffset to the presentation timestamp.
 903       // Note: |frame| PTS is only updated if it survives discontinuity
 904       // processing.
 905       presentation_timestamp += *timestamp_offset;
 906       if (presentation_timestamp.is_inf()) {
 907         MEDIA_LOG(ERROR, media_log_)
 908             << "After adjusting by timestampOffset, PTS for "
 909             << frame->GetTypeName()
 910             << " frame exceeds range allowed by implementation";
 911         return false;
 912       }
 913
 914       // 4.2. Add timestampOffset to the decode timestamp.
 915       // Frame DTS is only updated if it survives discontinuity processing.
 916       decode_timestamp += *timestamp_offset;
 917       if (decode_timestamp.is_inf()) {
 918         MEDIA_LOG(ERROR, media_log_)
 919             << "After adjusting by timestampOffset, DTS for "
 920             << frame->GetTypeName()
 921             << " frame exceeds range allowed by implementation";
 922         return false;
 923       }
 924     }
 925
 926     // 5. Let track buffer equal the track buffer that the coded frame will be
 927     //    added to.
 928     StreamParser::TrackId track_id = frame->track_id();
 929     MseTrackBuffer* track_buffer = FindTrack(track_id);
 930     if (!track_buffer) {
 931       MEDIA_LOG(ERROR, media_log_)
 932           << "Unknown track with type " << frame->GetTypeName()
 933           << ", frame processor track id " << track_id
 934           << ", and parser track id " << frame->track_id();
 935       return false;
 936     }
 937     if (frame->type() != track_buffer->stream()->type()) {
 938       MEDIA_LOG(ERROR, media_log_) << "Frame type " << frame->GetTypeName()
 939                                    << " doesn't match track buffer type "
 940                                    << track_buffer->stream()->type();
 941       return false;
 942     }
 943
 944     // 6. If last decode timestamp for track buffer is set and decode timestamp
 945     //    is less than last decode timestamp
 946     //    OR
 947     //    If last decode timestamp for track buffer is set and the difference
 948     //    between decode timestamp and last decode timestamp is greater than 2
 949     //    times last frame duration:
 950     DecodeTimestamp track_last_decode_timestamp =
 951         track_buffer->last_decode_timestamp();
 952     if (track_last_decode_timestamp != kNoDecodeTimestamp) {
 953       base::TimeDelta track_dts_delta =
 954           decode_timestamp - track_last_decode_timestamp;
 955       if (track_dts_delta.is_negative() ||
 956           track_dts_delta > 2 * track_buffer->last_frame_duration()) {
 957         // 6.1. If mode equals "segments": Set group end timestamp to
 958         //      presentation timestamp.
 959         //      If mode equals "sequence": Set group start timestamp equal to
 960         //      the group end timestamp.
 961         if (!sequence_mode_) {
 962           group_end_timestamp_ = presentation_timestamp;
 963           // This triggers a discontinuity so we need to treat the next frames
 964           // appended within the append window as if they were the beginning of
 965           // a new coded frame group. |pending_notify_all_group_start_| is reset
 966           // in Reset(), below, for "segments" mode.
 967         } else {
 968           DVLOG(3) << __func__ << " : Sequence mode discontinuity, GETS: "
 969                    << group_end_timestamp_.InMicroseconds() << "us";
 970           // Reset(), below, performs the "Set group start timestamp equal to
 971           // the group end timestamp" operation for "sequence" mode.
 972         }
 973
 974         // 6.2. - 6.5.:
 975         Reset();
 976
 977         // 6.6. Jump to the Loop Top step above to restart processing of the
 978         //      current coded frame.
 979         DVLOG(3) << __func__ << ": Discontinuity: reprocessing frame";
 980         continue;
 981       }
 982     }
 983
 984     // 7. Let frame end timestamp equal the sum of presentation timestamp and
 985     //    frame duration.
 986     base::TimeDelta frame_end_timestamp =
 987         presentation_timestamp + frame_duration;
 988
 989     // 8.  If presentation timestamp is less than appendWindowStart, then set
 990     //     the need random access point flag to true, drop the coded frame, and
 991     //     jump to the top of the loop to start processing the next coded
 992     //     frame.
 993     // Note: We keep the result of partial discard of a buffer that overlaps
 994     //       |append_window_start| and does not end after |append_window_end|,
 995     //       for streams which support partial trimming.
 996     // 9. If frame end timestamp is greater than appendWindowEnd, then set the
 997     //    need random access point flag to true, drop the coded frame, and jump
 998     //    to the top of the loop to start processing the next coded frame.
 999     // Note: We keep the result of partial discard of a buffer that overlaps
1000     //       |append_window_end|, for streams which support partial trimming.
1001     frame->set_timestamp(presentation_timestamp);
1002     frame->SetDecodeTimestamp(decode_timestamp);
1003
1004     if (has_dependent_audio_frames_ && frame->type() == DemuxerStream::AUDIO &&
1005         !CheckAudioPresentationOrder(
1006             *frame, track_buffer->needs_random_access_point())) {
1007       MEDIA_LOG(ERROR, media_log_)
1008           << "Dependent audio frame with invalid decreasing presentation "
1009              "timestamp detected.";
1010       return false;
1011     }
1012
1013     // Attempt to trim audio exactly to fit the append window.
1014     if (frame->type() == DemuxerStream::AUDIO &&
1015         (frame->is_key_frame() || !track_buffer->needs_random_access_point()) &&
1016         HandlePartialAppendWindowTrimming(append_window_start,
1017                                           append_window_end, frame)) {
1018       // |frame| has been partially trimmed or had preroll added.  Though
1019       // |frame|'s duration may have changed, do not update |frame_duration|
1020       // here, so |track_buffer|'s last frame duration update uses original
1021       // frame duration and reduces spurious discontinuity detection.
1022       decode_timestamp = frame->GetDecodeTimestamp();
1023       presentation_timestamp = frame->timestamp();
1024       frame_end_timestamp = frame->timestamp() + frame->duration();
1025     }
1026
1027     if (frame_end_timestamp.is_inf()) {
1028       MEDIA_LOG(ERROR, media_log_)
1029           << "Frame end timestamp for " << frame->GetTypeName()
1030           << " frame exceeds range allowed by implementation";
1031       return false;
1032     }
1033
1034     if (presentation_timestamp < append_window_start ||
1035         frame_end_timestamp > append_window_end) {
1036       track_buffer->set_needs_random_access_point(true);
1037
1038       LIMITED_MEDIA_LOG(INFO, media_log_, num_dropped_frame_warnings_,
1039                         kMaxDroppedFrameWarnings)
1040           << "Dropping " << frame->GetTypeName() << " frame (DTS "
1041           << decode_timestamp.InMicroseconds() << "us PTS "
1042           << presentation_timestamp.InMicroseconds() << "us,"
1043           << frame_end_timestamp.InMicroseconds()
1044           << "us) that is outside append window ["
1045           << append_window_start.InMicroseconds() << "us,"
1046           << append_window_end.InMicroseconds() << "us).";
1047       return true;
1048     }
1049
1050     DCHECK(presentation_timestamp >= base::TimeDelta());
1051
1052     // 10. If the need random access point flag on track buffer equals true,
1053     //     then run the following steps:
1054     if (track_buffer->needs_random_access_point()) {
1055       // 10.1. If the coded frame is not a random access point, then drop the
1056       //       coded frame and jump to the top of the loop to start processing
1057       //       the next coded frame.
1058       if (!frame->is_key_frame()) {
1059         DVLOG(3) << __func__
1060                  << ": Dropping frame that is not a random access point";
1061         return true;
1062       }
1063
1064       // 10.2. Set the need random access point flag on track buffer to false.
1065       track_buffer->set_needs_random_access_point(false);
1066     }
1067
1068     // We now have a processed buffer to append to the track buffer's stream.
1069     // If it is the first in a new coded frame group (such as following a
1070     // segments append mode discontinuity, or following a switch to segments
1071     // append mode from sequence append mode), notify all the track buffers
1072     // that a coded frame group is starting.
1073     bool signal_new_cfg = pending_notify_all_group_start_;
1074
1075     // In muxed multi-track streams, it may occur that we already signaled a new
1076     // coded frame group (CFG) upon detecting a discontinuity in trackA, only to
1077     // now find that frames in trackB actually have an earlier timestamp. If
1078     // this is detected using last_processed_decode_timestamp() (which persists
1079     // across DTS-based discontinuity detection in sequence mode, and which
1080     // contains either the last processed DTS or last signalled CFG DTS for
1081     // trackB), re-signal trackB that a CFG is starting with its new earlier
1082     // DTS. Similarly, if this is detected using pending_group_start_pts()
1083     // (which is !kNoTimestamp only when the track hasn't yet been given the
1084     // first buffer in the CFG, and if so, it's the expected PTS start of that
1085     // CFG), re-signal trackB that a CFG is starting with its new earlier PTS.
1086     // Avoid re-signalling trackA, as it has already started processing frames
1087     // for this CFG.
1088     signal_new_cfg |=
1089         track_buffer->last_processed_decode_timestamp() > decode_timestamp ||
1090         (track_buffer->pending_group_start_pts() != kNoTimestamp &&
1091          track_buffer->pending_group_start_pts() > presentation_timestamp);
1092
1093     if (frame->is_key_frame()) {
1094       // When a keyframe is discovered to have a decreasing PTS versus the
1095       // previous highest presentation timestamp for that track in the current
1096       // coded frame group, signal a new coded frame group for that track buffer
1097       // so that it can correctly process overlap-removals for the new GOP.
1098       if (track_buffer->highest_presentation_timestamp() != kNoTimestamp &&
1099           track_buffer->highest_presentation_timestamp() >
1100               presentation_timestamp) {
1101         signal_new_cfg = true;
1102         // In case there is currently a decreasing keyframe PTS relative to the
1103         // track buffer's highest PTS, that is later followed by a jump forward
1104         // requiring overlap removal of media prior to the track buffer's
1105         // highest PTS, reset that tracking now to ensure correctness of
1106         // signalling the need for such overlap removal later.
1107         track_buffer->ResetHighestPresentationTimestamp();
1108       }
1109
1110       // When an otherwise continuous coded frame group (by DTS, and with
1111       // non-decreasing keyframe PTS) contains a keyframe with PTS in the future
1112       // significantly far enough that it may be outside of buffering fudge
1113       // room, signal a new coded frame group with start time set to the
1114       // previous highest frame end time in the coded frame group for this
1115       // track. This lets the stream coalesce a potential gap, and also pass
1116       // internal buffer adjacency checks.
1117       signal_new_cfg |=
1118           track_buffer->highest_presentation_timestamp() != kNoTimestamp &&
1119           track_buffer->highest_presentation_timestamp() + frame->duration() <
1120               presentation_timestamp;
1121     }
1122
1123     if (signal_new_cfg) {
1124       DCHECK(frame->is_key_frame());
1125
1126       // First, complete the append to track buffer streams of the previous
1127       // coded frame group's frames, if any.
1128       if (!FlushProcessedFrames())
1129         return false;
1130
1131       if (pending_notify_all_group_start_) {
1132         NotifyStartOfCodedFrameGroup(decode_timestamp, presentation_timestamp);
1133         pending_notify_all_group_start_ = false;
1134       } else {
1135         DecodeTimestamp updated_dts = std::min(
1136             track_buffer->last_processed_decode_timestamp(), decode_timestamp);
1137         base::TimeDelta updated_pts = track_buffer->pending_group_start_pts();
1138         if (updated_pts == kNoTimestamp &&
1139             track_buffer->highest_presentation_timestamp() != kNoTimestamp &&
1140             track_buffer->highest_presentation_timestamp() <
1141                 presentation_timestamp) {
1142           updated_pts = track_buffer->highest_presentation_timestamp();
1143         }
1144         if (updated_pts == kNoTimestamp || updated_pts > presentation_timestamp)
1145           updated_pts = presentation_timestamp;
1146         track_buffer->NotifyStartOfCodedFrameGroup(updated_dts, updated_pts);
1147       }
1148     }
1149
1150     DVLOG(3) << __func__ << ": Enqueueing processed frame "
1151              << "PTS=" << presentation_timestamp.InMicroseconds()
1152              << "us, DTS=" << decode_timestamp.InMicroseconds() << "us";
1153
1154     // Steps 11-16: Note, we optimize by appending groups of contiguous
1155     // processed frames for each track buffer at end of ProcessFrames() or prior
1156     // to signalling coded frame group starts.
1157     if (!track_buffer->EnqueueProcessedFrame(std::move(frame)))
1158       return false;
1159
1160     // 17. Set last decode timestamp for track buffer to decode timestamp.
1161     track_buffer->set_last_decode_timestamp(decode_timestamp);
1162
1163     // 18. Set last frame duration for track buffer to frame duration.
1164     track_buffer->set_last_frame_duration(frame_duration);
1165
1166     // 19. If highest presentation timestamp for track buffer is unset or frame
1167     //     end timestamp is greater than highest presentation timestamp, then
1168     //     set highest presentation timestamp for track buffer to frame end
1169     //     timestamp.
1170     track_buffer->SetHighestPresentationTimestampIfIncreased(
1171         frame_end_timestamp);
1172
1173     // 20. If frame end timestamp is greater than group end timestamp, then set
1174     //     group end timestamp equal to frame end timestamp.
1175     if (frame_end_timestamp > group_end_timestamp_)
1176       group_end_timestamp_ = frame_end_timestamp;
1177     DCHECK(group_end_timestamp_ >= base::TimeDelta());
1178
1179     // TODO(wolenetz): Step 21 is currently approximated by predicted
1180     // frame_end_time by SourceBufferState::OnNewBuffers(). See
1181     // https://crbug.com/850316.
1182
1183     return true;
1184   }
1185
1186   NOTREACHED_NORETURN();
1187 }
1188
1189 }  // namespace media