media/filters/frame_processor.h

   1 // Copyright 2014 The Chromium Authors
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef MEDIA_FILTERS_FRAME_PROCESSOR_H_
   6 #define MEDIA_FILTERS_FRAME_PROCESSOR_H_
   7
   8 #include <map>
   9 #include <memory>
  10
  11 #include "base/callback_forward.h"
  12 #include "base/memory/raw_ptr.h"
  13 #include "base/time/time.h"
  14 #include "media/base/media_export.h"
  15 #include "media/base/media_log.h"
  16 #include "media/base/stream_parser.h"
  17 #include "media/filters/chunk_demuxer.h"
  18 #include "media/filters/source_buffer_parse_warnings.h"
  19
  20 namespace media {
  21
  22 class MseTrackBuffer;
  23
  24 // Helper class that implements Media Source Extension's coded frame processing
  25 // algorithm.
  26 class MEDIA_EXPORT FrameProcessor {
  27  public:
  28   using UpdateDurationCB = base::RepeatingCallback<void(base::TimeDelta)>;
  29
  30   FrameProcessor(UpdateDurationCB update_duration_cb, MediaLog* media_log);
  31
  32   FrameProcessor(const FrameProcessor&) = delete;
  33   FrameProcessor& operator=(const FrameProcessor&) = delete;
  34
  35   ~FrameProcessor();
  36
  37   // This must be called exactly once, before doing any track buffer creation or
  38   // frame processing.
  39   void SetParseWarningCallback(SourceBufferParseWarningCB parse_warning_cb);
  40
  41   // Get/set the current append mode, which if true means "sequence" and if
  42   // false means "segments".
  43   // See http://www.w3.org/TR/media-source/#widl-SourceBuffer-mode.
  44   bool sequence_mode() { return sequence_mode_; }
  45   void SetSequenceMode(bool sequence_mode);
  46
  47   // Processes buffers in |buffer_queue_map|.
  48   // Returns true on success or false on failure which indicates decode error.
  49   // |append_window_start| and |append_window_end| correspond to the MSE spec's
  50   // similarly named source buffer attributes that are used in coded frame
  51   // processing.
  52   // Uses |*timestamp_offset| according to the coded frame processing algorithm,
  53   // including updating it as required in 'sequence' mode frame processing.
  54   bool ProcessFrames(const StreamParser::BufferQueueMap& buffer_queue_map,
  55                      base::TimeDelta append_window_start,
  56                      base::TimeDelta append_window_end,
  57                      base::TimeDelta* timestamp_offset);
  58
  59   // Signals the frame processor to update its group start timestamp to be
  60   // |timestamp_offset| if it is in sequence append mode.
  61   void SetGroupStartTimestampIfInSequenceMode(base::TimeDelta timestamp_offset);
  62
  63   // Adds a new track with unique track ID |id|.
  64   // If |id| has previously been added, returns false to indicate error.
  65   // Otherwise, returns true, indicating future ProcessFrames() will emit
  66   // frames for the track |id| to |stream|.
  67   bool AddTrack(StreamParser::TrackId id, ChunkDemuxerStream* stream);
  68
  69   // A map that describes how track ids changed between init segment. Maps the
  70   // old track id for a new track id for the same track.
  71   using TrackIdChanges = std::map<StreamParser::TrackId, StreamParser::TrackId>;
  72
  73   // Updates the internal mapping of TrackIds to track buffers. The input
  74   // parameter |track_id_changes| maps old track ids to new ones. The track ids
  75   // not present in the map must be assumed unchanged. Returns false if
  76   // remapping failed.
  77   bool UpdateTrackIds(const TrackIdChanges& track_id_changes);
  78
  79   // Sets the need random access point flag on all track buffers to true.
  80   void SetAllTrackBuffersNeedRandomAccessPoint();
  81
  82   // Resets state for the coded frame processing algorithm as described in steps
  83   // 2-5 of the MSE Reset Parser State algorithm described at
  84   // http://www.w3.org/TR/media-source/#sourcebuffer-reset-parser-state
  85   void Reset();
  86
  87   // Must be called when the audio config is updated.  Used to manage when
  88   // the preroll buffer is cleared and the allowed "fudge" factor between
  89   // preroll buffers.
  90   void OnPossibleAudioConfigUpdate(const AudioDecoderConfig& config);
  91
  92  private:
  93   friend class FrameProcessorTest;
  94
  95   // If |track_buffers_| contains |id|, returns a pointer to the associated
  96   // MseTrackBuffer. Otherwise, returns NULL.
  97   MseTrackBuffer* FindTrack(StreamParser::TrackId id);
  98
  99   // Signals all track buffers' streams that a coded frame group is starting
 100   // with |start_dts| and |start_pts|.
 101   void NotifyStartOfCodedFrameGroup(DecodeTimestamp start_dts,
 102                                     base::TimeDelta start_pts);
 103
 104   // Helper that signals each track buffer to append any processed, but not yet
 105   // appended, frames to its stream. Returns true on success, or false if one or
 106   // more of the appends failed.
 107   bool FlushProcessedFrames();
 108
 109   // Handles partial append window trimming of |buffer|.  Returns true if the
 110   // given |buffer| can be partially trimmed or have preroll added; otherwise,
 111   // returns false.
 112   //
 113   // If |buffer| overlaps |append_window_start|, the portion of |buffer| before
 114   // |append_window_start| will be marked for post-decode discard.  Further, if
 115   // |audio_preroll_buffer_| exists and abuts |buffer|, it will be set as
 116   // preroll on |buffer| and |audio_preroll_buffer_| will be cleared.  If the
 117   // preroll buffer does not abut |buffer|, it will be discarded unused.
 118   //
 119   // Likewise, if |buffer| overlaps |append_window_end|, the portion of |buffer|
 120   // after |append_window_end| will be marked for post-decode discard.
 121   //
 122   // If |buffer| lies entirely before |append_window_start|, and thus would
 123   // normally be discarded, |audio_preroll_buffer_| will be updated and the
 124   // method will return false. In this case, the updated preroll will be
 125   // |buffer| iff |buffer| is a keyframe, otherwise the preroll will be cleared.
 126   bool HandlePartialAppendWindowTrimming(
 127       base::TimeDelta append_window_start,
 128       base::TimeDelta append_window_end,
 129       scoped_refptr<StreamParserBuffer> buffer);
 130
 131   // Enables rejection of audio frame streams with nonkeyframe timestamps that
 132   // do not monotonically increase since the last keyframe. Returns true if
 133   // |frame| appears to be in order, false if |frame|'s order is not supported.
 134   // |track_needs_random_access_point| should be the corresponding value for the
 135   // frame's track buffer. This helper should only be called when
 136   // |has_dependent_audio_frames_| is true, and only for an audio |frame|. This
 137   // method also uses and updates
 138   // |last_audio_pts_for_nonkeyframe_monotonicity_check_|.
 139   bool CheckAudioPresentationOrder(const StreamParserBuffer& frame,
 140                                    bool track_needs_random_access_point);
 141
 142   // Helper that processes one frame with the coded frame processing algorithm.
 143   // Returns false on error or true on success.
 144   bool ProcessFrame(scoped_refptr<StreamParserBuffer> frame,
 145                     base::TimeDelta append_window_start,
 146                     base::TimeDelta append_window_end,
 147                     base::TimeDelta* timestamp_offset);
 148
 149   // TrackId-indexed map of each track's stream.
 150   using TrackBuffersMap =
 151       std::map<StreamParser::TrackId, std::unique_ptr<MseTrackBuffer>>;
 152   TrackBuffersMap track_buffers_;
 153
 154   // The last audio buffer seen by the frame processor that was removed because
 155   // it was entirely before the start of the append window.
 156   scoped_refptr<StreamParserBuffer> audio_preroll_buffer_;
 157
 158   // The AudioDecoderConfig associated with buffers handed to ProcessFrames().
 159   // TODO(wolenetz): Associate current audio config and the derived
 160   // |has_dependent_audio_frames_|, |sample_duration_| and
 161   // |last_audio_pts_for_nonkeyframe_monotonicity_check_| with MseTrackBuffer
 162   // instead to enable handling more than 1 audio track in a SourceBuffer
 163   // simultaneously. See https://crbug.com/1081952.
 164   AudioDecoderConfig current_audio_config_;
 165   bool has_dependent_audio_frames_ = false;
 166   base::TimeDelta sample_duration_;
 167
 168   // When |has_dependent_audio_frames_| is true, holds the PTS of the last
 169   // successfully processed audio frame. If the next audio frame is not a
 170   // keyframe and has lower PTS, the stream is invalid. Currently, the only
 171   // supported audio streams that could contain nonkeyframes are in-order (PTS
 172   // increases monotonically since last keyframe), e.g. xHE-AAC.
 173   base::TimeDelta last_audio_pts_for_nonkeyframe_monotonicity_check_ =
 174       kNoTimestamp;
 175
 176   // The AppendMode of the associated SourceBuffer.
 177   // See SetSequenceMode() for interpretation of |sequence_mode_|.
 178   // Per http://www.w3.org/TR/media-source/#widl-SourceBuffer-mode:
 179   // Controls how a sequence of media segments are handled. This is initially
 180   // set to false ("segments").
 181   bool sequence_mode_ = false;
 182
 183   // Tracks whether or not we need to notify all track buffers of a new coded
 184   // frame group (see https://w3c.github.io/media-source/#coded-frame-group)
 185   // upon the next successfully processed frame.  Set true initially and upon
 186   // detection of DTS discontinuity, parser reset during 'segments' mode, or
 187   // switching from 'sequence' to 'segments' mode.  Individual track buffers can
 188   // also be notified of an updated coded frame group start in edge cases. See
 189   // further comments in ProcessFrame().
 190   bool pending_notify_all_group_start_ = true;
 191
 192   // Tracks the MSE coded frame processing variable of same name.
 193   // Initially kNoTimestamp, meaning "unset".
 194   base::TimeDelta group_start_timestamp_;
 195
 196   // Tracks the MSE coded frame processing variable of same name. It stores the
 197   // highest coded frame end timestamp across all coded frames in the current
 198   // coded frame group. It is set to 0 when the SourceBuffer object is created
 199   // and gets updated by ProcessFrames().
 200   base::TimeDelta group_end_timestamp_;
 201
 202   const UpdateDurationCB update_duration_cb_;
 203
 204   // MediaLog for reporting messages and properties to debug content and engine.
 205   raw_ptr<MediaLog> media_log_;
 206
 207   // Callback for reporting problematic conditions that are not necessarily
 208   // errors.
 209   SourceBufferParseWarningCB parse_warning_cb_;
 210
 211   // Counters that limit spam to |media_log_| for frame processor warnings.
 212   int num_dropped_preroll_warnings_ = 0;
 213   int num_audio_non_keyframe_warnings_ = 0;
 214   int num_muxed_sequence_mode_warnings_ = 0;
 215   int num_skipped_empty_frame_warnings_ = 0;
 216   int num_partial_discard_warnings_ = 0;
 217   int num_dropped_frame_warnings_ = 0;
 218 };
 219
 220 }  // namespace media
 221
 222 #endif  // MEDIA_FILTERS_FRAME_PROCESSOR_H_