media/renderers/audio_renderer_impl.h

   1 // Copyright 2012 The Chromium Authors
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 // Audio rendering unit utilizing an AudioRendererSink to output data.
   6 //
   7 // This class lives inside three threads during its lifetime, namely:
   8 // 1. Render thread
   9 //    Where the object is created.
  10 // 2. Media thread (provided via constructor)
  11 //    All AudioDecoder methods are called on this thread.
  12 // 3. Audio thread created by the AudioRendererSink.
  13 //    Render() is called here where audio data is decoded into raw PCM data.
  14 //
  15 // AudioRendererImpl talks to an AudioRendererAlgorithm that takes care of
  16 // queueing audio data and stretching/shrinking audio data when playback rate !=
  17 // 1.0 or 0.0.
  18
  19 #ifndef MEDIA_RENDERERS_AUDIO_RENDERER_IMPL_H_
  20 #define MEDIA_RENDERERS_AUDIO_RENDERER_IMPL_H_
  21
  22 #include <stdint.h>
  23
  24 #include <memory>
  25
  26 #include "base/memory/raw_ptr.h"
  27 #include "base/memory/weak_ptr.h"
  28 #include "base/power_monitor/power_observer.h"
  29 #include "base/synchronization/lock.h"
  30 #include "base/task/sequenced_task_runner.h"
  31 #include "base/time/time.h"
  32 #include "build/build_config.h"
  33 #include "media/base/audio_decoder.h"
  34 #include "media/base/audio_decoder_config.h"
  35 #include "media/base/audio_renderer.h"
  36 #include "media/base/audio_renderer_sink.h"
  37 #include "media/base/decryptor.h"
  38 #include "media/base/media_log.h"
  39 #include "media/base/time_source.h"
  40 #include "media/filters/audio_renderer_algorithm.h"
  41 #include "media/filters/decoder_stream.h"
  42 #include "media/renderers/renderer_impl_factory.h"
  43 #include "third_party/abseil-cpp/absl/types/optional.h"
  44
  45 namespace base {
  46 class TickClock;
  47 }  // namespace base
  48
  49 namespace media {
  50
  51 class AudioBufferConverter;
  52 class AudioBus;
  53 class AudioClock;
  54 class NullAudioSink;
  55 class SpeechRecognitionClient;
  56
  57 class MEDIA_EXPORT AudioRendererImpl
  58     : public AudioRenderer,
  59       public TimeSource,
  60       public base::PowerSuspendObserver,
  61       public AudioRendererSink::RenderCallback {
  62  public:
  63   using PlayDelayCBForTesting = base::RepeatingCallback<void(base::TimeDelta)>;
  64
  65   // Send the audio to the speech recognition service for caption transcription.
  66   using TranscribeAudioCallback =
  67       base::RepeatingCallback<void(scoped_refptr<AudioBuffer>)>;
  68
  69   using EnableSpeechRecognitionCallback =
  70       base::OnceCallback<void(TranscribeAudioCallback)>;
  71
  72   // |task_runner| is the thread on which AudioRendererImpl will execute.
  73   //
  74   // |sink| is used as the destination for the rendered audio.
  75   //
  76   // |decoders| contains the AudioDecoders to use when initializing.
  77   AudioRendererImpl(
  78       const scoped_refptr<base::SequencedTaskRunner>& task_runner,
  79       AudioRendererSink* sink,
  80       const CreateAudioDecodersCB& create_audio_decoders_cb,
  81       MediaLog* media_log,
  82       MediaPlayerLoggingID media_player_id,
  83       SpeechRecognitionClient* speech_recognition_client = nullptr);
  84
  85   AudioRendererImpl(const AudioRendererImpl&) = delete;
  86   AudioRendererImpl& operator=(const AudioRendererImpl&) = delete;
  87
  88   ~AudioRendererImpl() override;
  89
  90   // TimeSource implementation.
  91   void StartTicking() override;
  92   void StopTicking() override;
  93   void SetPlaybackRate(double rate) override;
  94   void SetMediaTime(base::TimeDelta time) override;
  95   base::TimeDelta CurrentMediaTime() override;
  96   bool GetWallClockTimes(
  97       const std::vector<base::TimeDelta>& media_timestamps,
  98       std::vector<base::TimeTicks>* wall_clock_times) override;
  99
 100   // AudioRenderer implementation.
 101   void Initialize(DemuxerStream* stream,
 102                   CdmContext* cdm_context,
 103                   RendererClient* client,
 104                   PipelineStatusCallback init_cb) override;
 105   TimeSource* GetTimeSource() override;
 106   void Flush(base::OnceClosure callback) override;
 107   void StartPlaying() override;
 108   void SetVolume(float volume) override;
 109   void SetLatencyHint(absl::optional<base::TimeDelta> latency_hint) override;
 110   void SetPreservesPitch(bool preserves_pitch) override;
 111   void SetWasPlayedWithUserActivation(
 112       bool was_played_with_user_activation) override;
 113
 114   // base::PowerSuspendObserver implementation.
 115   void OnSuspend() override;
 116   void OnResume() override;
 117
 118   void SetPlayDelayCBForTesting(PlayDelayCBForTesting cb);
 119   bool was_unmuted_for_testing() const { return was_unmuted_; }
 120
 121   void decoded_audio_ready_for_testing() {
 122     DecodedAudioReady(DecoderStatus::Codes::kFailed);
 123   }
 124
 125  private:
 126   friend class AudioRendererImplTest;
 127
 128   // Important detail: being in kPlaying doesn't imply that audio is being
 129   // rendered. Rather, it means that the renderer is ready to go. The actual
 130   // rendering of audio is controlled via Start/StopRendering().
 131   // Audio renderer can be reinitialized completely by calling Initialize again
 132   // when it is in a kFlushed state.
 133   //
 134   //   kUninitialized
 135   //  +----> | Initialize()
 136   //  |      |
 137   //  |      V
 138   //  | kInitializing
 139   //  |      | Decoders initialized
 140   //  |      |
 141   //  |      V            Decoders reset
 142   //  +-  kFlushed <------------------ kFlushing
 143   //         | StartPlaying()             ^
 144   //         |                            |
 145   //         |                            | Flush()
 146   //         `---------> kPlaying --------'
 147   enum State { kUninitialized, kInitializing, kFlushing, kFlushed, kPlaying };
 148
 149   // Called after hardware device information is available.
 150   void OnDeviceInfoReceived(DemuxerStream* stream,
 151                             CdmContext* cdm_context,
 152                             OutputDeviceInfo output_device_info);
 153
 154   // Callback from the audio decoder delivering decoded audio samples.
 155   void DecodedAudioReady(AudioDecoderStream::ReadResult result);
 156
 157   // Handles buffers that come out of decoder (MSE: after passing through
 158   // |buffer_converter_|).
 159   // Returns true if more buffers are needed.
 160   bool HandleDecodedBuffer_Locked(scoped_refptr<AudioBuffer> buffer);
 161
 162   // Helper functions for DecodeStatus values passed to
 163   // DecodedAudioReady().
 164   void HandleAbortedReadOrDecodeError(PipelineStatus status);
 165
 166   void StartRendering_Locked();
 167   void StopRendering_Locked();
 168
 169   // AudioRendererSink::RenderCallback implementation.
 170   //
 171   // NOTE: These are called on the audio callback thread!
 172   //
 173   // Render() fills the given buffer with audio data by delegating to its
 174   // |algorithm_|. Render() also takes care of updating the clock.
 175   // Returns the number of frames copied into |audio_bus|, which may be less
 176   // than or equal to the initial number of frames in |audio_bus|
 177   //
 178   // If this method returns fewer frames than the initial number of frames in
 179   // |audio_bus|, it could be a sign that the pipeline is stalled or unable to
 180   // stream the data fast enough.  In such scenarios, the callee should zero out
 181   // unused portions of their buffer to play back silence.
 182   //
 183   // Render() updates the pipeline's playback timestamp. If Render() is
 184   // not called at the same rate as audio samples are played, then the reported
 185   // timestamp in the pipeline will be ahead of the actual audio playback. In
 186   // this case |delay| should be used to indicate when in the future
 187   // should the filled buffer be played.
 188   int Render(base::TimeDelta delay,
 189              base::TimeTicks delay_timestamp,
 190              const AudioGlitchInfo& glitch_info,
 191              AudioBus* dest) override;
 192   void OnRenderError() override;
 193
 194   // Helper methods that schedule an asynchronous read from the decoder as long
 195   // as there isn't a pending read.
 196   //
 197   // Must be called on |task_runner_|.
 198   void AttemptRead();
 199   void AttemptRead_Locked();
 200   bool CanRead_Locked();
 201   void ChangeState_Locked(State new_state);
 202
 203   // Returns true if the data in the buffer is all before |start_timestamp_|.
 204   // This can only return true while in the kPlaying state.
 205   bool IsBeforeStartTime(const AudioBuffer& buffer);
 206
 207   // Called upon AudioDecoderStream initialization, or failure thereof
 208   // (indicated by the value of |success|).
 209   void OnAudioDecoderStreamInitialized(bool succes);
 210
 211   void FinishInitialization(PipelineStatus status);
 212   void FinishFlush();
 213
 214   // Callback functions to be called on |client_|.
 215   void OnPlaybackError(PipelineStatus error);
 216   void OnPlaybackEnded();
 217   void OnStatisticsUpdate(const PipelineStatistics& stats);
 218   void OnBufferingStateChange(BufferingState state);
 219   void OnWaiting(WaitingReason reason);
 220
 221   // Generally called by the AudioDecoderStream when a config change occurs. May
 222   // also be called internally with an empty config to reset config-based state.
 223   // Will notify RenderClient when called with a valid config.
 224   void OnConfigChange(const AudioDecoderConfig& config);
 225
 226   // Used to initiate the flush operation once all pending reads have
 227   // completed.
 228   void DoFlush_Locked();
 229
 230   // Called when the |decoder_|.Reset() has completed.
 231   void ResetDecoderDone();
 232
 233   // Updates |buffering_state_| and fires |buffering_state_cb_|.
 234   void SetBufferingState_Locked(BufferingState buffering_state);
 235
 236   // Configure's the channel mask for |algorithm_|. Must be called if the layout
 237   // changes. Expect the layout in |last_decoded_channel_layout_|.
 238   void ConfigureChannelMask();
 239
 240   void EnableSpeechRecognition();
 241   void TranscribeAudio(scoped_refptr<media::AudioBuffer> buffer);
 242
 243   scoped_refptr<base::SequencedTaskRunner> task_runner_;
 244
 245   std::unique_ptr<AudioBufferConverter> buffer_converter_;
 246
 247   // Whether or not we expect to handle config changes.
 248   bool expecting_config_changes_;
 249
 250   // Stores the last decoder config that was passed to
 251   // RendererClient::OnAudioConfigChange. Used to prevent signaling config
 252   // to the upper layers when when the new config is the same.
 253   AudioDecoderConfig current_decoder_config_;
 254
 255   // The sink (destination) for rendered audio. |sink_| must only be accessed
 256   // on |task_runner_|. |sink_| must never be called under |lock_| or else we
 257   // may deadlock between |task_runner_| and the audio callback thread.
 258   //
 259   // When a muted playback starts up, |sink_| will be unused until the playback
 260   // is unmuted. During this time |null_sink_| will be used.
 261   scoped_refptr<AudioRendererSink> sink_;
 262
 263   // For muted playbacks we don't use a real sink. Unused if the playback is
 264   // unmuted.
 265   scoped_refptr<NullAudioSink> null_sink_;
 266
 267   // True if |sink_| has not yet been started.
 268   bool real_sink_needs_start_;
 269
 270   std::unique_ptr<AudioDecoderStream> audio_decoder_stream_;
 271
 272   // This dangling raw_ptr occurred in:
 273   // Webkit_unit_tests: WebMediaPlayerImplTest.MediaPositionState_Playing
 274   // https://ci.chromium.org/ui/p/chromium/builders/try/linux-rel/1425332/test-results?q=ExactID%3Aninja%3A%2F%2Fthird_party%2Fblink%2Frenderer%2Fcontroller%3Ablink_unittests%2FWebMediaPlayerImplTest.MediaPositionState_Playing+VHash%3A896f1103f2d1008d
 275   raw_ptr<MediaLog, FlakyDanglingUntriaged> media_log_;
 276
 277   MediaPlayerLoggingID player_id_;
 278
 279   // Cached copy of audio params that the renderer is initialized with.
 280   AudioParameters audio_parameters_;
 281
 282   // Passed in during Initialize().
 283   raw_ptr<DemuxerStream> demuxer_stream_;
 284
 285   raw_ptr<RendererClient> client_;
 286
 287   // Callback provided during Initialize().
 288   PipelineStatusCallback init_cb_;
 289
 290   // Callback provided to Flush().
 291   base::OnceClosure flush_cb_;
 292
 293   // Overridable tick clock for testing.
 294   raw_ptr<const base::TickClock> tick_clock_;
 295
 296   // Memory usage of |algorithm_| recorded during the last
 297   // HandleDecodedBuffer_Locked() call.
 298   int64_t last_audio_memory_usage_;
 299
 300   // Sample rate of the last decoded audio buffer. Allows for detection of
 301   // sample rate changes due to implicit AAC configuration change.
 302   int last_decoded_sample_rate_;
 303
 304   // Similar to |last_decoded_sample_rate_|, used to configure the channel mask
 305   // given to the |algorithm_| for efficient playback rate changes.
 306   ChannelLayout last_decoded_channel_layout_;
 307
 308   // Whether the stream is possibly encrypted.
 309   bool is_encrypted_;
 310
 311   // Similar to |last_decoded_channel_layout_|, used to configure the channel
 312   // mask given to the |algorithm_| for efficient playback rate changes.
 313   int last_decoded_channels_;
 314
 315   // Cached volume provided by SetVolume().
 316   float volume_;
 317
 318   // A flag indicating whether the audio stream was ever unmuted.
 319   bool was_unmuted_ = false;
 320
 321   // After Initialize() has completed, all variables below must be accessed
 322   // under |lock_|. ------------------------------------------------------------
 323   base::Lock lock_;
 324
 325   // Algorithm for scaling audio.
 326   double playback_rate_;
 327   std::unique_ptr<AudioRendererAlgorithm> algorithm_;
 328
 329   // Stored value from last call to SetLatencyHint(). Passed to |algorithm_|
 330   // during Initialize().
 331   absl::optional<base::TimeDelta> latency_hint_;
 332
 333   // Passed to |algorithm_|. Indicates whether |algorithm_| should or should not
 334   // make pitch adjustments at playbacks other than 1.0.
 335   bool preserves_pitch_ = true;
 336
 337   bool was_played_with_user_activation_ = false;
 338
 339   // Simple state tracking variable.
 340   State state_;
 341
 342   // TODO(servolk): Consider using DecoderFactory here instead of the
 343   // CreateAudioDecodersCB.
 344   CreateAudioDecodersCB create_audio_decoders_cb_;
 345
 346   BufferingState buffering_state_;
 347
 348   // Keep track of whether or not the sink is playing and whether we should be
 349   // rendering.
 350   bool rendering_;
 351   bool sink_playing_;
 352
 353   // Keep track of our outstanding read to |decoder_|.
 354   bool pending_read_;
 355
 356   // Keeps track of whether we received and rendered the end of stream buffer.
 357   bool received_end_of_stream_;
 358   bool rendered_end_of_stream_;
 359
 360   std::unique_ptr<AudioClock> audio_clock_;
 361
 362   // The media timestamp to begin playback at after seeking. Set via
 363   // SetMediaTime().
 364   base::TimeDelta start_timestamp_;
 365
 366   // The media timestamp to signal end of audio playback. Determined during
 367   // Render() when writing the final frames of decoded audio data.
 368   base::TimeDelta ended_timestamp_;
 369
 370   // Set every Render() and used to provide an interpolated time value to
 371   // CurrentMediaTimeForSyncingVideo().
 372   base::TimeTicks last_render_time_;
 373
 374   // Set to the value of |last_render_time_| when StopRendering_Locked() is
 375   // called for any reason.  Cleared by the next successful Render() call after
 376   // being used to adjust for lost time between the last call.
 377   base::TimeTicks stop_rendering_time_;
 378
 379   // Set upon receipt of the first decoded buffer after a StartPlayingFrom().
 380   // Used to determine how long to delay playback.
 381   base::TimeDelta first_packet_timestamp_;
 382
 383   // Set by OnSuspend() and OnResume() to indicate when the system is about to
 384   // suspend/is suspended and when it resumes.
 385   bool is_suspending_;
 386
 387   // Whether to pass compressed audio bitstream to audio sink directly.
 388   bool is_passthrough_;
 389
 390   // Set and used only in tests to report positive play_delay values in
 391   // Render().
 392   PlayDelayCBForTesting play_delay_cb_for_testing_;
 393
 394   // End variables which must be accessed under |lock_|. ----------------------
 395
 396 #if !BUILDFLAG(IS_ANDROID)
 397   raw_ptr<SpeechRecognitionClient> speech_recognition_client_;
 398   TranscribeAudioCallback transcribe_audio_callback_;
 399 #endif
 400
 401   // Ensures we don't issue log spam when absurd delay values are encountered.
 402   int num_absurd_delay_warnings_ = 0;
 403
 404   // NOTE: Weak pointers must be invalidated before all other member variables.
 405   base::WeakPtrFactory<AudioRendererImpl> weak_factory_{this};
 406 };
 407
 408 }  // namespace media
 409
 410 #endif  // MEDIA_RENDERERS_AUDIO_RENDERER_IMPL_H_