// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/webrtc_audio_renderer.h"

#include <algorithm>

#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_util.h"
#include "base/strings/stringprintf.h"
#include "content/renderer/media/audio_device_factory.h"
#include "content/renderer/media/webrtc_audio_device_impl.h"
#include "content/renderer/media/webrtc_logging.h"
#include "media/audio/audio_output_device.h"
#include "media/audio/audio_parameters.h"
#include "media/audio/sample_rates.h"
#include "third_party/libjingle/source/talk/app/webrtc/mediastreaminterface.h"
#include "third_party/libjingle/source/talk/media/base/audiorenderer.h"

#if defined(OS_WIN)
#include "base/win/windows_version.h"
#include "media/audio/win/core_audio_util_win.h"
#endif

namespace content {

namespace {

// Supported hardware sample rates for the output side.
#if defined(OS_WIN) || defined(OS_MACOSX)
// AudioHardwareConfig::GetOutputSampleRate() asks the audio layer for its
// current sample rate (set by the user) on Windows and Mac OS X. The rates
// listed below add restrictions and Initialize() will fail if the user selects
// any rate outside these ranges.
const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD)
const int kValidOutputRates[] = {48000, 44100};
#elif defined(OS_ANDROID)
// TODO(leozwang): We want to use the native sampling rate on Android to
// achieve low latency; currently 16000 is used to work around an audio
// problem on some devices.
const int kValidOutputRates[] = {48000, 44100, 16000};
#else
const int kValidOutputRates[] = {44100};
#endif

// TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
enum AudioFramesPerBuffer {
  k160,
  k320,
  k440,
  k480,
  k640,
  k880,
  k960,
  k1440,
  k1920,
  kUnexpectedAudioBufferSize  // Must always be last!
};

// Helper method to convert integral values to their respective enum values
// above, or kUnexpectedAudioBufferSize if no match exists.
// We map 441 to k440 to avoid changes in the XML part for histograms.
// It is still possible to map the histogram result to the actual buffer size.
// See http://crbug.com/243450 for details.
AudioFramesPerBuffer AsAudioFramesPerBuffer(int frames_per_buffer) {
  switch (frames_per_buffer) {
    case 160: return k160;
    case 320: return k320;
    case 441: return k440;
    case 480: return k480;
    case 640: return k640;
    case 880: return k880;
    case 960: return k960;
    case 1440: return k1440;
    case 1920: return k1920;
  }
  return kUnexpectedAudioBufferSize;
}

void AddHistogramFramesPerBuffer(int param) {
  AudioFramesPerBuffer afpb = AsAudioFramesPerBuffer(param);
  if (afpb != kUnexpectedAudioBufferSize) {
    UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
                              afpb, kUnexpectedAudioBufferSize);
  } else {
    // Report unexpected buffer sizes using a unique histogram name.
    UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputFramesPerBufferUnexpected", param);
  }
}

// This is a simple wrapper class that's handed out to users of a shared
// WebRtcAudioRenderer instance. This class maintains the per-user 'playing'
// and 'started' states to avoid problems related to incorrect usage which
// might violate the implementation assumptions inside WebRtcAudioRenderer
// (see the play reference count).
class SharedAudioRenderer : public MediaStreamAudioRenderer {
 public:
  // Callback definition for a callback that is called when Play(), Pause()
  // or SetVolume() are called (whenever the internal |playing_state_| changes).
  typedef base::Callback<
      void(const scoped_refptr<webrtc::MediaStreamInterface>&,
           WebRtcAudioRenderer::PlayingState*)> OnPlayStateChanged;

  SharedAudioRenderer(
      const scoped_refptr<MediaStreamAudioRenderer>& delegate,
      const scoped_refptr<webrtc::MediaStreamInterface>& media_stream,
      const OnPlayStateChanged& on_play_state_changed)
      : delegate_(delegate), media_stream_(media_stream), started_(false),
        on_play_state_changed_(on_play_state_changed) {
    DCHECK(!on_play_state_changed_.is_null());
    DCHECK(media_stream_.get());
  }

 protected:
  virtual ~SharedAudioRenderer() {
    DCHECK(thread_checker_.CalledOnValidThread());
    DVLOG(1) << __FUNCTION__;
    Stop();
  }

  virtual void Start() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    if (started_)
      return;
    started_ = true;
    delegate_->Start();
  }

  virtual void Play() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    DCHECK(started_);
    if (playing_state_.playing())
      return;
    playing_state_.set_playing(true);
    on_play_state_changed_.Run(media_stream_, &playing_state_);
  }

  virtual void Pause() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    DCHECK(started_);
    if (!playing_state_.playing())
      return;
    playing_state_.set_playing(false);
    on_play_state_changed_.Run(media_stream_, &playing_state_);
  }

  virtual void Stop() OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    if (!started_)
      return;
    Pause();
    started_ = false;
    delegate_->Stop();
  }

  virtual void SetVolume(float volume) OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    DCHECK(volume >= 0.0f && volume <= 1.0f);
    playing_state_.set_volume(volume);
    on_play_state_changed_.Run(media_stream_, &playing_state_);
  }

  virtual base::TimeDelta GetCurrentRenderTime() const OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    return delegate_->GetCurrentRenderTime();
  }

  virtual bool IsLocalRenderer() const OVERRIDE {
    DCHECK(thread_checker_.CalledOnValidThread());
    return delegate_->IsLocalRenderer();
  }

 private:
  base::ThreadChecker thread_checker_;
  const scoped_refptr<MediaStreamAudioRenderer> delegate_;
  const scoped_refptr<webrtc::MediaStreamInterface> media_stream_;
  bool started_;
  WebRtcAudioRenderer::PlayingState playing_state_;
  OnPlayStateChanged on_play_state_changed_;
};

}  // namespace

WebRtcAudioRenderer::WebRtcAudioRenderer(
    const scoped_refptr<webrtc::MediaStreamInterface>& media_stream,
    int source_render_view_id,
    int source_render_frame_id,
    int session_id,
    int sample_rate,
    int frames_per_buffer)
    : state_(UNINITIALIZED),
      source_render_view_id_(source_render_view_id),
      source_render_frame_id_(source_render_frame_id),
      session_id_(session_id),
      media_stream_(media_stream),
      source_(NULL),
      play_ref_count_(0),
      start_ref_count_(0),
      audio_delay_milliseconds_(0),
      fifo_delay_milliseconds_(0),
      sink_params_(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
                   media::CHANNEL_LAYOUT_STEREO, 0, sample_rate, 16,
                   frames_per_buffer, media::AudioParameters::DUCKING) {
  WebRtcLogMessage(base::StringPrintf(
      "WAR::WAR. source_render_view_id=%d"
      ", session_id=%d, sample_rate=%d, frames_per_buffer=%d",
      source_render_view_id,
      session_id,
      sample_rate,
      frames_per_buffer));
}

WebRtcAudioRenderer::~WebRtcAudioRenderer() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_EQ(state_, UNINITIALIZED);
}

bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
  DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
  DCHECK(thread_checker_.CalledOnValidThread());
  base::AutoLock auto_lock(lock_);
  DCHECK_EQ(state_, UNINITIALIZED);
  DCHECK(source);
  DCHECK(!sink_.get());
  DCHECK(!source_);

  // WebRTC does not yet support higher rates than 96000 on the client side
  // and 48000 is the preferred sample rate. Therefore, if 192000 is detected,
  // we change the rate to 48000 instead. The consequence is that the native
  // layer will be opened up at 192kHz but WebRTC will provide data at 48kHz
  // which will then be resampled by the audio converter on the browser side
  // to match the native audio layer.
  int sample_rate = sink_params_.sample_rate();
  DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;
  if (sample_rate == 192000) {
    DVLOG(1) << "Resampling from 48000 to 192000 is required";
    sample_rate = 48000;
  }

  media::AudioSampleRate asr;
  if (media::ToAudioSampleRate(sample_rate, &asr)) {
    UMA_HISTOGRAM_ENUMERATION(
        "WebRTC.AudioOutputSampleRate", asr, media::kAudioSampleRateMax + 1);
  } else {
    UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputSampleRateUnexpected",
                         sample_rate);
  }

  // Verify that the reported output hardware sample rate is supported
  // on the current platform.
  if (std::find(&kValidOutputRates[0],
                &kValidOutputRates[0] + arraysize(kValidOutputRates),
                sample_rate) ==
          &kValidOutputRates[arraysize(kValidOutputRates)]) {
    DLOG(ERROR) << sample_rate << " is not a supported output rate.";
    return false;
  }

  // Set up audio parameters for the source, i.e., the WebRTC client.

  // The WebRTC client only supports multiples of 10ms as buffer size where
  // 10ms is preferred for lowest possible delay.
  media::AudioParameters source_params;
  const int frames_per_10ms = (sample_rate / 100);
  DVLOG(1) << "Using WebRTC output buffer size: " << frames_per_10ms;

  source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
                      sink_params_.channel_layout(), sink_params_.channels(), 0,
                      sample_rate, 16, frames_per_10ms);

  // Update audio parameters for the sink, i.e., the native audio output
  // stream. We strive to open up using native parameters to achieve best
  // possible performance and to ensure that no FIFO is needed on the browser
  // side to match the client request. Any mismatch between the source and the
  // sink is taken care of in this class instead, using a pull FIFO.

  // Use native output size as default.
  int frames_per_buffer = sink_params_.frames_per_buffer();
#if defined(OS_ANDROID)
  // TODO(henrika): Keep tuning this scheme, especially for low-latency cases.
  // It might not be possible to come up with the perfect solution using the
  // render side only.
  if (frames_per_buffer < 2 * frames_per_10ms) {
    // Examples of low-latency frame sizes and the resulting |frames_per_buffer|:
    //  Nexus 7     : 240 audio frames => 2*480 = 960
    //  Nexus 10    : 256              => 2*441 = 882
    //  Galaxy Nexus: 144              => 2*441 = 882
    frames_per_buffer = 2 * frames_per_10ms;
    DVLOG(1) << "Low-latency output detected on Android";
  }
#endif
  DVLOG(1) << "Using sink output buffer size: " << frames_per_buffer;

  sink_params_.Reset(sink_params_.format(), sink_params_.channel_layout(),
                     sink_params_.channels(), 0, sample_rate, 16,
                     frames_per_buffer);

  // Create a FIFO if re-buffering is required to match the source input with
  // the sink request. The source acts as provider here and the sink as
  // consumer.
  fifo_delay_milliseconds_ = 0;
  if (source_params.frames_per_buffer() != sink_params_.frames_per_buffer()) {
    DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
             << " to " << sink_params_.frames_per_buffer();
    audio_fifo_.reset(new media::AudioPullFifo(
        source_params.channels(),
        source_params.frames_per_buffer(),
        base::Bind(
            &WebRtcAudioRenderer::SourceCallback,
            base::Unretained(this))));

    if (sink_params_.frames_per_buffer() > source_params.frames_per_buffer()) {
      // Use a double here to avoid integer truncation; the duration of a
      // single audio frame is well below one millisecond.
      double frame_duration_milliseconds = base::Time::kMillisecondsPerSecond /
          static_cast<double>(source_params.sample_rate());
      fifo_delay_milliseconds_ = (sink_params_.frames_per_buffer() -
          source_params.frames_per_buffer()) * frame_duration_milliseconds;
    }
  }
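
  // Worked example for the FIFO delay computed above (illustrative numbers
  // only): for a 44.1 kHz stream the source buffer is 441 frames (10 ms). If
  // the sink requests 882 frames per buffer, the FIFO adds
  // (882 - 441) * (1000.0 / 44100) ~= 10 ms, which is added to the hardware
  // delay reported to the source in SourceCallback().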

  source_ = source;

  // Configure the audio rendering client and start rendering.
  sink_ = AudioDeviceFactory::NewOutputDevice(
      source_render_view_id_, source_render_frame_id_);

  // TODO(tommi): Rename InitializeUnifiedStream to rather reflect association
  // with a session id.
  DCHECK_GE(session_id_, 0);
  sink_->InitializeUnifiedStream(sink_params_, this, session_id_);

  sink_->Start();

  // User must call Play() before any audio can be heard.
  state_ = PAUSED;

  UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
                            source_params.frames_per_buffer(),
                            kUnexpectedAudioBufferSize);
  AddHistogramFramesPerBuffer(source_params.frames_per_buffer());

  return true;
}
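
// Each MediaStream that shares this renderer gets its own SharedAudioRenderer
// proxy; the proxy forwards Play()/Pause()/SetVolume() changes back to this
// instance through the bound OnPlayStateChanged() callback.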
scoped_refptr<MediaStreamAudioRenderer>
WebRtcAudioRenderer::CreateSharedAudioRendererProxy(
    const scoped_refptr<webrtc::MediaStreamInterface>& media_stream) {
  content::SharedAudioRenderer::OnPlayStateChanged on_play_state_changed =
      base::Bind(&WebRtcAudioRenderer::OnPlayStateChanged, this);
  return new SharedAudioRenderer(this, media_stream, on_play_state_changed);
}

bool WebRtcAudioRenderer::IsStarted() const {
  DCHECK(thread_checker_.CalledOnValidThread());
  return start_ref_count_ != 0;
}
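
// Start() and Stop() are reference counted: every user (typically a
// SharedAudioRenderer proxy) that calls Start() increments |start_ref_count_|,
// and the sink is only shut down in Stop() once the count reaches zero.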
void WebRtcAudioRenderer::Start() {
  DVLOG(1) << "WebRtcAudioRenderer::Start()";
  DCHECK(thread_checker_.CalledOnValidThread());
  ++start_ref_count_;
}

void WebRtcAudioRenderer::Play() {
  DVLOG(1) << "WebRtcAudioRenderer::Play()";
  DCHECK(thread_checker_.CalledOnValidThread());

  if (playing_state_.playing())
    return;

  playing_state_.set_playing(true);

  OnPlayStateChanged(media_stream_, &playing_state_);
}
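
// EnterPlayState()/EnterPauseState() keep |play_ref_count_| in sync with the
// number of attached playing states; the shared |state_| only returns to
// PAUSED when the last playing user pauses.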
void WebRtcAudioRenderer::EnterPlayState() {
  DVLOG(1) << "WebRtcAudioRenderer::EnterPlayState()";
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
  ++play_ref_count_;

  if (state_ != PLAYING) {
    state_ = PLAYING;

    if (audio_fifo_) {
      audio_delay_milliseconds_ = 0;
      audio_fifo_->Clear();
    }
  }
}

void WebRtcAudioRenderer::Pause() {
  DVLOG(1) << "WebRtcAudioRenderer::Pause()";
  DCHECK(thread_checker_.CalledOnValidThread());
  if (!playing_state_.playing())
    return;

  playing_state_.set_playing(false);

  OnPlayStateChanged(media_stream_, &playing_state_);
}

void WebRtcAudioRenderer::EnterPauseState() {
  DVLOG(1) << "WebRtcAudioRenderer::EnterPauseState()";
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
  base::AutoLock auto_lock(lock_);
  if (state_ == UNINITIALIZED)
    return;

  DCHECK_EQ(state_, PLAYING);
  DCHECK_GT(play_ref_count_, 0);
  if (!--play_ref_count_)
    state_ = PAUSED;
}

void WebRtcAudioRenderer::Stop() {
  DVLOG(1) << "WebRtcAudioRenderer::Stop()";
  DCHECK(thread_checker_.CalledOnValidThread());
  {
    base::AutoLock auto_lock(lock_);
    if (state_ == UNINITIALIZED)
      return;

    if (--start_ref_count_)
      return;

    DVLOG(1) << "Calling RemoveAudioRenderer and Stop().";

    source_->RemoveAudioRenderer(this);
    source_ = NULL;
    state_ = UNINITIALIZED;
  }

  // Make sure to stop the sink while _not_ holding the lock since the Render()
  // callback may currently be executing and try to grab the lock while we're
  // stopping the thread on which it runs.
  sink_->Stop();
}

void WebRtcAudioRenderer::SetVolume(float volume) {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK(volume >= 0.0f && volume <= 1.0f);

  playing_state_.set_volume(volume);
  OnPlayStateChanged(media_stream_, &playing_state_);
}

base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const {
  return base::TimeDelta();
}

bool WebRtcAudioRenderer::IsLocalRenderer() const {
  return false;
}
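
// Render() and OnRenderError() below implement
// media::AudioRendererSink::RenderCallback and are called by |sink_| on a
// separate audio thread, which is why |lock_| guards the state they touch.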
int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
                                int audio_delay_milliseconds) {
  base::AutoLock auto_lock(lock_);
  if (!source_)
    return 0;

  DVLOG(2) << "WebRtcAudioRenderer::Render()";
  DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds;

  audio_delay_milliseconds_ = audio_delay_milliseconds;

  if (audio_fifo_)
    audio_fifo_->Consume(audio_bus, audio_bus->frames());
  else
    SourceCallback(0, audio_bus);

  return (state_ == PLAYING) ? audio_bus->frames() : 0;
}

void WebRtcAudioRenderer::OnRenderError() {
  LOG(ERROR) << "OnRenderError()";
}

// Called by AudioPullFifo when more data is necessary.
void WebRtcAudioRenderer::SourceCallback(
    int fifo_frame_delay, media::AudioBus* audio_bus) {
  DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
           << fifo_frame_delay << ", "
           << audio_bus->frames() << ")";

  int output_delay_milliseconds = audio_delay_milliseconds_;
  output_delay_milliseconds += fifo_delay_milliseconds_;
  DVLOG(2) << "output_delay_milliseconds: " << output_delay_milliseconds;

  // We need to keep render data for the |source_| regardless of |state_|,
  // otherwise the data will be buffered up inside |source_|.
  source_->RenderData(audio_bus, sink_params_.sample_rate(),
                      output_delay_milliseconds);

  // Avoid filling up the audio bus if we are not playing; instead
  // return here and ensure that the returned value in Render() is 0.
  if (state_ != PLAYING)
    audio_bus->Zero();
}
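
// The volume applied to a remote source is the sum of the volumes of all
// states that are currently playing it. For example (illustrative numbers
// only), two renderers playing the same source at 0.5 and 1.0 result in
// source->SetVolume(1.5); if nothing is playing, the volume is set to 0.0.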
void WebRtcAudioRenderer::UpdateSourceVolume(
    webrtc::AudioSourceInterface* source) {
  DCHECK(thread_checker_.CalledOnValidThread());

  // Note: If there are no playing audio renderers, then the volume will be
  // set to 0.0.
  float volume = 0.0f;

  SourcePlayingStates::iterator entry = source_playing_states_.find(source);
  if (entry != source_playing_states_.end()) {
    PlayingStates& states = entry->second;
    for (PlayingStates::const_iterator it = states.begin();
         it != states.end(); ++it) {
      if ((*it)->playing())
        volume += (*it)->volume();
    }
  }

  // The valid range for volume scaling of a remote webrtc source is
  // 0.0 - 10.0 where 1.0 is no attenuation/boost.
  DCHECK(volume >= 0.0f);
  if (volume > 10.0f)
    volume = 10.0f;

  DVLOG(1) << "Setting remote source volume: " << volume;
  source->SetVolume(volume);
}

bool WebRtcAudioRenderer::AddPlayingState(
    webrtc::AudioSourceInterface* source,
    PlayingState* state) {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK(state->playing());
  // Look up or add the |source| to the map.
  PlayingStates& array = source_playing_states_[source];
  if (std::find(array.begin(), array.end(), state) != array.end())
    return false;

  array.push_back(state);

  return true;
}

bool WebRtcAudioRenderer::RemovePlayingState(
    webrtc::AudioSourceInterface* source,
    PlayingState* state) {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK(!state->playing());
  SourcePlayingStates::iterator found = source_playing_states_.find(source);
  if (found == source_playing_states_.end())
    return false;

  PlayingStates& array = found->second;
  PlayingStates::iterator state_it =
      std::find(array.begin(), array.end(), state);
  if (state_it == array.end())
    return false;

  array.erase(state_it);

  if (array.empty())
    source_playing_states_.erase(found);

  return true;
}
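
// Central handler for play-state changes coming from this renderer and from
// the SharedAudioRenderer proxies: walks the audio tracks of |media_stream|,
// updates the per-source bookkeeping and re-applies the summed volume.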
void WebRtcAudioRenderer::OnPlayStateChanged(
    const scoped_refptr<webrtc::MediaStreamInterface>& media_stream,
    PlayingState* state) {
  webrtc::AudioTrackVector tracks(media_stream->GetAudioTracks());
  for (webrtc::AudioTrackVector::iterator it = tracks.begin();
       it != tracks.end(); ++it) {
    webrtc::AudioSourceInterface* source = (*it)->GetSource();
    DCHECK(source);
    if (!state->playing()) {
      if (RemovePlayingState(source, state))
        EnterPauseState();
    } else if (AddPlayingState(source, state)) {
      EnterPlayState();
    }
    UpdateSourceVolume(source);
  }
}

}  // namespace content