1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/media/webrtc_audio_renderer.h"
7 #include "base/logging.h"
8 #include "base/metrics/histogram.h"
9 #include "base/strings/string_util.h"
10 #include "base/strings/stringprintf.h"
11 #include "content/renderer/media/audio_device_factory.h"
12 #include "content/renderer/media/webrtc_audio_device_impl.h"
13 #include "content/renderer/media/webrtc_logging.h"
14 #include "media/audio/audio_output_device.h"
15 #include "media/audio/audio_parameters.h"
16 #include "media/audio/sample_rates.h"
17 #include "third_party/libjingle/source/talk/app/webrtc/mediastreaminterface.h"
18 #include "third_party/libjingle/source/talk/media/base/audiorenderer.h"
22 #include "base/win/windows_version.h"
23 #include "media/audio/win/core_audio_util_win.h"
30 // Supported hardware sample rates for the output side.
31 #if defined(OS_WIN) || defined(OS_MACOSX)
32 // AudioHardwareConfig::GetOutputSampleRate() asks the audio layer for its
33 // current sample rate (set by the user) on Windows and Mac OS X. The rates
34 // listed below add restrictions, and Initialize() will fail if the user
35 // selects any rate that is not in this list.
36 const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
37 #elif defined(OS_LINUX) || defined(OS_OPENBSD)
38 const int kValidOutputRates[] = {48000, 44100};
39 #elif defined(OS_ANDROID)
40 // TODO(leozwang): We want to use native sampling rate on Android to achieve
41 // low latency, currently 16000 is used to work around audio problem on some
43 const int kValidOutputRates[] = {48000, 44100, 16000};
// NOTE(review): the preprocessor directive that selects the single-rate
// definition below is not visible in this view - confirm that it sits in its
// own fallback branch and that the conditional block is properly terminated.
45 const int kValidOutputRates[] = {44100};
48 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
// Buckets used when the output buffer size (in audio frames) is reported to
// the "WebRTC.AudioOutputFramesPerBuffer" histogram.
// kUnexpectedAudioBufferSize is passed as that histogram's boundary value and
// is also what the conversion helper returns for sizes without a bucket.
49 enum AudioFramesPerBuffer {
59 kUnexpectedAudioBufferSize // Must always be last!
62 // Helper method to convert integral values to their respective enum values
63 // above, or kUnexpectedAudioBufferSize if no match exists.
64 // We map 441 to k440 to avoid changes in the XML part for histograms.
65 // It is still possible to map the histogram result to the actual buffer size.
66 // See http://crbug.com/243450 for details.
67 AudioFramesPerBuffer AsAudioFramesPerBuffer(int frames_per_buffer) {
68 switch (frames_per_buffer) {
69 case 160: return k160;
70 case 320: return k320;
71 case 441: return k440;
72 case 480: return k480;
73 case 640: return k640;
74 case 880: return k880;
75 case 960: return k960;
76 case 1440: return k1440;
77 case 1920: return k1920;
79 return kUnexpectedAudioBufferSize;
82 void AddHistogramFramesPerBuffer(int param) {
83 AudioFramesPerBuffer afpb = AsAudioFramesPerBuffer(param);
84 if (afpb != kUnexpectedAudioBufferSize) {
85 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
86 afpb, kUnexpectedAudioBufferSize);
88 // Report unexpected sample rates using a unique histogram name.
89 UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputFramesPerBufferUnexpected", param);
93 // This is a simple wrapper class that's handed out to users of a shared
94 // WebRtcAudioRenderer instance. This class maintains the per-user 'playing'
95 // and 'started' states to avoid problems related to incorrect usage which
96 // might violate the implementation assumptions inside WebRtcAudioRenderer
97 // (see the play reference count).
98 class SharedAudioRenderer : public MediaStreamAudioRenderer {
100 // Signature of the callback that is run when Play(), Pause() or SetVolume()
101 // is called, i.e. whenever the internal |playing_state_| may have changed.
102 typedef base::Callback<
103 void(const scoped_refptr<webrtc::MediaStreamInterface>&,
104 WebRtcAudioRenderer::PlayingState*)> OnPlayStateChanged;
// Constructor arguments: |delegate| answers GetCurrentRenderTime() and
// IsLocalRenderer(), |media_stream| is handed to every state-change
// notification, and |on_play_state_changed| is the notification itself.
// The callback must be non-null and |media_stream| must be set (DCHECKed).
107 const scoped_refptr<MediaStreamAudioRenderer>& delegate,
108 const scoped_refptr<webrtc::MediaStreamInterface>& media_stream,
109 const OnPlayStateChanged& on_play_state_changed)
110 : delegate_(delegate), media_stream_(media_stream), started_(false),
111 on_play_state_changed_(on_play_state_changed) {
112 DCHECK(!on_play_state_changed_.is_null());
113 DCHECK(media_stream_.get());
117 virtual ~SharedAudioRenderer() {
118 DCHECK(thread_checker_.CalledOnValidThread());
119 DVLOG(1) << __FUNCTION__;
// NOTE(review): only the thread check of Start() is visible in this view;
// presumably it also updates |started_| - confirm against the full file.
123 virtual void Start() OVERRIDE {
124 DCHECK(thread_checker_.CalledOnValidThread());
// Marks this proxy as playing and runs |on_play_state_changed_|.
// NOTE(review): the statement guarded by the playing() check is not visible
// here; presumably the call is a no-op when already playing - confirm.
131 virtual void Play() OVERRIDE {
132 DCHECK(thread_checker_.CalledOnValidThread());
134 if (playing_state_.playing())
136 playing_state_.set_playing(true);
137 on_play_state_changed_.Run(media_stream_, &playing_state_);
// Mirror image of Play(): clears the playing flag and runs the callback.
// NOTE(review): the statement guarded by the !playing() check is not visible
// here; presumably the call is a no-op when already paused - confirm.
140 virtual void Pause() OVERRIDE {
141 DCHECK(thread_checker_.CalledOnValidThread());
143 if (!playing_state_.playing())
145 playing_state_.set_playing(false);
146 on_play_state_changed_.Run(media_stream_, &playing_state_);
// NOTE(review): only the thread check of Stop() is visible in this view.
149 virtual void Stop() OVERRIDE {
150 DCHECK(thread_checker_.CalledOnValidThread());
// Stores |volume|, which must lie within [0.0, 1.0] (DCHECKed), in the
// per-user |playing_state_| and runs the callback.
158 virtual void SetVolume(float volume) OVERRIDE {
159 DCHECK(thread_checker_.CalledOnValidThread());
160 DCHECK(volume >= 0.0f && volume <= 1.0f);
161 playing_state_.set_volume(volume);
162 on_play_state_changed_.Run(media_stream_, &playing_state_);
// The two queries below are forwarded unchanged to |delegate_|.
165 virtual base::TimeDelta GetCurrentRenderTime() const OVERRIDE {
166 DCHECK(thread_checker_.CalledOnValidThread());
167 return delegate_->GetCurrentRenderTime();
170 virtual bool IsLocalRenderer() const OVERRIDE {
171 DCHECK(thread_checker_.CalledOnValidThread());
172 return delegate_->IsLocalRenderer();
// Every method above DCHECKs that it is called on the thread accepted by
// |thread_checker_|.
176 base::ThreadChecker thread_checker_;
// Renderer that this proxy forwards queries to.
177 const scoped_refptr<MediaStreamAudioRenderer> delegate_;
// Stream passed along with every play-state notification.
178 const scoped_refptr<webrtc::MediaStreamInterface> media_stream_;
// Per-user playing flag and volume, handed to |on_play_state_changed_|.
180 WebRtcAudioRenderer::PlayingState playing_state_;
181 OnPlayStateChanged on_play_state_changed_;
// Creates a renderer for |media_stream| in the UNINITIALIZED state. The
// render view id, render frame id, session id, sample rate and buffer size
// are only stored here, and both delay members start at 0. The construction
// parameters are also written to the WebRTC log.
186 WebRtcAudioRenderer::WebRtcAudioRenderer(
187 const scoped_refptr<webrtc::MediaStreamInterface>& media_stream,
188 int source_render_view_id,
189 int source_render_frame_id,
192 int frames_per_buffer)
193 : state_(UNINITIALIZED),
194 source_render_view_id_(source_render_view_id),
195 source_render_frame_id_(source_render_frame_id),
196 session_id_(session_id),
197 media_stream_(media_stream),
201 audio_delay_milliseconds_(0),
202 fifo_delay_milliseconds_(0),
203 sample_rate_(sample_rate),
204 frames_per_buffer_(frames_per_buffer) {
205 WebRtcLogMessage(base::StringPrintf(
206 "WAR::WAR. source_render_view_id=%d"
207 ", session_id=%d, sample_rate=%d, frames_per_buffer=%d",
208 source_render_view_id,
// Must run on the thread accepted by |thread_checker_|, and only once the
// renderer is in the UNINITIALIZED state (both are DCHECKed).
214 WebRtcAudioRenderer::~WebRtcAudioRenderer() {
215 DCHECK(thread_checker_.CalledOnValidThread());
216 DCHECK_EQ(state_, UNINITIALIZED);
// Prepares the renderer for |source|. Derives the audio parameters of the
// source (the WebRTC client) and of the sink (the native output stream) from
// |sample_rate_| and |frames_per_buffer_|, sets up a pull FIFO when the two
// buffer sizes differ, creates and initializes the output device and reports
// the chosen configuration to UMA. Must be called in the UNINITIALIZED state
// on the thread accepted by |thread_checker_|; |lock_| is held for the whole
// call.
220 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
221 DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
222 DCHECK(thread_checker_.CalledOnValidThread());
223 base::AutoLock auto_lock(lock_);
224 DCHECK_EQ(state_, UNINITIALIZED);
226 DCHECK(!sink_.get());
229 // Use stereo output on all platforms.
230 media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO;
232 // TODO(tommi,henrika): Maybe we should just change |sample_rate_| to be
233 // immutable and change its value instead of using a temporary?
234 int sample_rate = sample_rate_;
235 DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;
237 // WebRTC does not yet support higher rates than 96000 on the client side
238 // and 48000 is the preferred sample rate. Therefore, if 192000 is detected,
239 // we change the rate to 48000 instead. The consequence is that the native
240 // layer will be opened up at 192kHz but WebRTC will provide data at 48kHz
241 // which will then be resampled by the audio converter on the browser side
242 // to match the native audio layer.
243 if (sample_rate == 192000) {
244 DVLOG(1) << "Resampling from 48000 to 192000 is required";
// Known sample rates are reported as enum values; anything else is reported
// as a raw count under a separate histogram name.
247 media::AudioSampleRate asr = media::AsAudioSampleRate(sample_rate);
248 if (asr != media::kUnexpectedAudioSampleRate) {
249 UMA_HISTOGRAM_ENUMERATION(
250 "WebRTC.AudioOutputSampleRate", asr, media::kUnexpectedAudioSampleRate);
252 UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputSampleRateUnexpected", sample_rate);
255 // Verify that the reported output hardware sample rate is supported
256 // on the current platform.
257 if (std::find(&kValidOutputRates[0],
258 &kValidOutputRates[0] + arraysize(kValidOutputRates),
260 &kValidOutputRates[arraysize(kValidOutputRates)]) {
261 DLOG(ERROR) << sample_rate << " is not a supported output rate.";
265 // Set up audio parameters for the source, i.e., the WebRTC client.
267 // The WebRTC client only supports multiples of 10ms as buffer size where
268 // 10ms is preferred for lowest possible delay.
269 media::AudioParameters source_params;
// |sample_rate| / 100 is the number of frames in 10 ms of audio.
270 int buffer_size = (sample_rate / 100);
271 DVLOG(1) << "Using WebRTC output buffer size: " << buffer_size;
273 int channels = ChannelLayoutToChannelCount(channel_layout);
274 source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
275 channel_layout, channels, 0,
276 sample_rate, 16, buffer_size);
278 // Set up audio parameters for the sink, i.e., the native audio output stream.
279 // We strive to open up using native parameters to achieve best possible
280 // performance and to ensure that no FIFO is needed on the browser side to
281 // match the client request. Any mismatch between the source and the sink is
282 // taken care of in this class instead using a pull FIFO.
284 media::AudioParameters sink_params;
286 // Use native output size as default.
287 buffer_size = frames_per_buffer_;
288 #if defined(OS_ANDROID)
289 // TODO(henrika): Keep tuning this scheme, especially for low-latency
290 // cases. Might not be possible to come up with the perfect solution using
291 // the render side only.
292 const int frames_per_10ms = (sample_rate / 100);
293 if (buffer_size < 2 * frames_per_10ms) {
294 // Examples of low-latency frame sizes and the resulting |buffer_size|:
295 // Nexus 7 : 240 audio frames => 2*480 = 960
296 // Nexus 10 : 256 => 2*441 = 882
297 // Galaxy Nexus: 144 => 2*441 = 882
298 buffer_size = 2 * frames_per_10ms;
299 DVLOG(1) << "Low-latency output detected on Android";
302 DVLOG(1) << "Using sink output buffer size: " << buffer_size;
304 sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
305 channel_layout, channels, 0, sample_rate, 16, buffer_size);
307 // Create a FIFO if re-buffering is required to match the source input with
308 // the sink request. The source acts as provider here and the sink as
310 fifo_delay_milliseconds_ = 0;
311 if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
312 DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
313 << " to " << sink_params.frames_per_buffer();
314 audio_fifo_.reset(new media::AudioPullFifo(
315 source_params.channels(),
316 source_params.frames_per_buffer(),
318 &WebRtcAudioRenderer::SourceCallback,
319 base::Unretained(this))));
321 if (sink_params.frames_per_buffer() > source_params.frames_per_buffer()) {
// NOTE(review): the quotient below is a fraction far smaller than 1 for every
// rate in |kValidOutputRates| and is truncated when stored in an int, so
// |frame_duration_milliseconds| is 0 and |fifo_delay_milliseconds_| never
// becomes non-zero. Consider computing
// (frame difference * kMillisecondsPerSecond) / sample rate instead.
322 int frame_duration_milliseconds = base::Time::kMillisecondsPerSecond /
323 static_cast<double>(source_params.sample_rate());
324 fifo_delay_milliseconds_ = (sink_params.frames_per_buffer() -
325 source_params.frames_per_buffer()) * frame_duration_milliseconds;
329 // Allocate local audio buffers based on the parameters above.
330 // It is assumed that each audio sample contains 16 bits and each
331 // audio frame contains one or two audio samples depending on the
332 // number of channels.
334 new int16[source_params.frames_per_buffer() * source_params.channels()]);
337 source->SetRenderFormat(source_params);
339 // Configure the audio rendering client and start rendering.
340 sink_ = AudioDeviceFactory::NewOutputDevice(
341 source_render_view_id_, source_render_frame_id_);
343 // TODO(tommi): Rename InitializeUnifiedStream to rather reflect association
345 DCHECK_GE(session_id_, 0);
346 sink_->InitializeUnifiedStream(sink_params, this, session_id_);
350 // User must call Play() before any audio can be heard.
353 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
354 source_params.channel_layout(),
355 media::CHANNEL_LAYOUT_MAX);
// NOTE(review): the next statement records the raw frame count (e.g. 480)
// into an enumeration histogram whose boundary is kUnexpectedAudioBufferSize,
// and AddHistogramFramesPerBuffer() right after it records to the same
// histogram name using the mapped enum value. The raw report looks redundant
// and out of range for the enumeration - confirm and remove it.
356 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
357 source_params.frames_per_buffer(),
358 kUnexpectedAudioBufferSize);
359 AddHistogramFramesPerBuffer(source_params.frames_per_buffer());
364 scoped_refptr<MediaStreamAudioRenderer>
365 WebRtcAudioRenderer::CreateSharedAudioRendererProxy(
366 const scoped_refptr<webrtc::MediaStreamInterface>& media_stream) {
367 content::SharedAudioRenderer::OnPlayStateChanged on_play_state_changed =
368 base::Bind(&WebRtcAudioRenderer::OnPlayStateChanged, this);
369 return new SharedAudioRenderer(this, media_stream, on_play_state_changed);
372 bool WebRtcAudioRenderer::IsStarted() const {
373 DCHECK(thread_checker_.CalledOnValidThread());
374 return start_ref_count_ != 0;
// NOTE(review): only the logging and the thread check are visible in this
// view; IsStarted() and Stop() rely on |start_ref_count_|, so presumably it
// is incremented here - confirm against the full file.
377 void WebRtcAudioRenderer::Start() {
378 DVLOG(1) << "WebRtcAudioRenderer::Start()";
379 DCHECK(thread_checker_.CalledOnValidThread());
// Marks the renderer's own |playing_state_| as playing and routes the change
// through OnPlayStateChanged() for |media_stream_|.
// NOTE(review): the statement guarded by the playing() check is not visible
// here; presumably the call is a no-op when already playing - confirm.
383 void WebRtcAudioRenderer::Play() {
384 DVLOG(1) << "WebRtcAudioRenderer::Play()";
385 DCHECK(thread_checker_.CalledOnValidThread());
387 if (playing_state_.playing())
390 playing_state_.set_playing(true);
392 OnPlayStateChanged(media_stream_, &playing_state_);
// Moves the renderer towards the PLAYING state. Requires a preceding Start()
// (DCHECKed through |start_ref_count_|) and takes |lock_| for the rest of the
// call. When the state was not PLAYING yet, the recorded audio delay is reset
// and the FIFO is cleared.
395 void WebRtcAudioRenderer::EnterPlayState() {
396 DVLOG(1) << "WebRtcAudioRenderer::EnterPlayState()";
397 DCHECK(thread_checker_.CalledOnValidThread());
398 DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
399 base::AutoLock auto_lock(lock_);
400 if (state_ == UNINITIALIZED)
// A non-zero play reference count implies that we are already PLAYING.
403 DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
406 if (state_ != PLAYING) {
// NOTE(review): Initialize() creates |audio_fifo_| only when the source and
// sink buffer sizes differ; no null check is visible in this view - confirm
// that the two statements below are guarded.
410 audio_delay_milliseconds_ = 0;
411 audio_fifo_->Clear();
// Clears the playing flag of the renderer's own |playing_state_| and routes
// the change through OnPlayStateChanged() for |media_stream_|.
// NOTE(review): the statement guarded by the !playing() check is not visible
// here; presumably the call is a no-op when already paused - confirm.
416 void WebRtcAudioRenderer::Pause() {
417 DVLOG(1) << "WebRtcAudioRenderer::Pause()";
418 DCHECK(thread_checker_.CalledOnValidThread());
419 if (!playing_state_.playing())
422 playing_state_.set_playing(false);
424 OnPlayStateChanged(media_stream_, &playing_state_);
// Drops one play reference. Requires a preceding Start() (DCHECKed through
// |start_ref_count_|), takes |lock_| for the rest of the call and expects the
// renderer to be PLAYING with a positive play reference count.
// NOTE(review): what happens when the count reaches zero is not visible in
// this view; presumably the state leaves PLAYING - confirm.
427 void WebRtcAudioRenderer::EnterPauseState() {
428 DVLOG(1) << "WebRtcAudioRenderer::EnterPauseState()";
429 DCHECK(thread_checker_.CalledOnValidThread());
430 DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
431 base::AutoLock auto_lock(lock_);
432 if (state_ == UNINITIALIZED)
435 DCHECK_EQ(state_, PLAYING);
436 DCHECK_GT(play_ref_count_, 0);
437 if (!--play_ref_count_)
// Drops one start reference. Once the count is exhausted the renderer
// unregisters itself from |source_| and returns to the UNINITIALIZED state.
// NOTE(review): the statements guarded by the two checks below, the scope of
// the lock and the call that stops the sink are not visible in this view.
441 void WebRtcAudioRenderer::Stop() {
442 DVLOG(1) << "WebRtcAudioRenderer::Stop()";
443 DCHECK(thread_checker_.CalledOnValidThread());
445 base::AutoLock auto_lock(lock_);
446 if (state_ == UNINITIALIZED)
449 if (--start_ref_count_)
452 DVLOG(1) << "Calling RemoveAudioRenderer and Stop().";
454 source_->RemoveAudioRenderer(this);
456 state_ = UNINITIALIZED;
459 // Make sure to stop the sink while _not_ holding the lock since the Render()
460 // callback may currently be executing and try to grab the lock while we're
461 // stopping the thread on which it runs.
465 void WebRtcAudioRenderer::SetVolume(float volume) {
466 DCHECK(thread_checker_.CalledOnValidThread());
467 DCHECK(volume >= 0.0f && volume <= 1.0f);
469 playing_state_.set_volume(volume);
470 OnPlayStateChanged(media_stream_, &playing_state_);
473 base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const {
474 return base::TimeDelta();
// NOTE(review): the return statement of this query is not visible in this
// view, so the reported value cannot be documented from here.
477 bool WebRtcAudioRenderer::IsLocalRenderer() const {
// Audio callback that fills |audio_bus|. Holds |lock_| for the whole call and
// remembers |audio_delay_milliseconds| for use in SourceCallback(). Returns
// the number of frames in |audio_bus| while PLAYING and 0 otherwise.
481 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
482 int audio_delay_milliseconds) {
483 base::AutoLock auto_lock(lock_);
487 DVLOG(2) << "WebRtcAudioRenderer::Render()";
488 DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds;
490 audio_delay_milliseconds_ = audio_delay_milliseconds;
// Data is pulled either through the FIFO or directly from SourceCallback().
// NOTE(review): the condition selecting between the two calls below is not
// visible in this view; presumably it depends on whether |audio_fifo_| was
// created in Initialize() - confirm.
493 audio_fifo_->Consume(audio_bus, audio_bus->frames());
495 SourceCallback(0, audio_bus);
497 return (state_ == PLAYING) ? audio_bus->frames() : 0;
// Error callback; the visible code only logs the event.
500 void WebRtcAudioRenderer::OnRenderError() {
502 LOG(ERROR) << "OnRenderError()";
505 // Called by AudioPullFifo when more data is necessary. Render() also calls
// it directly, passing 0 as |fifo_frame_delay|. In the visible code
// |fifo_frame_delay| is only logged.
506 void WebRtcAudioRenderer::SourceCallback(
507 int fifo_frame_delay, media::AudioBus* audio_bus) {
508 DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
509 << fifo_frame_delay << ", "
510 << audio_bus->frames() << ")";
// The delay reported to |source_| is the delay last passed to Render() plus
// the FIFO delay computed in Initialize().
512 int output_delay_milliseconds = audio_delay_milliseconds_;
513 output_delay_milliseconds += fifo_delay_milliseconds_;
514 DVLOG(2) << "output_delay_milliseconds: " << output_delay_milliseconds;
516 // We need to keep render data for the |source_| regardless of |state_|,
517 // otherwise the data will be buffered up inside |source_|.
518 source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
519 audio_bus->channels(), audio_bus->frames(),
520 output_delay_milliseconds);
522 // Avoid filling up the audio bus if we are not playing; instead
523 // return here and ensure that the returned value in Render() is 0.
524 if (state_ != PLAYING) {
529 // De-interleave each channel and convert to 32-bit floating-point
530 // with nominal range -1.0 -> +1.0 to match the callback format.
531 audio_bus->FromInterleaved(buffer_.get(),
// Recomputes the volume of |source| from the playing states registered for
// it in |source_playing_states_| and applies the result through
// source->SetVolume(). Only states that are currently playing contribute,
// and their volumes are added up.
536 void WebRtcAudioRenderer::UpdateSourceVolume(
537 webrtc::AudioSourceInterface* source) {
538 DCHECK(thread_checker_.CalledOnValidThread());
540 // Note: If there are no playing audio renderers, then the volume will be
544 SourcePlayingStates::iterator entry = source_playing_states_.find(source);
545 if (entry != source_playing_states_.end()) {
546 PlayingStates& states = entry->second;
547 for (PlayingStates::const_iterator it = states.begin();
548 it != states.end(); ++it) {
549 if ((*it)->playing())
550 volume += (*it)->volume();
554 // The valid range for volume scaling of a remote webrtc source is
555 // 0.0-10.0 where 1.0 is no attenuation/boost.
// NOTE(review): only the lower bound check is visible in this view;
// presumably the sum is also limited to 10.0 before it is applied - confirm.
556 DCHECK(volume >= 0.0f);
560 DVLOG(1) << "Setting remote source volume: " << volume;
561 source->SetVolume(volume);
// Registers |state|, which must already be marked as playing (DCHECKed),
// for |source| in |source_playing_states_|. A state that is already in the
// list for |source| is detected before the insertion.
// NOTE(review): the return statements are not visible in this view;
// presumably false means "already registered" and true "added" - confirm.
564 bool WebRtcAudioRenderer::AddPlayingState(
565 webrtc::AudioSourceInterface* source,
566 PlayingState* state) {
567 DCHECK(thread_checker_.CalledOnValidThread());
568 DCHECK(state->playing());
569 // Look up or add the |source| to the map.
570 PlayingStates& array = source_playing_states_[source];
571 if (std::find(array.begin(), array.end(), state) != array.end())
574 array.push_back(state);
// Unregisters |state|, which must no longer be marked as playing (DCHECKed),
// from the list kept for |source| in |source_playing_states_|. Both an
// unknown |source| and an unregistered |state| are detected before erasing.
// NOTE(review): the return statements and the condition under which the
// whole map entry for |source| is erased are not visible in this view;
// presumably the entry is dropped once its list is empty - confirm.
579 bool WebRtcAudioRenderer::RemovePlayingState(
580 webrtc::AudioSourceInterface* source,
581 PlayingState* state) {
582 DCHECK(thread_checker_.CalledOnValidThread());
583 DCHECK(!state->playing());
584 SourcePlayingStates::iterator found = source_playing_states_.find(source);
585 if (found == source_playing_states_.end())
588 PlayingStates& array = found->second;
589 PlayingStates::iterator state_it =
590 std::find(array.begin(), array.end(), state);
591 if (state_it == array.end())
594 array.erase(state_it);
597 source_playing_states_.erase(found);
// Applies a changed |state| to the source of every audio track of
// |media_stream|: a state that is not playing is removed from the source's
// list, a playing one is added, and the source volume is then updated.
// NOTE(review): the statements that run when RemovePlayingState() or
// AddPlayingState() return true are not visible in this view; presumably
// they enter the pause/play state of the renderer - confirm.
602 void WebRtcAudioRenderer::OnPlayStateChanged(
603 const scoped_refptr<webrtc::MediaStreamInterface>& media_stream,
604 PlayingState* state) {
605 webrtc::AudioTrackVector tracks(media_stream->GetAudioTracks());
606 for (webrtc::AudioTrackVector::iterator it = tracks.begin();
607 it != tracks.end(); ++it) {
608 webrtc::AudioSourceInterface* source = (*it)->GetSource();
610 if (!state->playing()) {
611 if (RemovePlayingState(source, state))
613 } else if (AddPlayingState(source, state)) {
616 UpdateSourceVolume(source);
620 } // namespace content