1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/media/webrtc_audio_renderer.h"
7 #include "base/logging.h"
8 #include "base/metrics/histogram.h"
9 #include "base/strings/string_util.h"
10 #include "content/renderer/media/audio_device_factory.h"
11 #include "content/renderer/media/webrtc_audio_device_impl.h"
12 #include "media/audio/audio_output_device.h"
13 #include "media/audio/audio_parameters.h"
14 #include "media/audio/sample_rates.h"
17 #include "base/win/windows_version.h"
18 #include "media/audio/win/core_audio_util_win.h"
// Hardware sample rates that are accepted for the output side. Initialize()
// searches this table and rejects any rate that is not listed for the current
// platform.
#if defined(OS_WIN) || defined(OS_MACOSX)
// AudioHardwareConfig::GetOutputSampleRate() asks the audio layer for its
// current sample rate (set by the user) on Windows and Mac OS X. The rates
// listed below add restrictions, and Initialize() will fail if the user
// selects any rate outside this list.
const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD)
const int kValidOutputRates[] = {48000, 44100};
#elif defined(OS_ANDROID)
// TODO(leozwang): We want to use native sampling rate on Android to achieve
// low latency, currently 16000 is used to work around audio problem on some
// devices.
const int kValidOutputRates[] = {48000, 44100, 16000};
#else
// Fallback for every other platform. Without this #else branch the table
// would be defined twice on Android and the conditional would never close.
const int kValidOutputRates[] = {44100};
#endif
// TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
// Buckets of the "WebRTC.AudioOutputFramesPerBuffer" enumerated histogram.
// The numeric value of each enumerator is the recorded sample, so entries may
// only be added immediately before kUnexpectedAudioBufferSize.
// NOTE(review): the enumerator order below follows the order of the cases in
// AsAudioFramesPerBuffer() -- confirm it against the histogram definition.
enum AudioFramesPerBuffer {
  k160,
  k320,
  k440,
  k480,
  k640,
  k880,
  k960,
  k1440,
  k1920,
  kUnexpectedAudioBufferSize  // Must always be last!
};

// Helper method to convert integral values to their respective enum values
// above, or kUnexpectedAudioBufferSize if no match exists.
// We map 441 to k440 to avoid changes in the XML part for histograms.
// It is still possible to map the histogram result to the actual buffer size.
// See http://crbug.com/243450 for details.
//
// |frames_per_buffer| is a buffer size in audio frames. Note that 440 itself
// is not a recognized size; only 441 maps to k440.
AudioFramesPerBuffer AsAudioFramesPerBuffer(int frames_per_buffer) {
  switch (frames_per_buffer) {
    case 160: return k160;
    case 320: return k320;
    case 441: return k440;
    case 480: return k480;
    case 640: return k640;
    case 880: return k880;
    case 960: return k960;
    case 1440: return k1440;
    case 1920: return k1920;
  }
  return kUnexpectedAudioBufferSize;
}
77 void AddHistogramFramesPerBuffer(int param) {
78 AudioFramesPerBuffer afpb = AsAudioFramesPerBuffer(param);
79 if (afpb != kUnexpectedAudioBufferSize) {
80 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
81 afpb, kUnexpectedAudioBufferSize);
83 // Report unexpected sample rates using a unique histogram name.
84 UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputFramesPerBufferUnexpected", param);
88 // This is a simple wrapper class that's handed out to users of a shared
89 // WebRtcAudioRenderer instance. This class maintains the per-user 'playing'
90 // and 'started' states to avoid problems related to incorrect usage which
91 // might violate the implementation assumptions inside WebRtcAudioRenderer
92 // (see the play reference count).
//
// Every method first DCHECKs that it runs on the thread bound to
// |thread_checker_|. The methods whose bodies are visible in this view
// (SetVolume, GetCurrentRenderTime, IsLocalRenderer) forward straight to
// |delegate_|.
// NOTE(review): several lines of this class (access specifiers, closing
// braces, the remaining statements of the destructor and of Start/Play/
// Pause/Stop, and the |started_| / |playing_| declarations) are not visible in
// this view -- confirm against the full file before editing.
93 class SharedAudioRenderer : public MediaStreamAudioRenderer {
// Wraps |delegate|; the proxy starts out neither started nor playing.
95 SharedAudioRenderer(const scoped_refptr<MediaStreamAudioRenderer>& delegate)
96 : delegate_(delegate), started_(false), playing_(false) {
// Logs the destruction at verbosity level 1.
100 virtual ~SharedAudioRenderer() {
101 DCHECK(thread_checker_.CalledOnValidThread());
102 DVLOG(1) << __FUNCTION__;
// NOTE(review): only the thread check of Start() is visible here.
106 virtual void Start() OVERRIDE {
107 DCHECK(thread_checker_.CalledOnValidThread());
// NOTE(review): only the thread check of Play() is visible here.
114 virtual void Play() OVERRIDE {
115 DCHECK(thread_checker_.CalledOnValidThread());
// NOTE(review): only the thread check of Pause() is visible here.
123 virtual void Pause() OVERRIDE {
124 DCHECK(thread_checker_.CalledOnValidThread());
// NOTE(review): only the thread check of Stop() is visible here.
132 virtual void Stop() OVERRIDE {
133 DCHECK(thread_checker_.CalledOnValidThread());
// Forwards |volume| to the shared renderer unchanged.
141 virtual void SetVolume(float volume) OVERRIDE {
142 DCHECK(thread_checker_.CalledOnValidThread());
143 return delegate_->SetVolume(volume);
// Returns whatever render time the shared renderer reports.
146 virtual base::TimeDelta GetCurrentRenderTime() const OVERRIDE {
147 DCHECK(thread_checker_.CalledOnValidThread());
148 return delegate_->GetCurrentRenderTime();
// Returns the shared renderer's answer unchanged.
151 virtual bool IsLocalRenderer() const OVERRIDE {
152 DCHECK(thread_checker_.CalledOnValidThread());
153 return delegate_->IsLocalRenderer();
// Used by the DCHECKs above to verify the calling thread.
157 base::ThreadChecker thread_checker_;
// The renderer that every call is forwarded to; held by reference count.
158 scoped_refptr<MediaStreamAudioRenderer> delegate_;
// Constructs a renderer in the UNINITIALIZED state. The arguments are only
// stored here; the recorded delays start at 0 and no audio objects are created
// until Initialize() is called.
// NOTE(review): the initializers reference |session_id| and |sample_rate|,
// whose parameter declarations (and possibly further member initializers) are
// not visible in this view -- confirm against the full file.
165 WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id,
168 int frames_per_buffer)
169 : state_(UNINITIALIZED),
170 source_render_view_id_(source_render_view_id),
171 session_id_(session_id),
175 audio_delay_milliseconds_(0),
176 fifo_delay_milliseconds_(0),
177 sample_rate_(sample_rate),
178 frames_per_buffer_(frames_per_buffer) {
// Destructor. DCHECKs that it runs on the thread bound to |thread_checker_|
// and that the renderer is back in the UNINITIALIZED state, which is the state
// Stop() leaves it in once it passes its reference-count check.
// NOTE(review): the end of this body is not visible in this view.
181 WebRtcAudioRenderer::~WebRtcAudioRenderer() {
182 DCHECK(thread_checker_.CalledOnValidThread());
183 DCHECK_EQ(state_, UNINITIALIZED);
187 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
188 DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
189 DCHECK(thread_checker_.CalledOnValidThread());
190 base::AutoLock auto_lock(lock_);
191 DCHECK_EQ(state_, UNINITIALIZED);
193 DCHECK(!sink_.get());
196 // Use stereo output on all platforms.
197 media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO;
199 // TODO(tommi,henrika): Maybe we should just change |sample_rate_| to be
200 // immutable and change its value instead of using a temporary?
201 int sample_rate = sample_rate_;
202 DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;
204 // WebRTC does not yet support higher rates than 96000 on the client side
205 // and 48000 is the preferred sample rate. Therefore, if 192000 is detected,
206 // we change the rate to 48000 instead. The consequence is that the native
207 // layer will be opened up at 192kHz but WebRTC will provide data at 48kHz
208 // which will then be resampled by the audio converted on the browser side
209 // to match the native audio layer.
210 if (sample_rate == 192000) {
211 DVLOG(1) << "Resampling from 48000 to 192000 is required";
214 media::AudioSampleRate asr = media::AsAudioSampleRate(sample_rate);
215 if (asr != media::kUnexpectedAudioSampleRate) {
216 UMA_HISTOGRAM_ENUMERATION(
217 "WebRTC.AudioOutputSampleRate", asr, media::kUnexpectedAudioSampleRate);
219 UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputSampleRateUnexpected", sample_rate);
222 // Verify that the reported output hardware sample rate is supported
223 // on the current platform.
224 if (std::find(&kValidOutputRates[0],
225 &kValidOutputRates[0] + arraysize(kValidOutputRates),
227 &kValidOutputRates[arraysize(kValidOutputRates)]) {
228 DLOG(ERROR) << sample_rate << " is not a supported output rate.";
232 // Set up audio parameters for the source, i.e., the WebRTC client.
234 // The WebRTC client only supports multiples of 10ms as buffer size where
235 // 10ms is preferred for lowest possible delay.
236 media::AudioParameters source_params;
237 int buffer_size = (sample_rate / 100);
238 DVLOG(1) << "Using WebRTC output buffer size: " << buffer_size;
240 int channels = ChannelLayoutToChannelCount(channel_layout);
241 source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
242 channel_layout, channels, 0,
243 sample_rate, 16, buffer_size);
245 // Set up audio parameters for the sink, i.e., the native audio output stream.
246 // We strive to open up using native parameters to achieve best possible
247 // performance and to ensure that no FIFO is needed on the browser side to
248 // match the client request. Any mismatch between the source and the sink is
249 // taken care of in this class instead using a pull FIFO.
251 media::AudioParameters sink_params;
253 // Use native output siz as default.
254 buffer_size = frames_per_buffer_;
255 #if defined(OS_ANDROID)
256 // TODO(henrika): Keep tuning this scheme and espcicially for low-latency
257 // cases. Might not be possible to come up with the perfect solution using
258 // the render side only.
259 const int frames_per_10ms = (sample_rate / 100);
260 if (buffer_size < 2 * frames_per_10ms) {
261 // Examples of low-latency frame sizes and the resulting |buffer_size|:
262 // Nexus 7 : 240 audio frames => 2*480 = 960
263 // Nexus 10 : 256 => 2*441 = 882
264 // Galaxy Nexus: 144 => 2*441 = 882
265 buffer_size = 2 * frames_per_10ms;
266 DVLOG(1) << "Low-latency output detected on Android";
269 DVLOG(1) << "Using sink output buffer size: " << buffer_size;
271 sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
272 channel_layout, channels, 0, sample_rate, 16, buffer_size);
274 // Create a FIFO if re-buffering is required to match the source input with
275 // the sink request. The source acts as provider here and the sink as
277 fifo_delay_milliseconds_ = 0;
278 if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
279 DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
280 << " to " << sink_params.frames_per_buffer();
281 audio_fifo_.reset(new media::AudioPullFifo(
282 source_params.channels(),
283 source_params.frames_per_buffer(),
285 &WebRtcAudioRenderer::SourceCallback,
286 base::Unretained(this))));
288 if (sink_params.frames_per_buffer() > source_params.frames_per_buffer()) {
289 int frame_duration_milliseconds = base::Time::kMillisecondsPerSecond /
290 static_cast<double>(source_params.sample_rate());
291 fifo_delay_milliseconds_ = (sink_params.frames_per_buffer() -
292 source_params.frames_per_buffer()) * frame_duration_milliseconds;
296 // Allocate local audio buffers based on the parameters above.
297 // It is assumed that each audio sample contains 16 bits and each
298 // audio frame contains one or two audio samples depending on the
299 // number of channels.
301 new int16[source_params.frames_per_buffer() * source_params.channels()]);
304 source->SetRenderFormat(source_params);
306 // Configure the audio rendering client and start rendering.
307 sink_ = AudioDeviceFactory::NewOutputDevice(source_render_view_id_);
309 // TODO(tommi): Rename InitializeUnifiedStream to rather reflect association
311 DCHECK_GE(session_id_, 0);
312 sink_->InitializeUnifiedStream(sink_params, this, session_id_);
316 // User must call Play() before any audio can be heard.
319 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
320 source_params.channel_layout(),
321 media::CHANNEL_LAYOUT_MAX);
322 UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
323 source_params.frames_per_buffer(),
324 kUnexpectedAudioBufferSize);
325 AddHistogramFramesPerBuffer(source_params.frames_per_buffer());
330 scoped_refptr<MediaStreamAudioRenderer>
331 WebRtcAudioRenderer::CreateSharedAudioRendererProxy() {
332 return new SharedAudioRenderer(this);
335 bool WebRtcAudioRenderer::IsStarted() const {
336 DCHECK(thread_checker_.CalledOnValidThread());
337 return start_ref_count_ != 0;
// Logs the call and DCHECKs that it is made on the thread bound to
// |thread_checker_|.
// NOTE(review): Play() and Pause() DCHECK that |start_ref_count_| is positive
// ("Did you forget to call Start()?") and Stop() decrements it, so the
// matching increment presumably belongs here; that statement is not visible in
// this view -- confirm.
340 void WebRtcAudioRenderer::Start() {
341 DVLOG(1) << "WebRtcAudioRenderer::Start()";
342 DCHECK(thread_checker_.CalledOnValidThread());
// Requests playback. Requires a prior Start() (|start_ref_count_| > 0) and
// takes |lock_| for the whole body. When the renderer is not already PLAYING,
// the recorded audio delay is reset to 0 and the FIFO is cleared.
// NOTE(review): several statements of this body (what follows the
// UNINITIALIZED check, the play reference count update, the state change and
// any guard around the |audio_fifo_| access) are not visible in this view.
346 void WebRtcAudioRenderer::Play() {
347 DVLOG(1) << "WebRtcAudioRenderer::Play()";
348 DCHECK(thread_checker_.CalledOnValidThread());
349 DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
350 base::AutoLock auto_lock(lock_);
351 if (state_ == UNINITIALIZED)
// A non-zero play count is only expected while the renderer is PLAYING.
354 DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
357 if (state_ != PLAYING) {
// NOTE(review): |audio_fifo_| is only created by Initialize() when
// re-buffering is needed, so this access presumably sits behind a null check
// that is not visible here -- confirm.
361 audio_delay_milliseconds_ = 0;
362 audio_fifo_->Clear();
// Releases one play reference. Requires a prior Start()
// (|start_ref_count_| > 0) and takes |lock_| for the whole body. Outside the
// UNINITIALIZED case the renderer is expected to be PLAYING with a positive
// play count.
// NOTE(review): the statement following the UNINITIALIZED check and the
// statement executed when the play count reaches zero are not visible in this
// view.
367 void WebRtcAudioRenderer::Pause() {
368 DVLOG(1) << "WebRtcAudioRenderer::Pause()";
369 DCHECK(thread_checker_.CalledOnValidThread());
370 DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
371 base::AutoLock auto_lock(lock_);
372 if (state_ == UNINITIALIZED)
375 DCHECK_EQ(state_, PLAYING);
376 DCHECK_GT(play_ref_count_, 0);
// Decrements the play count; the condition is true once it reaches zero.
377 if (!--play_ref_count_)
// Releases one start reference. After the reference-count check passes, the
// renderer is removed from |source_| and the state returns to UNINITIALIZED.
// NOTE(review): the statements following both `if` checks, the scope that
// limits how long |lock_| is held, and the sink shutdown that the trailing
// comment describes are not visible in this view.
381 void WebRtcAudioRenderer::Stop() {
382 DVLOG(1) << "WebRtcAudioRenderer::Stop()";
383 DCHECK(thread_checker_.CalledOnValidThread());
385 base::AutoLock auto_lock(lock_);
386 if (state_ == UNINITIALIZED)
// Decrements the start count; the condition is true while references remain.
389 if (--start_ref_count_)
392 DVLOG(1) << "Calling RemoveAudioRenderer and Stop().";
394 source_->RemoveAudioRenderer(this);
396 state_ = UNINITIALIZED;
399 // Make sure to stop the sink while _not_ holding the lock since the Render()
400 // callback may currently be executing and try to grab the lock while we're
401 // stopping the thread on which it runs.
405 void WebRtcAudioRenderer::SetVolume(float volume) {
406 DCHECK(thread_checker_.CalledOnValidThread());
407 base::AutoLock auto_lock(lock_);
408 if (state_ == UNINITIALIZED)
411 sink_->SetVolume(volume);
414 base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const {
415 return base::TimeDelta();
// Reports whether this renderer is a local renderer.
// NOTE(review): the body, and therefore the returned value, is not visible in
// this view.
418 bool WebRtcAudioRenderer::IsLocalRenderer() const {
// Fills |audio_bus| with rendered audio. Takes |lock_| for the whole call and
// stores |audio_delay_milliseconds| in |audio_delay_milliseconds_|, which
// SourceCallback() reads when it reports the output delay to the source.
// Returns the number of frames in |audio_bus| while PLAYING and 0 otherwise.
// NOTE(review): the conditions that choose between the FIFO path and the
// direct SourceCallback() path, and any early return, are not visible in this
// view.
422 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
423 int audio_delay_milliseconds) {
424 base::AutoLock auto_lock(lock_);
428 DVLOG(2) << "WebRtcAudioRenderer::Render()";
429 DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds;
431 audio_delay_milliseconds_ = audio_delay_milliseconds;
// FIFO path: the FIFO was constructed in Initialize() with SourceCallback as
// its callback.
434 audio_fifo_->Consume(audio_bus, audio_bus->frames());
// Direct path: pull straight from the source with a FIFO frame delay of 0.
436 SourceCallback(0, audio_bus);
438 return (state_ == PLAYING) ? audio_bus->frames() : 0;
// Error callback; the visible code only logs the event at ERROR severity.
// NOTE(review): part of this body is not visible in this view.
441 void WebRtcAudioRenderer::OnRenderError() {
443 LOG(ERROR) << "OnRenderError()";
446 // Called by AudioPullFifo when more data is necessary.
// Render() also calls this directly with a |fifo_frame_delay| of 0.
//
// Pulls audio for |audio_bus| from |source_| into |buffer_| and reports the
// output delay as the sum of |audio_delay_milliseconds_| and
// |fifo_delay_milliseconds_|. |fifo_frame_delay| is only logged by the visible
// code.
// NOTE(review): the body of the not-playing branch and the remaining
// arguments of the FromInterleaved() call are not visible in this view.
447 void WebRtcAudioRenderer::SourceCallback(
448 int fifo_frame_delay, media::AudioBus* audio_bus) {
449 DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
450 << fifo_frame_delay << ", "
451 << audio_bus->frames() << ")";
453 int output_delay_milliseconds = audio_delay_milliseconds_;
454 output_delay_milliseconds += fifo_delay_milliseconds_;
455 DVLOG(2) << "output_delay_milliseconds: " << output_delay_milliseconds;
457 // We need to keep render data for the |source_| regardless of |state_|,
458 // otherwise the data will be buffered up inside |source_|.
459 source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
460 audio_bus->channels(), audio_bus->frames(),
461 output_delay_milliseconds);
463 // Avoid filling up the audio bus if we are not playing; instead
464 // return here and ensure that the returned value in Render() is 0.
465 if (state_ != PLAYING) {
470 // De-interleave each channel and convert to 32-bit floating-point
471 // with nominal range -1.0 -> +1.0 to match the callback format.
472 audio_bus->FromInterleaved(buffer_.get(),
477 } // namespace content