src/content/renderer/media/webrtc_audio_renderer.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "content/renderer/media/webrtc_audio_renderer.h"
   6
   7 #include "base/logging.h"
   8 #include "base/metrics/histogram.h"
   9 #include "base/strings/string_util.h"
  10 #include "content/renderer/media/audio_device_factory.h"
  11 #include "content/renderer/media/webrtc_audio_device_impl.h"
  12 #include "media/audio/audio_output_device.h"
  13 #include "media/audio/audio_parameters.h"
  14 #include "media/audio/sample_rates.h"
  15
  16 #if defined(OS_WIN)
  17 #include "base/win/windows_version.h"
  18 #include "media/audio/win/core_audio_util_win.h"
  19 #endif
  20
  21 namespace content {
  22
  23 namespace {
  24
// Supported hardware sample rates for the output side.  Initialize() looks
// the requested rate up in this table and fails if it is not listed for the
// platform being built.
#if defined(OS_WIN) || defined(OS_MACOSX)
// AudioHardwareConfig::GetOutputSampleRate() asks the audio layer for its
// current sample rate (set by the user) on Windows and Mac OS X.  The rates
// listed below add restrictions, and Initialize() will fail if the user
// selects any rate outside this set.
const int kValidOutputRates[] = {96000, 48000, 44100, 32000, 16000};
#elif defined(OS_LINUX) || defined(OS_OPENBSD)
const int kValidOutputRates[] = {48000, 44100};
#elif defined(OS_ANDROID)
// TODO(leozwang): We want to use the native sampling rate on Android to
// achieve low latency; currently 16000 is used to work around audio problems
// on some Android devices.
const int kValidOutputRates[] = {48000, 44100, 16000};
#else
const int kValidOutputRates[] = {44100};
#endif
  42
  43 // TODO(xians): Merge the following code to WebRtcAudioCapturer, or remove.
  44 enum AudioFramesPerBuffer {
  45   k160,
  46   k320,
  47   k440,
  48   k480,
  49   k640,
  50   k880,
  51   k960,
  52   k1440,
  53   k1920,
  54   kUnexpectedAudioBufferSize  // Must always be last!
  55 };
  56
  57 // Helper method to convert integral values to their respective enum values
  58 // above, or kUnexpectedAudioBufferSize if no match exists.
  59 // We map 441 to k440 to avoid changes in the XML part for histograms.
  60 // It is still possible to map the histogram result to the actual buffer size.
  61 // See http://crbug.com/243450 for details.
  62 AudioFramesPerBuffer AsAudioFramesPerBuffer(int frames_per_buffer) {
  63   switch (frames_per_buffer) {
  64     case 160: return k160;
  65     case 320: return k320;
  66     case 441: return k440;
  67     case 480: return k480;
  68     case 640: return k640;
  69     case 880: return k880;
  70     case 960: return k960;
  71     case 1440: return k1440;
  72     case 1920: return k1920;
  73   }
  74   return kUnexpectedAudioBufferSize;
  75 }
  76
  77 void AddHistogramFramesPerBuffer(int param) {
  78   AudioFramesPerBuffer afpb = AsAudioFramesPerBuffer(param);
  79   if (afpb != kUnexpectedAudioBufferSize) {
  80     UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
  81                               afpb, kUnexpectedAudioBufferSize);
  82   } else {
  83     // Report unexpected sample rates using a unique histogram name.
  84     UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputFramesPerBufferUnexpected", param);
  85   }
  86 }
  87
  88 // This is a simple wrapper class that's handed out to users of a shared
  89 // WebRtcAudioRenderer instance.  This class maintains the per-user 'playing'
  90 // and 'started' states to avoid problems related to incorrect usage which
  91 // might violate the implementation assumptions inside WebRtcAudioRenderer
  92 // (see the play reference count).
  93 class SharedAudioRenderer : public MediaStreamAudioRenderer {
  94  public:
  95   SharedAudioRenderer(const scoped_refptr<MediaStreamAudioRenderer>& delegate)
  96       : delegate_(delegate), started_(false), playing_(false) {
  97   }
  98
  99  protected:
 100   virtual ~SharedAudioRenderer() {
 101     DCHECK(thread_checker_.CalledOnValidThread());
 102     DVLOG(1) << __FUNCTION__;
 103     Stop();
 104   }
 105
 106   virtual void Start() OVERRIDE {
 107     DCHECK(thread_checker_.CalledOnValidThread());
 108     if (started_)
 109       return;
 110     started_ = true;
 111     delegate_->Start();
 112   }
 113
 114   virtual void Play() OVERRIDE {
 115     DCHECK(thread_checker_.CalledOnValidThread());
 116     DCHECK(started_);
 117     if (playing_)
 118       return;
 119     playing_ = true;
 120     delegate_->Play();
 121   }
 122
 123   virtual void Pause() OVERRIDE {
 124     DCHECK(thread_checker_.CalledOnValidThread());
 125     DCHECK(started_);
 126     if (!playing_)
 127       return;
 128     playing_ = false;
 129     delegate_->Pause();
 130   }
 131
 132   virtual void Stop() OVERRIDE {
 133     DCHECK(thread_checker_.CalledOnValidThread());
 134     if (!started_)
 135       return;
 136     Pause();
 137     started_ = false;
 138     delegate_->Stop();
 139   }
 140
 141   virtual void SetVolume(float volume) OVERRIDE {
 142     DCHECK(thread_checker_.CalledOnValidThread());
 143     return delegate_->SetVolume(volume);
 144   }
 145
 146   virtual base::TimeDelta GetCurrentRenderTime() const OVERRIDE {
 147     DCHECK(thread_checker_.CalledOnValidThread());
 148     return delegate_->GetCurrentRenderTime();
 149   }
 150
 151   virtual bool IsLocalRenderer() const OVERRIDE {
 152     DCHECK(thread_checker_.CalledOnValidThread());
 153     return delegate_->IsLocalRenderer();
 154   }
 155
 156  private:
 157   base::ThreadChecker thread_checker_;
 158   scoped_refptr<MediaStreamAudioRenderer> delegate_;
 159   bool started_;
 160   bool playing_;
 161 };
 162
 163 }  // namespace
 164
// Creates a renderer in the UNINITIALIZED state with no source and with both
// reference counts at zero.  The arguments are only stored here; they are
// consumed later by Initialize():
//   |source_render_view_id| is handed to AudioDeviceFactory when the output
//       device is created.
//   |session_id| is passed to the output device when it is initialized.
//   |sample_rate| is treated as the output hardware sample rate.
//   |frames_per_buffer| is used as the default buffer size for the sink.
WebRtcAudioRenderer::WebRtcAudioRenderer(int source_render_view_id,
                                         int session_id,
                                         int sample_rate,
                                         int frames_per_buffer)
    : state_(UNINITIALIZED),
      source_render_view_id_(source_render_view_id),
      session_id_(session_id),
      source_(NULL),
      play_ref_count_(0),
      start_ref_count_(0),
      audio_delay_milliseconds_(0),
      fifo_delay_milliseconds_(0),
      sample_rate_(sample_rate),
      frames_per_buffer_(frames_per_buffer) {
}
 180
// Must run on the thread the renderer is bound to, and only while the
// renderer is in the UNINITIALIZED state (i.e. it was never initialized or the
// last Stop() has already been processed).  Releases the local audio buffer.
WebRtcAudioRenderer::~WebRtcAudioRenderer() {
  DCHECK(thread_checker_.CalledOnValidThread());
  DCHECK_EQ(state_, UNINITIALIZED);
  buffer_.reset();
}
 186
 187 bool WebRtcAudioRenderer::Initialize(WebRtcAudioRendererSource* source) {
 188   DVLOG(1) << "WebRtcAudioRenderer::Initialize()";
 189   DCHECK(thread_checker_.CalledOnValidThread());
 190   base::AutoLock auto_lock(lock_);
 191   DCHECK_EQ(state_, UNINITIALIZED);
 192   DCHECK(source);
 193   DCHECK(!sink_.get());
 194   DCHECK(!source_);
 195
 196   // Use stereo output on all platforms.
 197   media::ChannelLayout channel_layout = media::CHANNEL_LAYOUT_STEREO;
 198
 199   // TODO(tommi,henrika): Maybe we should just change |sample_rate_| to be
 200   // immutable and change its value instead of using a temporary?
 201   int sample_rate = sample_rate_;
 202   DVLOG(1) << "Audio output hardware sample rate: " << sample_rate;
 203
 204   // WebRTC does not yet support higher rates than 96000 on the client side
 205   // and 48000 is the preferred sample rate. Therefore, if 192000 is detected,
 206   // we change the rate to 48000 instead. The consequence is that the native
 207   // layer will be opened up at 192kHz but WebRTC will provide data at 48kHz
 208   // which will then be resampled by the audio converted on the browser side
 209   // to match the native audio layer.
 210   if (sample_rate == 192000) {
 211     DVLOG(1) << "Resampling from 48000 to 192000 is required";
 212     sample_rate = 48000;
 213   }
 214   media::AudioSampleRate asr = media::AsAudioSampleRate(sample_rate);
 215   if (asr != media::kUnexpectedAudioSampleRate) {
 216     UMA_HISTOGRAM_ENUMERATION(
 217         "WebRTC.AudioOutputSampleRate", asr, media::kUnexpectedAudioSampleRate);
 218   } else {
 219     UMA_HISTOGRAM_COUNTS("WebRTC.AudioOutputSampleRateUnexpected", sample_rate);
 220   }
 221
 222   // Verify that the reported output hardware sample rate is supported
 223   // on the current platform.
 224   if (std::find(&kValidOutputRates[0],
 225                 &kValidOutputRates[0] + arraysize(kValidOutputRates),
 226                 sample_rate) ==
 227                     &kValidOutputRates[arraysize(kValidOutputRates)]) {
 228     DLOG(ERROR) << sample_rate << " is not a supported output rate.";
 229     return false;
 230   }
 231
 232   // Set up audio parameters for the source, i.e., the WebRTC client.
 233
 234   // The WebRTC client only supports multiples of 10ms as buffer size where
 235   // 10ms is preferred for lowest possible delay.
 236   media::AudioParameters source_params;
 237   int buffer_size = (sample_rate / 100);
 238   DVLOG(1) << "Using WebRTC output buffer size: " << buffer_size;
 239
 240   int channels = ChannelLayoutToChannelCount(channel_layout);
 241   source_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
 242                       channel_layout, channels, 0,
 243                       sample_rate, 16, buffer_size);
 244
 245   // Set up audio parameters for the sink, i.e., the native audio output stream.
 246   // We strive to open up using native parameters to achieve best possible
 247   // performance and to ensure that no FIFO is needed on the browser side to
 248   // match the client request. Any mismatch between the source and the sink is
 249   // taken care of in this class instead using a pull FIFO.
 250
 251   media::AudioParameters sink_params;
 252
 253   // Use native output siz as default.
 254   buffer_size = frames_per_buffer_;
 255 #if defined(OS_ANDROID)
 256   // TODO(henrika): Keep tuning this scheme and espcicially for low-latency
 257   // cases. Might not be possible to come up with the perfect solution using
 258   // the render side only.
 259   const int frames_per_10ms = (sample_rate / 100);
 260   if (buffer_size < 2 * frames_per_10ms) {
 261     // Examples of low-latency frame sizes and the resulting |buffer_size|:
 262     //  Nexus 7     : 240 audio frames => 2*480 = 960
 263     //  Nexus 10    : 256              => 2*441 = 882
 264     //  Galaxy Nexus: 144              => 2*441 = 882
 265     buffer_size = 2 * frames_per_10ms;
 266     DVLOG(1) << "Low-latency output detected on Android";
 267   }
 268 #endif
 269   DVLOG(1) << "Using sink output buffer size: " << buffer_size;
 270
 271   sink_params.Reset(media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
 272                     channel_layout, channels, 0, sample_rate, 16, buffer_size);
 273
 274   // Create a FIFO if re-buffering is required to match the source input with
 275   // the sink request. The source acts as provider here and the sink as
 276   // consumer.
 277   fifo_delay_milliseconds_ = 0;
 278   if (source_params.frames_per_buffer() != sink_params.frames_per_buffer()) {
 279     DVLOG(1) << "Rebuffering from " << source_params.frames_per_buffer()
 280              << " to " << sink_params.frames_per_buffer();
 281     audio_fifo_.reset(new media::AudioPullFifo(
 282         source_params.channels(),
 283         source_params.frames_per_buffer(),
 284         base::Bind(
 285             &WebRtcAudioRenderer::SourceCallback,
 286             base::Unretained(this))));
 287
 288     if (sink_params.frames_per_buffer() > source_params.frames_per_buffer()) {
 289       int frame_duration_milliseconds = base::Time::kMillisecondsPerSecond /
 290           static_cast<double>(source_params.sample_rate());
 291       fifo_delay_milliseconds_ = (sink_params.frames_per_buffer() -
 292         source_params.frames_per_buffer()) * frame_duration_milliseconds;
 293     }
 294   }
 295
 296   // Allocate local audio buffers based on the parameters above.
 297   // It is assumed that each audio sample contains 16 bits and each
 298   // audio frame contains one or two audio samples depending on the
 299   // number of channels.
 300   buffer_.reset(
 301       new int16[source_params.frames_per_buffer() * source_params.channels()]);
 302
 303   source_ = source;
 304   source->SetRenderFormat(source_params);
 305
 306   // Configure the audio rendering client and start rendering.
 307   sink_ = AudioDeviceFactory::NewOutputDevice(source_render_view_id_);
 308
 309   // TODO(tommi): Rename InitializeUnifiedStream to rather reflect association
 310   // with a session.
 311   DCHECK_GE(session_id_, 0);
 312   sink_->InitializeUnifiedStream(sink_params, this, session_id_);
 313
 314   sink_->Start();
 315
 316   // User must call Play() before any audio can be heard.
 317   state_ = PAUSED;
 318
 319   UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputChannelLayout",
 320                             source_params.channel_layout(),
 321                             media::CHANNEL_LAYOUT_MAX);
 322   UMA_HISTOGRAM_ENUMERATION("WebRTC.AudioOutputFramesPerBuffer",
 323                             source_params.frames_per_buffer(),
 324                             kUnexpectedAudioBufferSize);
 325   AddHistogramFramesPerBuffer(source_params.frames_per_buffer());
 326
 327   return true;
 328 }
 329
 330 scoped_refptr<MediaStreamAudioRenderer>
 331 WebRtcAudioRenderer::CreateSharedAudioRendererProxy() {
 332   return new SharedAudioRenderer(this);
 333 }
 334
 335 bool WebRtcAudioRenderer::IsStarted() const {
 336   DCHECK(thread_checker_.CalledOnValidThread());
 337   return start_ref_count_ != 0;
 338 }
 339
 340 void WebRtcAudioRenderer::Start() {
 341   DVLOG(1) << "WebRtcAudioRenderer::Start()";
 342   DCHECK(thread_checker_.CalledOnValidThread());
 343   ++start_ref_count_;
 344 }
 345
 346 void WebRtcAudioRenderer::Play() {
 347   DVLOG(1) << "WebRtcAudioRenderer::Play()";
 348   DCHECK(thread_checker_.CalledOnValidThread());
 349   DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
 350   base::AutoLock auto_lock(lock_);
 351   if (state_ == UNINITIALIZED)
 352     return;
 353
 354   DCHECK(play_ref_count_ == 0 || state_ == PLAYING);
 355   ++play_ref_count_;
 356
 357   if (state_ != PLAYING) {
 358     state_ = PLAYING;
 359
 360     if (audio_fifo_) {
 361       audio_delay_milliseconds_ = 0;
 362       audio_fifo_->Clear();
 363     }
 364   }
 365 }
 366
 367 void WebRtcAudioRenderer::Pause() {
 368   DVLOG(1) << "WebRtcAudioRenderer::Pause()";
 369   DCHECK(thread_checker_.CalledOnValidThread());
 370   DCHECK_GT(start_ref_count_, 0) << "Did you forget to call Start()?";
 371   base::AutoLock auto_lock(lock_);
 372   if (state_ == UNINITIALIZED)
 373     return;
 374
 375   DCHECK_EQ(state_, PLAYING);
 376   DCHECK_GT(play_ref_count_, 0);
 377   if (!--play_ref_count_)
 378     state_ = PAUSED;
 379 }
 380
 381 void WebRtcAudioRenderer::Stop() {
 382   DVLOG(1) << "WebRtcAudioRenderer::Stop()";
 383   DCHECK(thread_checker_.CalledOnValidThread());
 384   {
 385     base::AutoLock auto_lock(lock_);
 386     if (state_ == UNINITIALIZED)
 387       return;
 388
 389     if (--start_ref_count_)
 390       return;
 391
 392     DVLOG(1) << "Calling RemoveAudioRenderer and Stop().";
 393
 394     source_->RemoveAudioRenderer(this);
 395     source_ = NULL;
 396     state_ = UNINITIALIZED;
 397   }
 398
 399   // Make sure to stop the sink while _not_ holding the lock since the Render()
 400   // callback may currently be executing and try to grab the lock while we're
 401   // stopping the thread on which it runs.
 402   sink_->Stop();
 403 }
 404
 405 void WebRtcAudioRenderer::SetVolume(float volume) {
 406   DCHECK(thread_checker_.CalledOnValidThread());
 407   base::AutoLock auto_lock(lock_);
 408   if (state_ == UNINITIALIZED)
 409     return;
 410
 411   sink_->SetVolume(volume);
 412 }
 413
// Always returns a default-constructed base::TimeDelta; this renderer does
// not track its render time.
base::TimeDelta WebRtcAudioRenderer::GetCurrentRenderTime() const {
  return base::TimeDelta();
}
 417
// Always returns false for this renderer.
bool WebRtcAudioRenderer::IsLocalRenderer() const {
  return false;
}
 421
 422 int WebRtcAudioRenderer::Render(media::AudioBus* audio_bus,
 423                                 int audio_delay_milliseconds) {
 424   base::AutoLock auto_lock(lock_);
 425   if (!source_)
 426     return 0;
 427
 428   DVLOG(2) << "WebRtcAudioRenderer::Render()";
 429   DVLOG(2) << "audio_delay_milliseconds: " << audio_delay_milliseconds;
 430
 431   audio_delay_milliseconds_ = audio_delay_milliseconds;
 432
 433   if (audio_fifo_)
 434     audio_fifo_->Consume(audio_bus, audio_bus->frames());
 435   else
 436     SourceCallback(0, audio_bus);
 437
 438   return (state_ == PLAYING) ? audio_bus->frames() : 0;
 439 }
 440
// Error callback.  There is no recovery logic yet; the error is only flagged
// as not implemented and logged.
void WebRtcAudioRenderer::OnRenderError() {
  NOTIMPLEMENTED();
  LOG(ERROR) << "OnRenderError()";
}
 445
 446 // Called by AudioPullFifo when more data is necessary.
 447 void WebRtcAudioRenderer::SourceCallback(
 448     int fifo_frame_delay, media::AudioBus* audio_bus) {
 449   DVLOG(2) << "WebRtcAudioRenderer::SourceCallback("
 450            << fifo_frame_delay << ", "
 451            << audio_bus->frames() << ")";
 452
 453   int output_delay_milliseconds = audio_delay_milliseconds_;
 454   output_delay_milliseconds += fifo_delay_milliseconds_;
 455   DVLOG(2) << "output_delay_milliseconds: " << output_delay_milliseconds;
 456
 457   // We need to keep render data for the |source_| regardless of |state_|,
 458   // otherwise the data will be buffered up inside |source_|.
 459   source_->RenderData(reinterpret_cast<uint8*>(buffer_.get()),
 460                       audio_bus->channels(), audio_bus->frames(),
 461                       output_delay_milliseconds);
 462
 463   // Avoid filling up the audio bus if we are not playing; instead
 464   // return here and ensure that the returned value in Render() is 0.
 465   if (state_ != PLAYING) {
 466     audio_bus->Zero();
 467     return;
 468   }
 469
 470   // De-interleave each channel and convert to 32-bit floating-point
 471   // with nominal range -1.0 -> +1.0 to match the callback format.
 472   audio_bus->FromInterleaved(buffer_.get(),
 473                              audio_bus->frames(),
 474                              sizeof(buffer_[0]));
 475 }
 476
 477 }  // namespace content