src/content/renderer/media/webrtc_audio_device_impl.h

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
   6 #define CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_
   7
   8 #include <string>
   9 #include <vector>
  10
  11 #include "base/basictypes.h"
  12 #include "base/compiler_specific.h"
  13 #include "base/logging.h"
  14 #include "base/memory/ref_counted.h"
  15 #include "base/memory/scoped_ptr.h"
  16 #include "base/threading/thread_checker.h"
  17 #include "content/common/content_export.h"
  18 #include "content/renderer/media/webrtc_audio_capturer.h"
  19 #include "content/renderer/media/webrtc_audio_device_not_impl.h"
  20 #include "content/renderer/media/webrtc_audio_renderer.h"
  21 #include "media/base/audio_capturer_source.h"
  22 #include "media/base/audio_renderer_sink.h"
  23
  24 // A WebRtcAudioDeviceImpl instance implements the abstract interface
  25 // webrtc::AudioDeviceModule which makes it possible for a user (e.g. webrtc::
  26 // VoiceEngine) to register this class as an external AudioDeviceModule (ADM).
  27 // Then WebRtcAudioDeviceImpl::SetSessionId() needs to be called to set the
  28 // session id that tells which device to use. The user can then call
  29 // WebRtcAudioDeviceImpl::StartPlayout() and
  30 // WebRtcAudioDeviceImpl::StartRecording() from the render process to initiate
  31 // and start audio rendering and capturing in the browser process. IPC is
  32 // utilized to set up the media streams.
  33 //
  34 // Usage example:
  35 //
  36 //   using namespace webrtc;
  37 //
  38 //   {
  39 //      scoped_refptr<WebRtcAudioDeviceImpl> external_adm;
  40 //      external_adm = new WebRtcAudioDeviceImpl();
  41 //      external_adm->SetSessionId(session_id);
  42 //      VoiceEngine* voe = VoiceEngine::Create();
  43 //      VoEBase* base = VoEBase::GetInterface(voe);
  44 //      base->Init(external_adm);
  45 //      int ch = base->CreateChannel();
  46 //      ...
  47 //      base->StartReceive(ch)
  48 //      base->StartPlayout(ch);
  49 //      base->StartSending(ch);
  50 //      ...
  51 //      <== full-duplex audio session with AGC enabled ==>
  52 //      ...
  53 //      base->DeleteChannel(ch);
  54 //      base->Terminate();
  55 //      base->Release();
  56 //      VoiceEngine::Delete(voe);
  57 //   }
  58 //
  59 // webrtc::VoiceEngine::Init() calls these ADM methods (in this order):
  60 //
  61 //  RegisterAudioCallback(this)
  62 //    webrtc::VoiceEngine is a webrtc::AudioTransport implementation and
  63 //    implements the RecordedDataIsAvailable() and NeedMorePlayData() callbacks.
  64 //
  65 //  Init()
  66 //    Creates and initializes the AudioOutputDevice and AudioInputDevice
  67 //    objects.
  68 //
  69 //  SetAGC(true)
  70 //    Enables the adaptive analog mode of the AGC which ensures that a
  71 //    suitable microphone volume level will be set. This scheme will affect
  72 //    the actual microphone control slider.
  73 //
  74 // AGC overview:
  75 //
  76 // It aims to maintain a constant speech loudness level from the microphone.
  77 // This is done by both controlling the analog microphone gain and applying
  78 // digital gain. The microphone gain on the sound card is slowly
  79 // increased/decreased during speech only. By observing the microphone control
  80 // slider you can see it move when you speak. If you scream, the slider moves
  81 // downwards and then upwards again when you return to normal. It is not
  82 // uncommon that the slider hits the maximum. This means that the maximum
  83 // analog gain is not large enough to give the desired loudness. Nevertheless,
  84 // we can in general still attain the desired loudness. If the microphone
  85 // control slider is moved manually, the gain adaptation restarts and returns
  86 // to roughly the same position as before the change if the circumstances are
  87 // still the same. When the input microphone signal causes saturation, the
  88 // level is decreased dramatically and has to re-adapt towards the old level.
  89 // The adaptation is a slowly varying process and at the beginning of capture
  90 // this is noticed by a slow increase in volume. Smaller changes in microphone
  91 // input level are leveled out by the built-in digital control. For larger
  92 // differences we need to rely on the slow adaptation.
  93 // See http://en.wikipedia.org/wiki/Automatic_gain_control for more details.
  94 //
  95 // AGC implementation details:
  96 //
  97 // The adaptive analog mode of the AGC is always enabled for desktop platforms
  98 // in WebRTC.
  99 //
 100 // Before recording starts, the ADM enables AGC on the AudioInputDevice.
 101 //
 102 // A capture session with AGC is started up as follows (simplified):
 103 //
 104 //                            [renderer]
 105 //                                |
 106 //                     ADM::StartRecording()
 107 //             AudioInputDevice::InitializeOnIOThread()
 108 //           AudioInputHostMsg_CreateStream(..., agc=true)               [IPC]
 109 //                                |
 110 //                       [IPC to the browser]
 111 //                                |
 112 //              AudioInputRendererHost::OnCreateStream()
 113 //              AudioInputController::CreateLowLatency()
 114 //         AudioInputController::DoSetAutomaticGainControl(true)
 115 //            AudioInputStream::SetAutomaticGainControl(true)
 116 //                                |
 117 // AGC is now enabled in the media layer and streaming starts (details omitted).
 118 // The figure below illustrates the AGC scheme which is active in combination
 119 // with the default media flow explained earlier.
 120 //                                |
 121 //                            [browser]
 122 //                                |
 123 //                AudioInputStream::(Capture thread loop)
 124 //  AgcAudioStream<AudioInputStream>::GetAgcVolume() => get latest mic volume
 125 //                 AudioInputData::OnData(..., volume)
 126 //              AudioInputController::OnData(..., volume)
 127 //               AudioInputSyncWriter::Write(..., volume)
 128 //                                |
 129 //      [volume | size | data] is sent to the renderer         [shared memory]
 130 //                                |
 131 //                            [renderer]
 132 //                                |
 133 //          AudioInputDevice::AudioThreadCallback::Process()
 134 //            WebRtcAudioDeviceImpl::Capture(..., volume)
 135 //    AudioTransport::RecordedDataIsAvailable(...,volume, new_volume)
 136 //                                |
 137 // The AGC now uses the current volume input and computes a suitable new
 138 // level given by the |new_volume| output. This value is only non-zero if the
 139 // AGC has taken a decision that the microphone level should change.
 140 //                                |
 141 //                      if (new_volume != 0)
 142 //              AudioInputDevice::SetVolume(new_volume)
 143 //              AudioInputHostMsg_SetVolume(new_volume)                  [IPC]
 144 //                                |
 145 //                       [IPC to the browser]
 146 //                                |
 147 //                 AudioInputRendererHost::OnSetVolume()
 148 //                  AudioInputController::SetVolume()
 149 //             AudioInputStream::SetVolume(scaled_volume)
 150 //                                |
 151 // Here we set the new microphone level in the media layer and at the same time
 152 // read the new setting (we might not get exactly what is set).
 153 //                                |
 154 //             AudioInputData::OnData(..., updated_volume)
 155 //           AudioInputController::OnData(..., updated_volume)
 156 //                                |
 157 //                                |
 158 // This process repeats until we stop capturing data. Note that, a common
 159 // steady state is that the volume control reaches its max and the new_volume
 160 // value from the AGC is zero. A loud voice input is required to break this
 161 // state and start lowering the level again.
 162 //
 163 // Implementation notes:
 164 //
 165 //  - This class must be created and destroyed on the main render thread and
 166 //    most methods are called on the same thread. However, some methods are
 167 //    also called on a Libjingle worker thread. RenderData is called on the
 168 //    AudioOutputDevice thread and OnData on the AudioInputDevice thread.
 169 //    To summarize: this class lives on four different threads.
 170 //  - The webrtc::AudioDeviceModule is reference counted.
 171 //  - AGC is only supported in combination with the WASAPI-based audio layer
 172 //    on Windows, i.e., it is not supported on Windows XP.
 173 //  - All volume levels required for the AGC scheme are transferred in a
 174 //    normalized range [0.0, 1.0]. Scaling takes place in both endpoints
 175 //    (WebRTC client and media layer). This approach ensures that we can avoid
 176 //    transferring maximum levels between the renderer and the browser.
 177 //
 178
 179 namespace content {
 180
     // Forward declarations of the capturer and renderer classes that the
     // declarations below refer to by pointer or scoped_refptr.
 181 class WebRtcAudioCapturer;
 182 class WebRtcAudioRenderer;
 183
 184 // TODO(xians): Move the following two interfaces to webrtc so that
 185 // libjingle can own references to the renderer and capturer.
     //
     // Interface for the object that supplies audio data for rendering. In this
     // file it is implemented by WebRtcAudioDeviceImpl.
 186 class WebRtcAudioRendererSource {
 187  public:
 188   // Callback to get the rendered interleaved data.
       // |audio_data| is the buffer for the interleaved samples; it is non-const,
       // so it is presumably written by the implementation (TODO confirm).
       // |number_of_channels| and |number_of_frames| presumably describe the
       // layout of that buffer, and |audio_delay_milliseconds| the current
       // output delay (TODO confirm against the implementation).
 189   // TODO(xians): Change uint8* to int16*.
 190   virtual void RenderData(uint8* audio_data,
 191                           int number_of_channels,
 192                           int number_of_frames,
 193                           int audio_delay_milliseconds) = 0;
 194
 195   // Set the format for the render (output) audio parameters.
 196   virtual void SetRenderFormat(const media::AudioParameters& params) = 0;
 197
 198   // Callback to notify the client that the renderer is going away.
 199   virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) = 0;
 200
 201  protected:
       // Protected: instances cannot be deleted through a pointer to this
       // interface.
 202   virtual ~WebRtcAudioRendererSource() {}
 203 };
 204
     // Interface for a consumer of captured audio data. In this file it is
     // implemented by WebRtcAudioDeviceImpl.
 205 class PeerConnectionAudioSink {
 206  public:
 207   // Callback to deliver the captured interleaved data.
 208   // |channels| contains a vector of WebRtc VoE channels.
 209   // |audio_data| is the pointer to the audio data.
 210   // |sample_rate| is the sample frequency of audio data.
 211   // |number_of_channels| is the number of channels reflecting the order of
 212   // surround sound channels.
       // |number_of_frames| is presumably the number of frames in |audio_data|
       // (TODO confirm against the implementation).
 213   // |audio_delay_milliseconds| is recording delay value.
 214   // |current_volume| is current microphone volume, in range of [0, 255].
 215   // |need_audio_processing| indicates if the audio needs WebRtc AEC/NS/AGC
 216   // audio processing.
       // |key_pressed| presumably reports whether a key press was detected while
       // capturing (TODO confirm; not documented in this header).
 217   // The return value is the new microphone volume, in the range of [0, 255].
 218   // When the volume does not need to be updated, it returns 0.
 219   virtual int OnData(const int16* audio_data,
 220                      int sample_rate,
 221                      int number_of_channels,
 222                      int number_of_frames,
 223                      const std::vector<int>& channels,
 224                      int audio_delay_milliseconds,
 225                      int current_volume,
 226                      bool need_audio_processing,
 227                      bool key_pressed) = 0;
 228
 229   // Set the format for the capture audio parameters.
 230   // This is called when the capture format has changed, and it must be called
 231   // on the same thread as the data callback (OnData()).
 232   virtual void OnSetFormat(const media::AudioParameters& params) = 0;
 233
 234  protected:
       // Protected: instances cannot be deleted through a pointer to this
       // interface.
 235  virtual ~PeerConnectionAudioSink() {}
 236 };
 237
 238 // Note that this class inherits from webrtc::AudioDeviceModule but due to
 239 // the high number of non-implemented methods, we move the cruft over to the
 240 // WebRtcAudioDeviceNotImpl.
     // In addition it implements PeerConnectionAudioSink (capture callbacks)
     // and WebRtcAudioRendererSource (render callbacks).
 241 class CONTENT_EXPORT WebRtcAudioDeviceImpl
 242     : NON_EXPORTED_BASE(public PeerConnectionAudioSink),
 243       NON_EXPORTED_BASE(public WebRtcAudioDeviceNotImpl),
 244       NON_EXPORTED_BASE(public WebRtcAudioRendererSource) {
 245  public:
 246   // The maximum volume value WebRtc uses; volume levels are in [0, 255].
 247   static const int kMaxVolumeLevel = 255;
 248
 249   // Instances of this object are created on the main render thread.
 250   WebRtcAudioDeviceImpl();
 251
 252   // webrtc::RefCountedModule implementation.
 253   // The creator must call AddRef() after construction and use Release()
 254   // to release the reference and delete this object.
 255   // Called on the main render thread.
 256   virtual int32_t AddRef() OVERRIDE;
 257   virtual int32_t Release() OVERRIDE;
 258
 259   // webrtc::AudioDeviceModule implementation.
 260   // All implemented methods are called on the main render thread unless
 261   // anything else is stated.
 262
       // Registers the webrtc::AudioTransport callback; the pointer is kept as a
       // weak reference in |audio_transport_callback_|.
 263   virtual int32_t RegisterAudioCallback(webrtc::AudioTransport* audio_callback)
 264       OVERRIDE;
 265
 266   virtual int32_t Init() OVERRIDE;
 267   virtual int32_t Terminate() OVERRIDE;
 268   virtual bool Initialized() const OVERRIDE;
 269
 270   virtual int32_t PlayoutIsAvailable(bool* available) OVERRIDE;
 271   virtual bool PlayoutIsInitialized() const OVERRIDE;
 272   virtual int32_t RecordingIsAvailable(bool* available) OVERRIDE;
 273   virtual bool RecordingIsInitialized() const OVERRIDE;
 274
 275   // All Start/Stop methods are called on a libJingle worker thread.
 276   virtual int32_t StartPlayout() OVERRIDE;
 277   virtual int32_t StopPlayout() OVERRIDE;
 278   virtual bool Playing() const OVERRIDE;
 279   virtual int32_t StartRecording() OVERRIDE;
 280   virtual int32_t StopRecording() OVERRIDE;
 281   virtual bool Recording() const OVERRIDE;
 282
 283   // Called on the AudioInputDevice worker thread.
 284   virtual int32_t SetMicrophoneVolume(uint32_t volume) OVERRIDE;
 285
 286   // TODO(henrika): sort out calling thread once we start using this API.
 287   virtual int32_t MicrophoneVolume(uint32_t* volume) const OVERRIDE;
 288
 289   virtual int32_t MaxMicrophoneVolume(uint32_t* max_volume) const OVERRIDE;
 290   virtual int32_t MinMicrophoneVolume(uint32_t* min_volume) const OVERRIDE;
 291   virtual int32_t StereoPlayoutIsAvailable(bool* available) const OVERRIDE;
 292   virtual int32_t StereoRecordingIsAvailable(bool* available) const OVERRIDE;
 293   virtual int32_t PlayoutDelay(uint16_t* delay_ms) const OVERRIDE;
 294   virtual int32_t RecordingDelay(uint16_t* delay_ms) const OVERRIDE;
 295   virtual int32_t RecordingSampleRate(uint32_t* samples_per_sec) const OVERRIDE;
 296   virtual int32_t PlayoutSampleRate(uint32_t* samples_per_sec) const OVERRIDE;
 297
 298   // Sets the |renderer_|, returns false if |renderer_| already exists.
 299   // Called on the main renderer thread.
 300   bool SetAudioRenderer(WebRtcAudioRenderer* renderer);
 301
 302   // Adds/Removes the capturer to the ADM.
 303   // TODO(xians): Remove these two methods once the ADM does not need to pass
 304   // hardware information up to WebRtc.
 305   void AddAudioCapturer(const scoped_refptr<WebRtcAudioCapturer>& capturer);
 306   void RemoveAudioCapturer(const scoped_refptr<WebRtcAudioCapturer>& capturer);
 307
 308   // Gets the default capturer, which is the last capturer in |capturers_|.
 309   scoped_refptr<WebRtcAudioCapturer> GetDefaultCapturer() const;
 310
 311   // Gets paired device information of the capture device for the audio
 312   // renderer. This is used to pass on a session id, sample rate and buffer
 313   // size to a webrtc audio renderer (either local or remote), so that audio
 314   // will be rendered to a matching output device.
 315   // Returns true if the capture device has a paired output device, otherwise
 316   // false. Note that if there is more than one open capture device the
 317   // function will not be able to pick an appropriate device and returns false.
 318   bool GetAuthorizedDeviceInfoForAudioRenderer(
 319       int* session_id, int* output_sample_rate, int* output_buffer_size);
 320
       // Accessor for |renderer_|.
       // NOTE(review): |renderer_| is listed below as protected by |lock_|, but it
       // is returned here without taking the lock -- confirm callers only use
       // this on a thread where that is safe.
 321   const scoped_refptr<WebRtcAudioRenderer>& renderer() const {
 322     return renderer_;
 323   }
       // Accessors for the cached output parameters held in
       // |output_audio_parameters_|.
 324   int output_buffer_size() const {
 325     return output_audio_parameters_.frames_per_buffer();
 326   }
 327   int output_channels() const {
 328     return output_audio_parameters_.channels();
 329   }
 330   int output_sample_rate() const {
 331     return output_audio_parameters_.sample_rate();
 332   }
 333
 334  private:
       // NOTE(review): <list> is not included directly by this header (only
       // <string> and <vector> are); std::list presumably arrives through
       // another include -- consider adding #include <list>.
 335   typedef std::list<scoped_refptr<WebRtcAudioCapturer> > CapturerList;
 336
 337   // Make destructor private to ensure that we can only be deleted by Release().
 338   virtual ~WebRtcAudioDeviceImpl();
 339
 340   // PeerConnectionAudioSink implementation.
       // Declared private here, so these overrides are only reachable through a
       // PeerConnectionAudioSink pointer or reference.
 341
 342   // Called on the AudioInputDevice worker thread.
 343   virtual int OnData(const int16* audio_data,
 344                      int sample_rate,
 345                      int number_of_channels,
 346                      int number_of_frames,
 347                      const std::vector<int>& channels,
 348                      int audio_delay_milliseconds,
 349                      int current_volume,
 350                      bool need_audio_processing,
 351                      bool key_pressed) OVERRIDE;
 352
 353   // Called on the AudioInputDevice worker thread.
 354   virtual void OnSetFormat(const media::AudioParameters& params) OVERRIDE;
 355
 356   // WebRtcAudioRendererSource implementation.
 357
 358   // Called on the AudioOutputDevice worker thread.
 359   virtual void RenderData(uint8* audio_data,
 360                           int number_of_channels,
 361                           int number_of_frames,
 362                           int audio_delay_milliseconds) OVERRIDE;
 363
 364   // Called on the main render thread.
 365   virtual void SetRenderFormat(const media::AudioParameters& params) OVERRIDE;
 366   virtual void RemoveAudioRenderer(WebRtcAudioRenderer* renderer) OVERRIDE;
 367
 368   // Used to DCHECK that we are called on the correct thread.
 369   base::ThreadChecker thread_checker_;
 370
       // Reference count, presumably maintained by AddRef()/Release() (their
       // implementation is not part of this header).
 371   int ref_count_;
 372
 373   // List of capturers which provide access to the native audio input layer
 374   // in the browser process.
 375   CapturerList capturers_;
 376
 377   // Provides access to the audio renderer in the browser process.
 378   scoped_refptr<WebRtcAudioRenderer> renderer_;
 379
 380   // Weak reference to the audio callback.
 381   // The webrtc client defines |audio_transport_callback_| by calling
 382   // RegisterAudioCallback().
 383   webrtc::AudioTransport* audio_transport_callback_;
 384
 385   // Cached values of used output audio parameters. Platform dependent.
 386   media::AudioParameters output_audio_parameters_;
 387
 388   // Cached value of the current audio delay on the input/capture side.
 389   int input_delay_ms_;
 390
 391   // Cached value of the current audio delay on the output/renderer side.
 392   int output_delay_ms_;
 393
 394   // Protects |recording_|, |output_delay_ms_|, |input_delay_ms_|, |renderer_|
 395   // and |microphone_volume_|.
       // NOTE(review): the original list named |recording_| twice; confirm
       // whether |playing_| was meant to be covered as well.
       // NOTE(review): no header for base::Lock is included directly here.
 396   mutable base::Lock lock_;
 397
 398   // Used to protect the racing of calling OnData() since there can be more
 399   // than one input stream calling OnData().
 400   mutable base::Lock capture_callback_lock_;
 401
       // State flags, presumably the values reported by Initialized(), Playing()
       // and Recording() respectively (TODO confirm against the implementation).
 402   bool initialized_;
 403   bool playing_;
 404   bool recording_;
 405
 406   // Used for histograms of total recording and playout times.
       // NOTE(review): no header for base::Time is included directly here.
 407   base::Time start_capture_time_;
 408   base::Time start_render_time_;
 409
 410   // Stores latest microphone volume received in a capture callback
       // (presumably OnData(); no CaptureData() is declared in this header).
 411   // Range is [0, 255].
 412   uint32_t microphone_volume_;
 413
 414   DISALLOW_COPY_AND_ASSIGN(WebRtcAudioDeviceImpl);
 415 };
 416
 417 }  // namespace content
 418
 419 #endif  // CONTENT_RENDERER_MEDIA_WEBRTC_AUDIO_DEVICE_IMPL_H_