// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/media_stream_audio_processor.h"

#include "base/command_line.h"
#include "base/debug/trace_event.h"
#include "base/metrics/field_trial.h"
#include "base/metrics/histogram.h"
#include "content/public/common/content_switches.h"
#include "content/renderer/media/media_stream_audio_processor_options.h"
#include "content/renderer/media/rtc_media_constraints.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_converter.h"
#include "media/base/audio_fifo.h"
#include "media/base/channel_layout.h"
#include "third_party/WebKit/public/platform/WebMediaConstraints.h"
#include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h"
#include "third_party/webrtc/modules/audio_processing/typing_detection.h"

namespace content {

namespace {

using webrtc::AudioProcessing;
using webrtc::MediaConstraintsInterface;

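// WebRTC's AudioProcessing operates natively at a fixed sample rate; the
// lower 16 kHz rate is used on Android, presumably to reduce CPU cost on
// mobile devices.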
#if defined(OS_ANDROID)
const int kAudioProcessingSampleRate = 16000;
#else
const int kAudioProcessingSampleRate = 32000;
#endif
const int kAudioProcessingNumberOfChannels = 1;

const int kMaxNumberOfBuffersInFifo = 2;

// Used by UMA histograms; entries shouldn't be re-ordered or removed.
enum AudioTrackProcessingStates {
  AUDIO_PROCESSING_ENABLED = 0,
  AUDIO_PROCESSING_DISABLED,
  AUDIO_PROCESSING_IN_WEBRTC,
  AUDIO_PROCESSING_MAX
};

void RecordProcessingState(AudioTrackProcessingStates state) {
  UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates",
                            state, AUDIO_PROCESSING_MAX);
}

}  // namespace

class MediaStreamAudioProcessor::MediaStreamAudioConverter
    : public media::AudioConverter::InputCallback {
 public:
  MediaStreamAudioConverter(const media::AudioParameters& source_params,
                            const media::AudioParameters& sink_params)
     : source_params_(source_params),
       sink_params_(sink_params),
       audio_converter_(source_params, sink_params_, false) {
    // An instance of MediaStreamAudioConverter may be created on the main
    // render thread and used on the audio thread, as is the case for
    // |MediaStreamAudioProcessor::capture_converter_|.
    thread_checker_.DetachFromThread();
    audio_converter_.AddInput(this);
    // Create and initialize the audio FIFO and audio bus wrapper.
    // The size of the FIFO should be at least twice the source buffer size
    // or twice the sink buffer size, whichever is larger.
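    // Worked example (illustrative numbers, not taken from this file): a
    // 44.1 kHz source with 441 frames per buffer and a 16 kHz sink with 160
    // frames per buffer give max(2 * 441, 2 * 160) = 882 frames below.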
    int buffer_size = std::max(
        kMaxNumberOfBuffersInFifo * source_params_.frames_per_buffer(),
        kMaxNumberOfBuffersInFifo * sink_params_.frames_per_buffer());
    fifo_.reset(new media::AudioFifo(source_params_.channels(), buffer_size));
    // TODO(xians): Use CreateWrapper to save one memcpy.
    audio_wrapper_ = media::AudioBus::Create(sink_params_.channels(),
                                             sink_params_.frames_per_buffer());
  }

  virtual ~MediaStreamAudioConverter() {
    audio_converter_.RemoveInput(this);
  }

  void Push(media::AudioBus* audio_source) {
    // Called on the audio thread, which is the capture audio thread for
    // |MediaStreamAudioProcessor::capture_converter_| and the render audio
    // thread for |MediaStreamAudioProcessor::render_converter_|. It must be
    // the same thread that calls Convert().
    DCHECK(thread_checker_.CalledOnValidThread());
    fifo_->Push(audio_source);
  }

  bool Convert(webrtc::AudioFrame* out) {
    // Called on the audio thread, which is the capture audio thread for
    // |MediaStreamAudioProcessor::capture_converter_| and the render audio
    // thread for |MediaStreamAudioProcessor::render_converter_|.
    DCHECK(thread_checker_.CalledOnValidThread());
    // Return false if there is not enough data in the FIFO; this happens when
    // fifo_->frames() / source_params_.sample_rate() is less than
    // sink_params_.frames_per_buffer() / sink_params_.sample_rate().
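    // Equivalently, Convert() needs at least sink_params_.frames_per_buffer()
    // * source_params_.sample_rate() / sink_params_.sample_rate() source
    // frames; e.g. (illustrative numbers) a 160-frame, 16 kHz sink fed from a
    // 44.1 kHz source needs 160 * 44100 / 16000 = 441 frames in the FIFO.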
    if (fifo_->frames() * sink_params_.sample_rate() <
        sink_params_.frames_per_buffer() * source_params_.sample_rate()) {
      return false;
    }

    // Convert data to the output format; this will trigger ProvideInput().
    audio_converter_.Convert(audio_wrapper_.get());

    // TODO(xians): Figure out a better way to handle the interleaved and
    // deinterleaved format switching.
    DCHECK_EQ(audio_wrapper_->frames(), sink_params_.frames_per_buffer());
    audio_wrapper_->ToInterleaved(audio_wrapper_->frames(),
                                  sink_params_.bits_per_sample() / 8,
                                  out->data_);
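    // With the 16-bit sink format used here, ToInterleaved() writes
    // frames * channels * 2 bytes of interleaved PCM into |out->data_|.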

    out->samples_per_channel_ = sink_params_.frames_per_buffer();
    out->sample_rate_hz_ = sink_params_.sample_rate();
    out->speech_type_ = webrtc::AudioFrame::kNormalSpeech;
    out->vad_activity_ = webrtc::AudioFrame::kVadUnknown;
    out->num_channels_ = sink_params_.channels();

    return true;
  }

  const media::AudioParameters& source_parameters() const {
    return source_params_;
  }
  const media::AudioParameters& sink_parameters() const {
    return sink_params_;
  }

 private:
  // AudioConverter::InputCallback implementation.
  virtual double ProvideInput(media::AudioBus* audio_bus,
                              base::TimeDelta buffer_delay) OVERRIDE {
    // Called on the realtime audio thread.
    // TODO(xians): Figure out why the first Convert() triggers ProvideInput()
    // two times.
    if (fifo_->frames() < audio_bus->frames())
      return 0;

    fifo_->Consume(audio_bus, 0, audio_bus->frames());

    // Return 1.0 to indicate no volume scaling on the data.
    return 1.0;
  }

  base::ThreadChecker thread_checker_;
  const media::AudioParameters source_params_;
  const media::AudioParameters sink_params_;

  // TODO(xians): Consider using SincResampler to save some memcpy.
  // Handles mixing and resampling between input and output parameters.
  media::AudioConverter audio_converter_;
  scoped_ptr<media::AudioBus> audio_wrapper_;
  scoped_ptr<media::AudioFifo> fifo_;
};

MediaStreamAudioProcessor::MediaStreamAudioProcessor(
    const blink::WebMediaConstraints& constraints,
    int effects,
    MediaStreamType type,
    WebRtcPlayoutDataSource* playout_data_source)
    : render_delay_ms_(0),
      playout_data_source_(playout_data_source),
      audio_mirroring_(false),
      typing_detected_(false) {
  capture_thread_checker_.DetachFromThread();
  render_thread_checker_.DetachFromThread();
  InitializeAudioProcessingModule(constraints, effects, type);
}

MediaStreamAudioProcessor::~MediaStreamAudioProcessor() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  StopAudioProcessing();
}

void MediaStreamAudioProcessor::OnCaptureFormatChanged(
    const media::AudioParameters& source_params) {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  // There is no need to hold a lock here since the caller guarantees that
  // there are no more PushCaptureData() and ProcessAndConsumeData() callbacks
  // on the capture thread.
  InitializeCaptureConverter(source_params);

  // Reset the |capture_thread_checker_| since the capture data will come from
  // a new capture thread.
  capture_thread_checker_.DetachFromThread();
}

void MediaStreamAudioProcessor::PushCaptureData(media::AudioBus* audio_source) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  DCHECK_EQ(audio_source->channels(),
            capture_converter_->source_parameters().channels());
  DCHECK_EQ(audio_source->frames(),
            capture_converter_->source_parameters().frames_per_buffer());

  if (audio_mirroring_ &&
      capture_converter_->source_parameters().channel_layout() ==
          media::CHANNEL_LAYOUT_STEREO) {
    // Swap the first and second channels.
    audio_source->SwapChannels(0, 1);
  }

  capture_converter_->Push(audio_source);
}

bool MediaStreamAudioProcessor::ProcessAndConsumeData(
    base::TimeDelta capture_delay, int volume, bool key_pressed,
    int* new_volume, int16** out) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData");

  if (!capture_converter_->Convert(&capture_frame_))
    return false;

  *new_volume = ProcessData(&capture_frame_, capture_delay, volume,
                            key_pressed);
  *out = capture_frame_.data_;

  return true;
}

const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const {
  return capture_converter_->source_parameters();
}

const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const {
  return capture_converter_->sink_parameters();
}

void MediaStreamAudioProcessor::StartAecDump(
    const base::PlatformFile& aec_dump_file) {
  if (audio_processing_)
    StartEchoCancellationDump(audio_processing_.get(), aec_dump_file);
}

void MediaStreamAudioProcessor::StopAecDump() {
  if (audio_processing_)
    StopEchoCancellationDump(audio_processing_.get());
}

void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus,
                                              int sample_rate,
                                              int audio_delay_milliseconds) {
  DCHECK(render_thread_checker_.CalledOnValidThread());
#if defined(OS_ANDROID) || defined(OS_IOS)
  DCHECK(audio_processing_->echo_control_mobile()->is_enabled());
#else
  DCHECK(audio_processing_->echo_cancellation()->is_enabled());
#endif

  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData");
  DCHECK_LT(audio_delay_milliseconds,
            std::numeric_limits<base::subtle::Atomic32>::max());
  base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds);
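  // |render_delay_ms_| is written here on the render thread and read on the
  // capture thread in ProcessData() via Acquire_Load(); the Release/Acquire
  // pair makes the latest delay value visible across threads without a lock.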

  InitializeRenderConverterIfNeeded(sample_rate, audio_bus->channels(),
                                    audio_bus->frames());

  render_converter_->Push(audio_bus);
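  // Drain the FIFO in 10 ms chunks; each converted frame is fed to the APM as
  // far-end (reverse-stream) reference data for echo cancellation.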
  while (render_converter_->Convert(&render_frame_))
    audio_processing_->AnalyzeReverseStream(&render_frame_);
}

void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  // There is no need to hold a lock here since the caller guarantees that
  // there are no more OnPlayoutData() callbacks on the render thread.
  render_thread_checker_.DetachFromThread();
  render_converter_.reset();
}

void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) {
  stats->typing_noise_detected =
      (base::subtle::Acquire_Load(&typing_detected_) != false);
  GetAecStats(audio_processing_.get(), stats);
}

void MediaStreamAudioProcessor::InitializeAudioProcessingModule(
    const blink::WebMediaConstraints& constraints, int effects,
    MediaStreamType type) {
  DCHECK(!audio_processing_);

  RTCMediaConstraints native_constraints(constraints);

  // Audio mirroring can be enabled even when audio processing is otherwise
  // disabled.
  audio_mirroring_ = GetPropertyFromConstraints(
      &native_constraints, webrtc::MediaConstraintsInterface::kAudioMirroring);

  if (!IsAudioTrackProcessingEnabled()) {
    RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC);
    return;
  }

  // Only apply the fixed constraints for gUM of MEDIA_DEVICE_AUDIO_CAPTURE.
  DCHECK(IsAudioMediaType(type));
  if (type == MEDIA_DEVICE_AUDIO_CAPTURE)
    ApplyFixedAudioConstraints(&native_constraints);

  if (effects & media::AudioParameters::ECHO_CANCELLER) {
    // If the platform echo canceller is enabled, disable the software AEC.
    native_constraints.AddMandatory(
        MediaConstraintsInterface::kEchoCancellation,
        MediaConstraintsInterface::kValueFalse, true);
  }

#if defined(OS_IOS)
  // On iOS, VPIO provides built-in AEC and AGC.
  const bool enable_aec = false;
  const bool enable_agc = false;
#else
  const bool enable_aec = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kEchoCancellation);
  const bool enable_agc = GetPropertyFromConstraints(
      &native_constraints, webrtc::MediaConstraintsInterface::kAutoGainControl);
#endif

#if defined(OS_IOS) || defined(OS_ANDROID)
  const bool enable_experimental_aec = false;
  const bool enable_typing_detection = false;
#else
  const bool enable_experimental_aec = GetPropertyFromConstraints(
      &native_constraints,
      MediaConstraintsInterface::kExperimentalEchoCancellation);
  const bool enable_typing_detection = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kTypingNoiseDetection);
#endif

  const bool enable_ns = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kNoiseSuppression);
  const bool enable_experimental_ns = GetPropertyFromConstraints(
      &native_constraints,
      MediaConstraintsInterface::kExperimentalNoiseSuppression);
  const bool enable_high_pass_filter = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kHighpassFilter);

  // Return immediately if no audio processing component is enabled.
  if (!enable_aec && !enable_experimental_aec && !enable_ns &&
      !enable_high_pass_filter && !enable_typing_detection && !enable_agc &&
      !enable_experimental_ns) {
    RecordProcessingState(AUDIO_PROCESSING_DISABLED);
    return;
  }

  // Create and configure the webrtc::AudioProcessing.
  audio_processing_.reset(webrtc::AudioProcessing::Create(0));

  // Enable the audio processing components.
  if (enable_aec) {
    EnableEchoCancellation(audio_processing_.get());
    if (enable_experimental_aec)
      EnableExperimentalEchoCancellation(audio_processing_.get());

    if (playout_data_source_)
      playout_data_source_->AddPlayoutSink(this);
  }

  if (enable_ns)
    EnableNoiseSuppression(audio_processing_.get());

  if (enable_experimental_ns)
    EnableExperimentalNoiseSuppression(audio_processing_.get());

  if (enable_high_pass_filter)
    EnableHighPassFilter(audio_processing_.get());

  if (enable_typing_detection) {
    // TODO(xians): Remove this |typing_detector_| after the typing suppression
    // is enabled by default.
    typing_detector_.reset(new webrtc::TypingDetection());
    EnableTypingDetection(audio_processing_.get(), typing_detector_.get());
  }

  if (enable_agc)
    EnableAutomaticGainControl(audio_processing_.get());

  // Configure the audio format that the audio processing runs on. This has to
  // be done after all the needed components are enabled.
  CHECK_EQ(0,
           audio_processing_->set_sample_rate_hz(kAudioProcessingSampleRate));
  CHECK_EQ(0, audio_processing_->set_num_channels(
      kAudioProcessingNumberOfChannels, kAudioProcessingNumberOfChannels));

  RecordProcessingState(AUDIO_PROCESSING_ENABLED);
}

void MediaStreamAudioProcessor::InitializeCaptureConverter(
    const media::AudioParameters& source_params) {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  DCHECK(source_params.IsValid());

  // Create and initialize the audio converter for the source data.
  // When the webrtc AudioProcessing is enabled, the sink format of the
  // converter will be the same as the post-processed data format, which is
  // 32k mono for desktops and 16k mono for Android. When the AudioProcessing
  // is disabled, the sink format will be the same as the source format.
  const int sink_sample_rate = audio_processing_ ?
      kAudioProcessingSampleRate : source_params.sample_rate();
  const media::ChannelLayout sink_channel_layout = audio_processing_ ?
      media::GuessChannelLayout(kAudioProcessingNumberOfChannels) :
      source_params.channel_layout();

  // WebRtc AudioProcessing requires 10 ms as its packet size, so we use this
  // native size when processing is enabled. When processing is disabled and
  // the source is running with a buffer size smaller than 10 ms, we use the
  // source buffer size to avoid an extra FIFO for WebAudio.
  int sink_buffer_size = sink_sample_rate / 100;
  if (!audio_processing_ &&
      source_params.frames_per_buffer() < sink_buffer_size) {
    sink_buffer_size = source_params.frames_per_buffer();
  }

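  // Illustrative example: with processing enabled on desktop, a 48 kHz stereo
  // source is converted to 32 kHz mono in 320-frame (10 ms) buffers; with
  // processing disabled, a 48 kHz, 128-frames-per-buffer WebAudio source
  // keeps its original format.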
  media::AudioParameters sink_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY, sink_channel_layout,
      sink_sample_rate, 16, sink_buffer_size);
  capture_converter_.reset(
      new MediaStreamAudioConverter(source_params, sink_params));
}

void MediaStreamAudioProcessor::InitializeRenderConverterIfNeeded(
    int sample_rate, int number_of_channels, int frames_per_buffer) {
  DCHECK(render_thread_checker_.CalledOnValidThread());
  // TODO(xians): Figure out if we need to handle the buffer size change.
  if (render_converter_.get() &&
      render_converter_->source_parameters().sample_rate() == sample_rate &&
      render_converter_->source_parameters().channels() == number_of_channels) {
    // Do nothing if the |render_converter_| has been set up properly.
    return;
  }

  // Create and initialize the audio converter for the render data.
  // webrtc::AudioProcessing accepts the same format that it uses to process
  // capture data, which is 32k mono for desktops and 16k mono for Android.
  media::AudioParameters source_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::GuessChannelLayout(number_of_channels), sample_rate, 16,
      frames_per_buffer);
  media::AudioParameters sink_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::CHANNEL_LAYOUT_MONO, kAudioProcessingSampleRate, 16,
      kAudioProcessingSampleRate / 100);
  render_converter_.reset(
      new MediaStreamAudioConverter(source_params, sink_params));
  render_data_bus_ = media::AudioBus::Create(number_of_channels,
                                             frames_per_buffer);
}

int MediaStreamAudioProcessor::ProcessData(webrtc::AudioFrame* audio_frame,
                                           base::TimeDelta capture_delay,
                                           int volume,
                                           bool key_pressed) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  if (!audio_processing_)
    return 0;

  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData");
  DCHECK_EQ(audio_processing_->sample_rate_hz(),
            capture_converter_->sink_parameters().sample_rate());
  DCHECK_EQ(audio_processing_->num_input_channels(),
            capture_converter_->sink_parameters().channels());
  DCHECK_EQ(audio_processing_->num_output_channels(),
            capture_converter_->sink_parameters().channels());

  base::subtle::Atomic32 render_delay_ms =
      base::subtle::Acquire_Load(&render_delay_ms_);
  int64 capture_delay_ms = capture_delay.InMilliseconds();
  DCHECK_LT(capture_delay_ms,
            std::numeric_limits<base::subtle::Atomic32>::max());
  int total_delay_ms = capture_delay_ms + render_delay_ms;
  if (total_delay_ms > 300) {
    LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms
                 << "ms; render delay: " << render_delay_ms << "ms";
  }

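  // The APM uses this delay estimate to align the far-end (render) stream
  // with the near-end (capture) stream inside the echo canceller.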
  audio_processing_->set_stream_delay_ms(total_delay_ms);

  webrtc::GainControl* agc = audio_processing_->gain_control();
  int err = agc->set_stream_analog_level(volume);
  DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err;

  audio_processing_->set_stream_key_pressed(key_pressed);

  err = audio_processing_->ProcessStream(audio_frame);
  DCHECK_EQ(err, 0) << "ProcessStream() error: " << err;

  if (typing_detector_ &&
      audio_frame->vad_activity_ != webrtc::AudioFrame::kVadUnknown) {
    bool vad_active =
        (audio_frame->vad_activity_ == webrtc::AudioFrame::kVadActive);
    bool typing_detected = typing_detector_->Process(key_pressed, vad_active);
    base::subtle::Release_Store(&typing_detected_, typing_detected);
  }

  // Return 0 if the volume has not been changed; otherwise, return the new
  // volume.
  return (agc->stream_analog_level() == volume) ?
      0 : agc->stream_analog_level();
}

void MediaStreamAudioProcessor::StopAudioProcessing() {
  if (!audio_processing_.get())
    return;

  StopAecDump();

  if (playout_data_source_)
    playout_data_source_->RemovePlayoutSink(this);

  audio_processing_.reset();
}

bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() const {
  const std::string group_name =
      base::FieldTrialList::FindFullName("MediaStreamAudioTrackProcessing");
  return group_name == "Enabled" ||
         CommandLine::ForCurrentProcess()->HasSwitch(
             switches::kEnableAudioTrackProcessing);
}

}  // namespace content