1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/media/media_stream_audio_processor.h"
7 #include "base/command_line.h"
8 #include "base/debug/trace_event.h"
10 #include "base/metrics/field_trial.h"
12 #include "base/metrics/histogram.h"
13 #include "content/public/common/content_switches.h"
14 #include "content/renderer/media/media_stream_audio_processor_options.h"
15 #include "content/renderer/media/rtc_media_constraints.h"
16 #include "content/renderer/media/webrtc_audio_device_impl.h"
17 #include "media/audio/audio_parameters.h"
18 #include "media/base/audio_converter.h"
19 #include "media/base/audio_fifo.h"
20 #include "media/base/channel_layout.h"
21 #include "third_party/WebKit/public/platform/WebMediaConstraints.h"
22 #include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h"
23 #include "third_party/webrtc/modules/audio_processing/typing_detection.h"
29 using webrtc::AudioProcessing;
// Fixed sample rate used whenever the WebRTC audio processing module (APM) is
// active; capture audio is converted to this rate before processing. Android
// uses a lower rate than other platforms — presumably to reduce CPU cost on
// mobile hardware; TODO(review): confirm rationale.
31 #if defined(OS_ANDROID)
32 const int kAudioProcessingSampleRate = 16000;
34 const int kAudioProcessingSampleRate = 32000;
// When the APM is active, processed capture audio is mono.
36 const int kAudioProcessingNumberOfChannels = 1;
// Maps a media::ChannelLayout onto the equivalent webrtc::AudioProcessing
// channel layout. Only mono, stereo, and stereo-plus-keyboard-mic are
// supported; anything else is a programming error (NOTREACHED) and falls
// back to mono.
38 AudioProcessing::ChannelLayout MapLayout(media::ChannelLayout media_layout) {
39 switch (media_layout) {
40 case media::CHANNEL_LAYOUT_MONO:
41 return AudioProcessing::kMono;
42 case media::CHANNEL_LAYOUT_STEREO:
43 return AudioProcessing::kStereo;
44 case media::CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
45 return AudioProcessing::kStereoAndKeyboard;
47 NOTREACHED() << "Layout not supported: " << media_layout;
48 return AudioProcessing::kMono;
// Maps a raw channel count onto a webrtc::AudioProcessing channel layout.
52 // This is only used for playout data where only max two channels is supported.
53 AudioProcessing::ChannelLayout ChannelsToLayout(int num_channels) {
54 switch (num_channels) {
56 return AudioProcessing::kMono;
58 return AudioProcessing::kStereo;
// More than two channels is a programming error for playout audio; fall
// back to mono.
60 NOTREACHED() << "Channels not supported: " << num_channels;
61 return AudioProcessing::kMono;
// Records where audio processing happens for a media stream track (in this
// class, disabled entirely, or inside libjingle/WebRTC).
65 // Used by UMA histograms and entries shouldn't be re-ordered or removed.
66 enum AudioTrackProcessingStates {
67 AUDIO_PROCESSING_ENABLED = 0,
68 AUDIO_PROCESSING_DISABLED,
69 AUDIO_PROCESSING_IN_WEBRTC,
// Logs the track's processing state to the
// "Media.AudioTrackProcessingStates" UMA enumeration histogram.
73 void RecordProcessingState(AudioTrackProcessingStates state) {
74 UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates",
75 state, AUDIO_PROCESSING_MAX);
80 // Wraps AudioBus to provide access to the array of channel pointers, since this
81 // is the type webrtc::AudioProcessing deals in. The array is refreshed on every
82 // channel_ptrs() call, and will be valid until the underlying AudioBus pointers
83 // are changed, e.g. through calls to SetChannelData() or SwapChannels().
85 // All methods are called on one of the capture or render audio threads
87 class MediaStreamAudioBus {
// Allocates the backing AudioBus and the channel-pointer array up front so
// that channel_ptrs() never allocates on the audio thread.
89 MediaStreamAudioBus(int channels, int frames)
90 : bus_(media::AudioBus::Create(channels, frames)),
91 channel_ptrs_(new float*[channels]) {
92 // May be created in the main render thread and used in the audio threads.
93 thread_checker_.DetachFromThread();
// Direct access to the wrapped media::AudioBus.
96 media::AudioBus* bus() {
97 DCHECK(thread_checker_.CalledOnValidThread());
// Returns the float* array form that webrtc::AudioProcessing consumes,
// refreshed from the AudioBus on every call (see class comment above).
101 float* const* channel_ptrs() {
102 DCHECK(thread_checker_.CalledOnValidThread());
103 for (int i = 0; i < bus_->channels(); ++i) {
104 channel_ptrs_[i] = bus_->channel(i);
106 return channel_ptrs_.get();
// Checker is detached in the constructor and binds to the first audio
// thread that calls a method.
110 base::ThreadChecker thread_checker_;
111 scoped_ptr<media::AudioBus> bus_;
112 scoped_ptr<float*[]> channel_ptrs_;
115 // Wraps AudioFifo to provide a cleaner interface to MediaStreamAudioProcessor.
116 // It avoids the FIFO when the source and destination frames match. All methods
117 // are called on one of the capture or render audio threads exclusively. If
118 // |source_channels| is larger than |destination_channels|, only the first
119 // |destination_channels| are kept from the source.
120 class MediaStreamAudioFifo {
122 MediaStreamAudioFifo(int source_channels,
123 int destination_channels,
125 int destination_frames)
126 : source_channels_(source_channels),
127 source_frames_(source_frames),
129 new MediaStreamAudioBus(destination_channels, destination_frames)),
130 data_available_(false) {
131 DCHECK_GE(source_channels, destination_channels);
// A wrapper bus is only needed when channels must be stripped; it aliases
// the source's channel data, so it owns no storage of its own.
133 if (source_channels > destination_channels) {
134 audio_source_intermediate_ =
135 media::AudioBus::CreateWrapper(destination_channels);
// The FIFO itself is only created when source and destination chunk sizes
// differ; otherwise Push() copies straight into |destination_|.
138 if (source_frames != destination_frames) {
139 // Since we require every Push to be followed by as many Consumes as
140 // possible, twice the larger of the two is a (probably) loose upper bound
142 const int fifo_frames = 2 * std::max(source_frames, destination_frames);
143 fifo_.reset(new media::AudioFifo(destination_channels, fifo_frames));
146 // May be created in the main render thread and used in the audio threads.
147 thread_checker_.DetachFromThread();
// Pushes one source-sized chunk, stripping excess channels if needed. Must
// always be followed by as many Consume() calls as will succeed (see the
// sizing comment in the constructor).
150 void Push(const media::AudioBus* source) {
151 DCHECK(thread_checker_.CalledOnValidThread());
152 DCHECK_EQ(source->channels(), source_channels_);
153 DCHECK_EQ(source->frames(), source_frames_);
155 const media::AudioBus* source_to_push = source;
157 if (audio_source_intermediate_) {
158 for (int i = 0; i < destination_->bus()->channels(); ++i) {
159 audio_source_intermediate_->SetChannelData(
// const_cast is safe: the wrapper is only ever read from here.
161 const_cast<float*>(source->channel(i)));
163 audio_source_intermediate_->set_frames(source->frames());
164 source_to_push = audio_source_intermediate_.get();
168 fifo_->Push(source_to_push);
// FIFO-less path: chunk sizes match, so copy directly and mark available.
170 source_to_push->CopyTo(destination_->bus());
171 data_available_ = true;
175 // Returns true if there are destination_frames() of data available to be
176 // consumed, and otherwise false.
177 bool Consume(MediaStreamAudioBus** destination) {
178 DCHECK(thread_checker_.CalledOnValidThread());
181 if (fifo_->frames() < destination_->bus()->frames())
184 fifo_->Consume(destination_->bus(), 0, destination_->bus()->frames());
186 if (!data_available_)
189 // The data was already copied to |destination_| in this case.
190 data_available_ = false;
// |*destination| points at the internal bus; it stays valid until the next
// Push()/Consume() on this object.
193 *destination = destination_.get();
198 base::ThreadChecker thread_checker_;
199 const int source_channels_; // For a DCHECK.
200 const int source_frames_; // For a DCHECK.
// Channel-stripping alias of the source (see constructor); may be null.
201 scoped_ptr<media::AudioBus> audio_source_intermediate_;
202 scoped_ptr<MediaStreamAudioBus> destination_;
// Null when source and destination frame counts match.
203 scoped_ptr<media::AudioFifo> fifo_;
204 // Only used when the FIFO is disabled;
205 bool data_available_;
// Audio track processing is on unless the kDisableAudioTrackProcessing
// command-line switch is present (in which case processing happens inside
// WebRTC instead — see InitializeAudioProcessingModule()).
208 bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() {
209 return !CommandLine::ForCurrentProcess()->HasSwitch(
210 switches::kDisableAudioTrackProcessing);
// Constructed on the main render thread. Thread checkers for the capture and
// render audio threads are detached here so they bind to whichever audio
// thread calls in first.
213 MediaStreamAudioProcessor::MediaStreamAudioProcessor(
214 const blink::WebMediaConstraints& constraints,
216 WebRtcPlayoutDataSource* playout_data_source)
217 : render_delay_ms_(0),
218 playout_data_source_(playout_data_source),
219 audio_mirroring_(false),
220 typing_detected_(false),
222 capture_thread_checker_.DetachFromThread();
223 render_thread_checker_.DetachFromThread();
// Configures |audio_processing_| from the constraints/effects before any
// audio flows.
224 InitializeAudioProcessingModule(constraints, effects);
225 if (IsAudioTrackProcessingEnabled()) {
226 aec_dump_message_filter_ = AecDumpMessageFilter::Get();
227 // In unit tests not creating a message filter, |aec_dump_message_filter_|
228 // will be NULL. We can just ignore that. Other unit tests and browser tests
229 // ensure that we do get the filter when we should.
230 if (aec_dump_message_filter_.get())
231 aec_dump_message_filter_->AddDelegate(this);
// Destroyed on the main render thread only.
235 MediaStreamAudioProcessor::~MediaStreamAudioProcessor() {
236 DCHECK(main_thread_checker_.CalledOnValidThread());
// Rebuilds the capture FIFO for a new input format and re-arms the capture
// thread checker for the new capture thread.
240 void MediaStreamAudioProcessor::OnCaptureFormatChanged(
241 const media::AudioParameters& input_format) {
242 DCHECK(main_thread_checker_.CalledOnValidThread());
243 // There is no need to hold a lock here since the caller guarantees that
244 // there is no more PushCaptureData() and ProcessAndConsumeData() callbacks
245 // on the capture thread.
246 InitializeCaptureFifo(input_format);
248 // Reset the |capture_thread_checker_| since the capture data will come from
249 // a new capture thread.
250 capture_thread_checker_.DetachFromThread();
// Queues one chunk of capture audio; called on the capture thread. Pair each
// call with ProcessAndConsumeData() calls until that returns false.
253 void MediaStreamAudioProcessor::PushCaptureData(
254 const media::AudioBus* audio_source) {
255 DCHECK(capture_thread_checker_.CalledOnValidThread());
257 capture_fifo_->Push(audio_source);
// Consumes one output-sized chunk from the capture FIFO, optionally runs it
// through webrtc::AudioProcessing, and exposes the result as interleaved
// int16 data via |*out|. Returns false when the FIFO does not yet hold a
// full chunk. |*new_volume| receives the AGC-recommended microphone volume
// (0 when unchanged — see ProcessData()).
260 bool MediaStreamAudioProcessor::ProcessAndConsumeData(
261 base::TimeDelta capture_delay, int volume, bool key_pressed,
262 int* new_volume, int16** out) {
263 DCHECK(capture_thread_checker_.CalledOnValidThread());
264 TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData");
266 MediaStreamAudioBus* process_bus;
267 if (!capture_fifo_->Consume(&process_bus))
270 // Use the process bus directly if audio processing is disabled.
271 MediaStreamAudioBus* output_bus = process_bus;
273 if (audio_processing_) {
274 output_bus = output_bus_.get();
275 *new_volume = ProcessData(process_bus->channel_ptrs(),
276 process_bus->bus()->frames(), capture_delay,
277 volume, key_pressed, output_bus->channel_ptrs());
280 // Swap channels before interleaving the data.
281 if (audio_mirroring_ &&
282 output_format_.channel_layout() == media::CHANNEL_LAYOUT_STEREO) {
283 // Swap the first and second channels.
284 output_bus->bus()->SwapChannels(0, 1);
287 output_bus->bus()->ToInterleaved(output_bus->bus()->frames(),
// |*out| aliases internal storage; valid until the next call on this object.
290 *out = output_data_.get();
// Tears down AEC-dump plumbing and detaches from the playout data source.
// Main render thread only.
295 void MediaStreamAudioProcessor::Stop() {
296 DCHECK(main_thread_checker_.CalledOnValidThread());
302 if (aec_dump_message_filter_.get()) {
303 aec_dump_message_filter_->RemoveDelegate(this);
304 aec_dump_message_filter_ = NULL;
// Nothing further to tear down when processing was never enabled.
307 if (!audio_processing_.get())
310 StopEchoCancellationDump(audio_processing_.get());
312 if (playout_data_source_) {
313 playout_data_source_->RemovePlayoutSink(this);
314 playout_data_source_ = NULL;
// Format of the capture audio fed to PushCaptureData().
318 const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const {
319 return input_format_;
// Format of the audio returned by ProcessAndConsumeData().
322 const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const {
323 return output_format_;
// AecDumpMessageFilter delegate: starts dumping AEC diagnostic data to the
// file handle received over IPC (ownership is transferred via Pass()).
326 void MediaStreamAudioProcessor::OnAecDumpFile(
327 const IPC::PlatformFileForTransit& file_handle) {
328 DCHECK(main_thread_checker_.CalledOnValidThread());
330 base::File file = IPC::PlatformFileForTransitToFile(file_handle);
331 DCHECK(file.IsValid());
333 if (audio_processing_)
334 StartEchoCancellationDump(audio_processing_.get(), file.Pass());
// AecDumpMessageFilter delegate: stops any in-progress AEC dump.
339 void MediaStreamAudioProcessor::OnDisableAecDump() {
340 DCHECK(main_thread_checker_.CalledOnValidThread());
341 if (audio_processing_)
342 StopEchoCancellationDump(audio_processing_.get());
// AecDumpMessageFilter delegate: drops the filter reference when the IPC
// channel goes away.
345 void MediaStreamAudioProcessor::OnIpcClosing() {
346 DCHECK(main_thread_checker_.CalledOnValidThread());
347 aec_dump_message_filter_ = NULL;
// WebRtcPlayoutDataSource::Sink implementation: feeds far-end (playout)
// audio to the echo canceller in 10 ms chunks. Runs on the render thread.
350 void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus,
352 int audio_delay_milliseconds) {
353 DCHECK(render_thread_checker_.CalledOnValidThread());
// Exactly one of the mobile (AECM) and full (AEC) echo cancellers must be
// enabled when playout data is being delivered.
354 DCHECK(audio_processing_->echo_control_mobile()->is_enabled() ^
355 audio_processing_->echo_cancellation()->is_enabled());
357 TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData");
358 DCHECK_LT(audio_delay_milliseconds,
359 std::numeric_limits<base::subtle::Atomic32>::max());
// Publish the render delay for the capture thread, which Acquire_Loads it
// in ProcessData() to compute the total stream delay.
360 base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds);
362 InitializeRenderFifoIfNeeded(sample_rate, audio_bus->channels(),
363 audio_bus->frames());
365 render_fifo_->Push(audio_bus);
366 MediaStreamAudioBus* analysis_bus;
367 while (render_fifo_->Consume(&analysis_bus)) {
368 audio_processing_->AnalyzeReverseStream(
369 analysis_bus->channel_ptrs(),
370 analysis_bus->bus()->frames(),
372 ChannelsToLayout(audio_bus->channels()));
// Invalidates render-thread state when the playout source changes; a new
// FIFO is lazily rebuilt by InitializeRenderFifoIfNeeded().
376 void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() {
377 DCHECK(main_thread_checker_.CalledOnValidThread());
378 // There is no need to hold a lock here since the caller guarantees that
379 // there is no more OnPlayoutData() callback on the render thread.
380 render_thread_checker_.DetachFromThread();
381 render_fifo_.reset();
// Fills |stats| with typing-detection and AEC statistics. The Acquire_Load
// pairs with the Release_Store of |typing_detected_| in ProcessData().
384 void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) {
385 stats->typing_noise_detected =
386 (base::subtle::Acquire_Load(&typing_detected_) != false);
387 GetAecStats(audio_processing_.get(), stats);
388 if (echo_information_)
389 echo_information_.get()->UpdateAecDelayStats(stats->echo_delay_median_ms);
// Reads the goog* media constraints (and platform restrictions) and, if any
// processing component is requested, creates and configures the
// webrtc::AudioProcessing instance. Leaves |audio_processing_| null when
// nothing is enabled or track processing happens in WebRTC instead.
392 void MediaStreamAudioProcessor::InitializeAudioProcessingModule(
393 const blink::WebMediaConstraints& constraints, int effects) {
394 DCHECK(main_thread_checker_.CalledOnValidThread());
395 DCHECK(!audio_processing_);
397 MediaAudioConstraints audio_constraints(constraints, effects);
399 // Audio mirroring can be enabled even though audio processing is otherwise
401 audio_mirroring_ = audio_constraints.GetProperty(
402 MediaAudioConstraints::kGoogAudioMirroring);
404 if (!IsAudioTrackProcessingEnabled()) {
405 RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC);
410 // On iOS, VPIO provides built-in AGC and AEC.
411 const bool echo_cancellation = false;
412 const bool goog_agc = false;
414 const bool echo_cancellation =
415 audio_constraints.GetEchoCancellationProperty();
416 const bool goog_agc = audio_constraints.GetProperty(
417 MediaAudioConstraints::kGoogAutoGainControl);
// Experimental AEC and typing detection are desktop-only.
420 #if defined(OS_IOS) || defined(OS_ANDROID)
421 const bool goog_experimental_aec = false;
422 const bool goog_typing_detection = false;
424 const bool goog_experimental_aec = audio_constraints.GetProperty(
425 MediaAudioConstraints::kGoogExperimentalEchoCancellation);
426 const bool goog_typing_detection = audio_constraints.GetProperty(
427 MediaAudioConstraints::kGoogTypingNoiseDetection);
430 const bool goog_ns = audio_constraints.GetProperty(
431 MediaAudioConstraints::kGoogNoiseSuppression);
432 const bool goog_experimental_ns = audio_constraints.GetProperty(
433 MediaAudioConstraints::kGoogExperimentalNoiseSuppression);
434 const bool goog_high_pass_filter = audio_constraints.GetProperty(
435 MediaAudioConstraints::kGoogHighpassFilter);
437 // Return immediately if no goog constraint is enabled.
438 if (!echo_cancellation && !goog_experimental_aec && !goog_ns &&
439 !goog_high_pass_filter && !goog_typing_detection &&
440 !goog_agc && !goog_experimental_ns) {
441 RecordProcessingState(AUDIO_PROCESSING_DISABLED);
445 // Experimental options provided at creation.
446 webrtc::Config config;
447 if (goog_experimental_aec)
448 config.Set<webrtc::DelayCorrection>(new webrtc::DelayCorrection(true));
449 if (goog_experimental_ns)
450 config.Set<webrtc::ExperimentalNs>(new webrtc::ExperimentalNs(true));
// Field-trial-gated Mac workaround: ignore the reported hardware delay.
451 #if defined(OS_MACOSX)
452 if (base::FieldTrialList::FindFullName("NoReportedDelayOnMac") == "Enabled")
453 config.Set<webrtc::ReportedDelay>(new webrtc::ReportedDelay(false));
456 // Create and configure the webrtc::AudioProcessing.
457 audio_processing_.reset(webrtc::AudioProcessing::Create(config));
459 // Enable the audio processing components.
460 if (echo_cancellation) {
461 EnableEchoCancellation(audio_processing_.get());
// The AEC needs far-end audio, so register as a playout sink.
463 if (playout_data_source_)
464 playout_data_source_->AddPlayoutSink(this);
466 // Prepare for logging echo information. If there are data remaining in
467 // |echo_information_| we simply discard it.
468 echo_information_.reset(new EchoInformation());
472 EnableNoiseSuppression(audio_processing_.get());
474 if (goog_high_pass_filter)
475 EnableHighPassFilter(audio_processing_.get());
477 if (goog_typing_detection) {
478 // TODO(xians): Remove this |typing_detector_| after the typing suppression
479 // is enabled by default.
480 typing_detector_.reset(new webrtc::TypingDetection());
481 EnableTypingDetection(audio_processing_.get(), typing_detector_.get());
485 EnableAutomaticGainControl(audio_processing_.get());
487 RecordProcessingState(AUDIO_PROCESSING_ENABLED);
// Derives the output format from the input format and whether processing is
// enabled, then (re)creates the capture FIFO and, when processing, the
// output bus and interleaved output buffer.
490 void MediaStreamAudioProcessor::InitializeCaptureFifo(
491 const media::AudioParameters& input_format) {
492 DCHECK(main_thread_checker_.CalledOnValidThread());
493 DCHECK(input_format.IsValid());
494 input_format_ = input_format;
496 // TODO(ajm): For now, we assume fixed parameters for the output when audio
497 // processing is enabled, to match the previous behavior. We should either
498 // use the input parameters (in which case, audio processing will convert
499 // at output) or ideally, have a backchannel from the sink to know what
500 // format it would prefer.
501 const int output_sample_rate = audio_processing_ ?
502 kAudioProcessingSampleRate : input_format.sample_rate();
503 media::ChannelLayout output_channel_layout = audio_processing_ ?
504 media::GuessChannelLayout(kAudioProcessingNumberOfChannels) :
505 input_format.channel_layout();
507 // The output channels from the fifo is normally the same as input.
508 int fifo_output_channels = input_format.channels();
510 // Special case for if we have a keyboard mic channel on the input and no
511 // audio processing is used. We will then have the fifo strip away that
512 // channel. So we use stereo as output layout, and also change the output
513 // channels for the fifo.
514 if (input_format.channel_layout() ==
515 media::CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC &&
516 !audio_processing_) {
517 output_channel_layout = media::CHANNEL_LAYOUT_STEREO;
518 fifo_output_channels = ChannelLayoutToChannelCount(output_channel_layout);
521 // webrtc::AudioProcessing requires a 10 ms chunk size. We use this native
522 // size when processing is enabled. When disabled we use the same size as
523 // the source if less than 10 ms.
525 // TODO(ajm): This conditional buffer size appears to be assuming knowledge of
526 // the sink based on the source parameters. PeerConnection sinks seem to want
527 // 10 ms chunks regardless, while WebAudio sinks want less, and we're assuming
528 // we can identify WebAudio sinks by the input chunk size. Less fragile would
529 // be to have the sink actually tell us how much it wants (as in the above
// sample_rate / 100 == frames per 10 ms chunk.
531 int processing_frames = input_format.sample_rate() / 100;
532 int output_frames = output_sample_rate / 100;
533 if (!audio_processing_ && input_format.frames_per_buffer() < output_frames) {
534 processing_frames = input_format.frames_per_buffer();
535 output_frames = processing_frames;
538 output_format_ = media::AudioParameters(
539 media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
540 output_channel_layout,
546 new MediaStreamAudioFifo(input_format.channels(),
547 fifo_output_channels,
548 input_format.frames_per_buffer(),
// The processed-output bus and interleaved int16 buffer are only needed
// when webrtc::AudioProcessing is active.
551 if (audio_processing_) {
552 output_bus_.reset(new MediaStreamAudioBus(output_format_.channels(),
555 output_data_.reset(new int16[output_format_.GetBytesPerBuffer() /
// Lazily (re)creates the render FIFO on the render thread when the playout
// format changes; no-op when the existing FIFO already matches.
559 void MediaStreamAudioProcessor::InitializeRenderFifoIfNeeded(
560 int sample_rate, int number_of_channels, int frames_per_buffer) {
561 DCHECK(render_thread_checker_.CalledOnValidThread());
562 if (render_fifo_.get() &&
563 render_format_.sample_rate() == sample_rate &&
564 render_format_.channels() == number_of_channels &&
565 render_format_.frames_per_buffer() == frames_per_buffer) {
566 // Do nothing if the |render_fifo_| has been setup properly.
570 render_format_ = media::AudioParameters(
571 media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
572 media::GuessChannelLayout(number_of_channels),
// The FIFO re-chunks playout audio into the 10 ms pieces that
// AnalyzeReverseStream() expects (see OnPlayoutData()).
577 const int analysis_frames = sample_rate / 100; // 10 ms chunks.
579 new MediaStreamAudioFifo(number_of_channels,
// Runs one chunk through webrtc::AudioProcessing on the capture thread:
// reports the total (capture + render) delay, feeds the analog volume and
// key-press state to the AGC/typing detector, processes the stream into
// |output_ptrs|, and returns the AGC's new microphone volume (0 if the
// volume is unchanged).
585 int MediaStreamAudioProcessor::ProcessData(const float* const* process_ptrs,
587 base::TimeDelta capture_delay,
590 float* const* output_ptrs) {
591 DCHECK(audio_processing_);
592 DCHECK(capture_thread_checker_.CalledOnValidThread());
594 TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData");
// Pairs with the Release_Store in OnPlayoutData() on the render thread.
596 base::subtle::Atomic32 render_delay_ms =
597 base::subtle::Acquire_Load(&render_delay_ms_);
598 int64 capture_delay_ms = capture_delay.InMilliseconds();
599 DCHECK_LT(capture_delay_ms,
600 std::numeric_limits<base::subtle::Atomic32>::max());
601 int total_delay_ms = capture_delay_ms + render_delay_ms;
602 if (total_delay_ms > 300) {
603 LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms
604 << "ms; render delay: " << render_delay_ms << "ms";
607 webrtc::AudioProcessing* ap = audio_processing_.get();
608 ap->set_stream_delay_ms(total_delay_ms);
610 DCHECK_LE(volume, WebRtcAudioDeviceImpl::kMaxVolumeLevel);
611 webrtc::GainControl* agc = ap->gain_control();
612 int err = agc->set_stream_analog_level(volume);
613 DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err;
615 ap->set_stream_key_pressed(key_pressed);
617 err = ap->ProcessStream(process_ptrs,
619 input_format_.sample_rate(),
620 MapLayout(input_format_.channel_layout()),
621 output_format_.sample_rate(),
622 MapLayout(output_format_.channel_layout()),
624 DCHECK_EQ(err, 0) << "ProcessStream() error: " << err;
626 if (typing_detector_) {
627 webrtc::VoiceDetection* vad = ap->voice_detection();
628 DCHECK(vad->is_enabled());
629 bool detected = typing_detector_->Process(key_pressed,
630 vad->stream_has_voice());
// Published for GetStats(), which Acquire_Loads it on another thread.
631 base::subtle::Release_Store(&typing_detected_, detected);
634 // Return 0 if the volume hasn't been changed, and otherwise the new volume.
635 return (agc->stream_analog_level() == volume) ?
636 0 : agc->stream_analog_level();
639 } // namespace content