// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/media_stream_audio_processor.h"

#include "base/command_line.h"
#include "base/debug/trace_event.h"
#include "base/metrics/field_trial.h"
#include "base/metrics/histogram.h"
#include "content/public/common/content_switches.h"
#include "content/renderer/media/media_stream_audio_processor_options.h"
#include "content/renderer/media/rtc_media_constraints.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_converter.h"
#include "media/base/audio_fifo.h"
#include "media/base/channel_layout.h"
#include "third_party/WebKit/public/platform/WebMediaConstraints.h"
#include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h"
#include "third_party/webrtc/modules/audio_processing/typing_detection.h"

namespace content {

namespace {

using webrtc::AudioProcessing;
using webrtc::MediaConstraintsInterface;

#if defined(OS_ANDROID)
const int kAudioProcessingSampleRate = 16000;
#else
const int kAudioProcessingSampleRate = 32000;
#endif
const int kAudioProcessingNumberOfChannels = 1;

const int kMaxNumberOfBuffersInFifo = 2;

// Used by UMA histograms; entries shouldn't be re-ordered or removed.
enum AudioTrackProcessingStates {
  AUDIO_PROCESSING_ENABLED = 0,
  AUDIO_PROCESSING_DISABLED,
  AUDIO_PROCESSING_IN_WEBRTC,
  AUDIO_PROCESSING_MAX
};

void RecordProcessingState(AudioTrackProcessingStates state) {
  UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates",
                            state, AUDIO_PROCESSING_MAX);
}

}  // namespace

class MediaStreamAudioProcessor::MediaStreamAudioConverter
    : public media::AudioConverter::InputCallback {
 public:
  MediaStreamAudioConverter(const media::AudioParameters& source_params,
                            const media::AudioParameters& sink_params)
      : source_params_(source_params),
        sink_params_(sink_params),
        audio_converter_(source_params, sink_params_, false) {
    // An instance of MediaStreamAudioConverter may be created on the main
    // render thread and used on the audio thread, as is the case for
    // |MediaStreamAudioProcessor::capture_converter_|.
    thread_checker_.DetachFromThread();
    audio_converter_.AddInput(this);
    // Create and initialize the audio FIFO and audio bus wrapper.
    // The size of the FIFO should be at least twice the source buffer size
    // or twice the sink buffer size, whichever is larger.
    int buffer_size = std::max(
        kMaxNumberOfBuffersInFifo * source_params_.frames_per_buffer(),
        kMaxNumberOfBuffersInFifo * sink_params_.frames_per_buffer());
    fifo_.reset(new media::AudioFifo(source_params_.channels(), buffer_size));
    // TODO(xians): Use CreateWrapper to save one memcpy.
    audio_wrapper_ = media::AudioBus::Create(sink_params_.channels(),
                                             sink_params_.frames_per_buffer());
  }
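
  // Illustrative note (added; not in the original source): with the sizing
  // rule above, a 44.1 kHz source pushing 441-frame buffers into a 16 kHz,
  // 160-frame sink gives
  //   buffer_size = std::max(2 * 441, 2 * 160) = 882 frames,
  // i.e. room for two full source buffers, so a Push() cannot overflow the
  // FIFO before the matching Convert() calls drain it.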

  virtual ~MediaStreamAudioConverter() {
    audio_converter_.RemoveInput(this);
  }

  void Push(media::AudioBus* audio_source) {
    // Called on the audio thread, which is the capture audio thread for
    // |MediaStreamAudioProcessor::capture_converter_| and the render audio
    // thread for |MediaStreamAudioProcessor::render_converter_|. It must be
    // the same thread that calls Convert().
    DCHECK(thread_checker_.CalledOnValidThread());
    fifo_->Push(audio_source);
  }

  bool Convert(webrtc::AudioFrame* out) {
    // Called on the audio thread, which is the capture audio thread for
    // |MediaStreamAudioProcessor::capture_converter_| and the render audio
    // thread for |MediaStreamAudioProcessor::render_converter_|.
    DCHECK(thread_checker_.CalledOnValidThread());
    // Return false if there is not enough data in the FIFO; this happens when
    // fifo_->frames() / source_params_.sample_rate() is less than
    // sink_params_.frames_per_buffer() / sink_params_.sample_rate().
    if (fifo_->frames() * sink_params_.sample_rate() <
        sink_params_.frames_per_buffer() * source_params_.sample_rate()) {
      return false;
    }

    // Convert data to the output format; this will trigger ProvideInput().
    audio_converter_.Convert(audio_wrapper_.get());

    // TODO(xians): Figure out a better way to handle the interleaved and
    // deinterleaved format switching.
    DCHECK_EQ(audio_wrapper_->frames(), sink_params_.frames_per_buffer());
    audio_wrapper_->ToInterleaved(audio_wrapper_->frames(),
                                  sink_params_.bits_per_sample() / 8,
                                  out->data_);

    out->samples_per_channel_ = sink_params_.frames_per_buffer();
    out->sample_rate_hz_ = sink_params_.sample_rate();
    out->speech_type_ = webrtc::AudioFrame::kNormalSpeech;
    out->vad_activity_ = webrtc::AudioFrame::kVadUnknown;
    out->num_channels_ = sink_params_.channels();

    return true;
  }
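
  // Worked example (added; not in the original source): the availability
  // check above cross-multiplies to stay in integer arithmetic. For a
  // 44.1 kHz source and a 16 kHz, 160-frame sink, producing one sink buffer
  // requires
  //   fifo_->frames() * 16000 >= 160 * 44100,
  // i.e. at least 441 buffered source frames -- exactly the 10 ms of source
  // audio that one 10 ms sink frame consumes.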

  const media::AudioParameters& source_parameters() const {
    return source_params_;
  }
  const media::AudioParameters& sink_parameters() const {
    return sink_params_;
  }

 private:
  // AudioConverter::InputCallback implementation.
  virtual double ProvideInput(media::AudioBus* audio_bus,
                              base::TimeDelta buffer_delay) OVERRIDE {
    // Called on the realtime audio thread.
    // TODO(xians): Figure out why the first Convert() triggers ProvideInput
    // twice.
    if (fifo_->frames() < audio_bus->frames())
      return 0;

    fifo_->Consume(audio_bus, 0, audio_bus->frames());

    // Return 1.0 to indicate no volume scaling on the data.
    return 1.0;
  }

  base::ThreadChecker thread_checker_;
  const media::AudioParameters source_params_;
  const media::AudioParameters sink_params_;

  // TODO(xians): Consider using SincResampler to save some memcpy.
  // Handles mixing and resampling between input and output parameters.
  media::AudioConverter audio_converter_;
  scoped_ptr<media::AudioBus> audio_wrapper_;
  scoped_ptr<media::AudioFifo> fifo_;
};
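
// Usage sketch (illustrative only; not part of the original file). Both
// converters follow the same push-then-drain pattern that OnPlayoutData()
// uses below, assuming |converter| was built with valid source/sink
// parameters:
//
//   converter->Push(audio_bus);            // buffer one source callback
//   webrtc::AudioFrame frame;
//   while (converter->Convert(&frame)) {   // drain each full 10 ms frame
//     // hand |frame| to webrtc::AudioProcessing...
//   }
//
// Convert() returns false once less than one sink buffer's worth of source
// data remains in the FIFO, so the loop never emits a partial frame.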

MediaStreamAudioProcessor::MediaStreamAudioProcessor(
    const blink::WebMediaConstraints& constraints,
    int effects,
    MediaStreamType type,
    WebRtcPlayoutDataSource* playout_data_source)
    : render_delay_ms_(0),
      playout_data_source_(playout_data_source),
      audio_mirroring_(false),
      typing_detected_(false) {
  capture_thread_checker_.DetachFromThread();
  render_thread_checker_.DetachFromThread();
  InitializeAudioProcessingModule(constraints, effects, type);
}

MediaStreamAudioProcessor::~MediaStreamAudioProcessor() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  StopAudioProcessing();
}

void MediaStreamAudioProcessor::OnCaptureFormatChanged(
    const media::AudioParameters& source_params) {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  // There is no need to hold a lock here since the caller guarantees that
  // there are no more PushCaptureData() and ProcessAndConsumeData() callbacks
  // on the capture thread.
  InitializeCaptureConverter(source_params);

  // Reset |capture_thread_checker_| since the capture data will come from
  // a new capture thread.
  capture_thread_checker_.DetachFromThread();
}

void MediaStreamAudioProcessor::PushCaptureData(media::AudioBus* audio_source) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  DCHECK_EQ(audio_source->channels(),
            capture_converter_->source_parameters().channels());
  DCHECK_EQ(audio_source->frames(),
            capture_converter_->source_parameters().frames_per_buffer());

  if (audio_mirroring_ &&
      capture_converter_->source_parameters().channel_layout() ==
          media::CHANNEL_LAYOUT_STEREO) {
    // Swap the first and second channels.
    audio_source->SwapChannels(0, 1);
  }

  capture_converter_->Push(audio_source);
}

bool MediaStreamAudioProcessor::ProcessAndConsumeData(
    base::TimeDelta capture_delay, int volume, bool key_pressed,
    int* new_volume, int16** out) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData");

  if (!capture_converter_->Convert(&capture_frame_))
    return false;

  *new_volume = ProcessData(&capture_frame_, capture_delay, volume,
                            key_pressed);
  *out = capture_frame_.data_;

  return true;
}

const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const {
  return capture_converter_->source_parameters();
}

const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const {
  return capture_converter_->sink_parameters();
}

void MediaStreamAudioProcessor::StartAecDump(
    const base::PlatformFile& aec_dump_file) {
  if (audio_processing_)
    StartEchoCancellationDump(audio_processing_.get(), aec_dump_file);
}

void MediaStreamAudioProcessor::StopAecDump() {
  if (audio_processing_)
    StopEchoCancellationDump(audio_processing_.get());
}

void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus,
                                              int sample_rate,
                                              int audio_delay_milliseconds) {
  DCHECK(render_thread_checker_.CalledOnValidThread());
#if defined(OS_ANDROID) || defined(OS_IOS)
  DCHECK(audio_processing_->echo_control_mobile()->is_enabled());
#else
  DCHECK(audio_processing_->echo_cancellation()->is_enabled());
#endif

  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData");
  DCHECK_LT(audio_delay_milliseconds,
            std::numeric_limits<base::subtle::Atomic32>::max());
  base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds);

  InitializeRenderConverterIfNeeded(sample_rate, audio_bus->channels(),
                                    audio_bus->frames());

  render_converter_->Push(audio_bus);
  while (render_converter_->Convert(&render_frame_))
    audio_processing_->AnalyzeReverseStream(&render_frame_);
}
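
// Note (added for clarity; not in the original source): OnPlayoutData()
// publishes the render delay with Release_Store() and ProcessData() reads it
// with Acquire_Load(), so the capture thread sees a consistent value without
// taking a lock. As a concrete example, a 48 kHz render stream delivering
// 480-frame buses (10 ms) yields exactly one mono frame per Push()/Convert()
// cycle for AnalyzeReverseStream().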

void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  // There is no need to hold a lock here since the caller guarantees that
  // there are no more OnPlayoutData() callbacks on the render thread.
  render_thread_checker_.DetachFromThread();
  render_converter_.reset();
}

void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) {
  stats->typing_noise_detected =
      (base::subtle::Acquire_Load(&typing_detected_) != false);
  GetAecStats(audio_processing_.get(), stats);
}

void MediaStreamAudioProcessor::InitializeAudioProcessingModule(
    const blink::WebMediaConstraints& constraints, int effects,
    MediaStreamType type) {
  DCHECK(!audio_processing_);

  RTCMediaConstraints native_constraints(constraints);

  // Audio mirroring can be enabled even though audio processing is otherwise
  // disabled.
  audio_mirroring_ = GetPropertyFromConstraints(
      &native_constraints, webrtc::MediaConstraintsInterface::kAudioMirroring);

  if (!IsAudioTrackProcessingEnabled()) {
    RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC);
    return;
  }

  // Only apply the fixed constraints for gUM of MEDIA_DEVICE_AUDIO_CAPTURE.
  DCHECK(IsAudioMediaType(type));
  if (type == MEDIA_DEVICE_AUDIO_CAPTURE)
    ApplyFixedAudioConstraints(&native_constraints);

  if (effects & media::AudioParameters::ECHO_CANCELLER) {
    // If the platform echo canceller is enabled, disable the software AEC.
    native_constraints.AddMandatory(
        MediaConstraintsInterface::kEchoCancellation,
        MediaConstraintsInterface::kValueFalse, true);
  }

#if defined(OS_IOS)
  // On iOS, VPIO provides built-in AEC and AGC.
  const bool enable_aec = false;
  const bool enable_agc = false;
#else
  const bool enable_aec = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kEchoCancellation);
  const bool enable_agc = GetPropertyFromConstraints(
      &native_constraints, webrtc::MediaConstraintsInterface::kAutoGainControl);
#endif

#if defined(OS_IOS) || defined(OS_ANDROID)
  const bool enable_experimental_aec = false;
  const bool enable_typing_detection = false;
#else
  const bool enable_experimental_aec = GetPropertyFromConstraints(
      &native_constraints,
      MediaConstraintsInterface::kExperimentalEchoCancellation);
  const bool enable_typing_detection = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kTypingNoiseDetection);
#endif

  const bool enable_ns = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kNoiseSuppression);
  const bool enable_experimental_ns = GetPropertyFromConstraints(
      &native_constraints,
      MediaConstraintsInterface::kExperimentalNoiseSuppression);
  const bool enable_high_pass_filter = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kHighpassFilter);

  // Return immediately if no audio processing component is enabled.
  if (!enable_aec && !enable_experimental_aec && !enable_ns &&
      !enable_high_pass_filter && !enable_typing_detection && !enable_agc &&
      !enable_experimental_ns) {
    RecordProcessingState(AUDIO_PROCESSING_DISABLED);
    return;
  }

  // Create and configure the webrtc::AudioProcessing.
  audio_processing_.reset(webrtc::AudioProcessing::Create(0));

  // Enable the audio processing components.
  if (enable_aec) {
    EnableEchoCancellation(audio_processing_.get());
    if (enable_experimental_aec)
      EnableExperimentalEchoCancellation(audio_processing_.get());

    if (playout_data_source_)
      playout_data_source_->AddPlayoutSink(this);
  }

  if (enable_ns)
    EnableNoiseSuppression(audio_processing_.get());

  if (enable_experimental_ns)
    EnableExperimentalNoiseSuppression(audio_processing_.get());

  if (enable_high_pass_filter)
    EnableHighPassFilter(audio_processing_.get());

  if (enable_typing_detection) {
    // TODO(xians): Remove this |typing_detector_| once typing suppression
    // is enabled by default.
    typing_detector_.reset(new webrtc::TypingDetection());
    EnableTypingDetection(audio_processing_.get(), typing_detector_.get());
  }

  if (enable_agc)
    EnableAutomaticGainControl(audio_processing_.get());

  // Configure the audio format the audio processing runs at. This has to be
  // done after all the needed components are enabled.
  CHECK_EQ(0,
           audio_processing_->set_sample_rate_hz(kAudioProcessingSampleRate));
  CHECK_EQ(0, audio_processing_->set_num_channels(
      kAudioProcessingNumberOfChannels, kAudioProcessingNumberOfChannels));

  RecordProcessingState(AUDIO_PROCESSING_ENABLED);
}
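
// Illustrative arithmetic (added; not in the original source): with the
// fixed format configured above, every webrtc::AudioFrame processed on
// desktop holds kAudioProcessingSampleRate / 100 = 32000 / 100 = 320 mono
// samples (10 ms); on Android the equivalent is 16000 / 100 = 160 samples.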

void MediaStreamAudioProcessor::InitializeCaptureConverter(
    const media::AudioParameters& source_params) {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  DCHECK(source_params.IsValid());

  // Create and initialize the audio converter for the source data.
  // When the webrtc AudioProcessing is enabled, the sink format of the
  // converter will be the same as the post-processed data format, which is
  // 32k mono for desktops and 16k mono for Android. When the AudioProcessing
  // is disabled, the sink format will be the same as the source format.
  const int sink_sample_rate = audio_processing_ ?
      kAudioProcessingSampleRate : source_params.sample_rate();
  const media::ChannelLayout sink_channel_layout = audio_processing_ ?
      media::GuessChannelLayout(kAudioProcessingNumberOfChannels) :
      source_params.channel_layout();

  // WebRtc AudioProcessing requires 10 ms as its packet size. We use this
  // native size when processing is enabled. When processing is disabled and
  // the source is running with a buffer size smaller than 10 ms, we use the
  // source buffer size to avoid an extra FIFO for WebAudio.
  int sink_buffer_size = sink_sample_rate / 100;
  if (!audio_processing_ &&
      source_params.frames_per_buffer() < sink_buffer_size) {
    sink_buffer_size = source_params.frames_per_buffer();
  }

  media::AudioParameters sink_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY, sink_channel_layout,
      sink_sample_rate, 16, sink_buffer_size);
  capture_converter_.reset(
      new MediaStreamAudioConverter(source_params, sink_params));
}
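
// Worked example (added for clarity; not in the original source): with
// processing enabled on desktop the sink is always 32000 / 100 = 320 frames
// (10 ms). With processing disabled and a WebAudio source delivering
// 128-frame buffers at 48 kHz, 128 < 48000 / 100 = 480, so the sink keeps
// the source's 128-frame buffer size and no extra buffering latency is
// introduced.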

void MediaStreamAudioProcessor::InitializeRenderConverterIfNeeded(
    int sample_rate, int number_of_channels, int frames_per_buffer) {
  DCHECK(render_thread_checker_.CalledOnValidThread());
  // TODO(xians): Figure out if we need to handle the buffer size change.
  if (render_converter_.get() &&
      render_converter_->source_parameters().sample_rate() == sample_rate &&
      render_converter_->source_parameters().channels() == number_of_channels) {
    // Do nothing if |render_converter_| has already been set up properly.
    return;
  }

  // Create and initialize the audio converter for the render data.
  // webrtc::AudioProcessing accepts the same format as what it uses to process
  // capture data, which is 32k mono for desktops and 16k mono for Android.
  media::AudioParameters source_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::GuessChannelLayout(number_of_channels), sample_rate, 16,
      frames_per_buffer);
  media::AudioParameters sink_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::CHANNEL_LAYOUT_MONO, kAudioProcessingSampleRate, 16,
      kAudioProcessingSampleRate / 100);
  render_converter_.reset(
      new MediaStreamAudioConverter(source_params, sink_params));
  render_data_bus_ = media::AudioBus::Create(number_of_channels,
                                             frames_per_buffer);
}

int MediaStreamAudioProcessor::ProcessData(webrtc::AudioFrame* audio_frame,
                                           base::TimeDelta capture_delay,
                                           int volume,
                                           bool key_pressed) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  if (!audio_processing_)
    return 0;

  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData");
  DCHECK_EQ(audio_processing_->sample_rate_hz(),
            capture_converter_->sink_parameters().sample_rate());
  DCHECK_EQ(audio_processing_->num_input_channels(),
            capture_converter_->sink_parameters().channels());
  DCHECK_EQ(audio_processing_->num_output_channels(),
            capture_converter_->sink_parameters().channels());

  base::subtle::Atomic32 render_delay_ms =
      base::subtle::Acquire_Load(&render_delay_ms_);
  int64 capture_delay_ms = capture_delay.InMilliseconds();
  DCHECK_LT(capture_delay_ms,
            std::numeric_limits<base::subtle::Atomic32>::max());
  int total_delay_ms = capture_delay_ms + render_delay_ms;
  if (total_delay_ms > 300) {
    LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms
                 << "ms; render delay: " << render_delay_ms << "ms";
  }

  audio_processing_->set_stream_delay_ms(total_delay_ms);
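
  // Illustrative example (added; not in the original source): with a 20 ms
  // capture delay and a 40 ms render delay, the AEC is told that roughly
  // 60 ms separate the far-end signal from its echo in the near-end stream;
  // only totals above 300 ms are considered suspicious enough to warn about.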

  webrtc::GainControl* agc = audio_processing_->gain_control();
  int err = agc->set_stream_analog_level(volume);
  DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err;

  audio_processing_->set_stream_key_pressed(key_pressed);

  err = audio_processing_->ProcessStream(audio_frame);
  DCHECK_EQ(err, 0) << "ProcessStream() error: " << err;

  if (typing_detector_ &&
      audio_frame->vad_activity_ != webrtc::AudioFrame::kVadUnknown) {
    bool vad_active =
        (audio_frame->vad_activity_ == webrtc::AudioFrame::kVadActive);
    bool typing_detected = typing_detector_->Process(key_pressed, vad_active);
    base::subtle::Release_Store(&typing_detected_, typing_detected);
  }

  // Return 0 if the volume has not been changed; otherwise return the new
  // volume.
  return (agc->stream_analog_level() == volume) ?
      0 : agc->stream_analog_level();
}
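
// Note (added for clarity; not in the original source): callers such as
// ProcessAndConsumeData() treat a non-zero return value as a request to
// adjust the microphone volume. For example, if the analog level passed in
// was 128 and the AGC settles on 140, ProcessData() returns 140; if the AGC
// leaves the level at 128, it returns 0 and the caller changes nothing.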

void MediaStreamAudioProcessor::StopAudioProcessing() {
  if (!audio_processing_.get())
    return;

  StopAecDump();

  if (playout_data_source_)
    playout_data_source_->RemovePlayoutSink(this);

  audio_processing_.reset();
}

bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() const {
  const std::string group_name =
      base::FieldTrialList::FindFullName("MediaStreamAudioTrackProcessing");
  return group_name == "Enabled" || CommandLine::ForCurrentProcess()->HasSwitch(
      switches::kEnableAudioTrackProcessing);
}

}  // namespace content