// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/renderer/media/media_stream_audio_processor.h"

#include "base/command_line.h"
#include "base/debug/trace_event.h"
#include "base/metrics/field_trial.h"
#include "base/metrics/histogram.h"
#include "content/public/common/content_switches.h"
#include "content/renderer/media/media_stream_audio_processor_options.h"
#include "content/renderer/media/rtc_media_constraints.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_converter.h"
#include "media/base/audio_fifo.h"
#include "media/base/channel_layout.h"
#include "third_party/WebKit/public/platform/WebMediaConstraints.h"
#include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h"
#include "third_party/webrtc/modules/audio_processing/typing_detection.h"

namespace content {

namespace {

using webrtc::AudioProcessing;
using webrtc::MediaConstraintsInterface;

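// WebRTC's AudioProcessing operates natively at a fixed sample rate; the
// lower 16 kHz rate is used on Android, presumably to reduce CPU cost on
// mobile devices.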
#if defined(OS_ANDROID)
const int kAudioProcessingSampleRate = 16000;
#else
const int kAudioProcessingSampleRate = 32000;
#endif
const int kAudioProcessingNumberOfChannels = 1;

const int kMaxNumberOfBuffersInFifo = 2;

// Used by UMA histograms; entries shouldn't be re-ordered or removed.
enum AudioTrackProcessingStates {
  AUDIO_PROCESSING_ENABLED = 0,
  AUDIO_PROCESSING_DISABLED,
  AUDIO_PROCESSING_IN_WEBRTC,
  AUDIO_PROCESSING_MAX
};

void RecordProcessingState(AudioTrackProcessingStates state) {
  UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates",
                            state, AUDIO_PROCESSING_MAX);
}

}  // namespace

class MediaStreamAudioProcessor::MediaStreamAudioConverter
    : public media::AudioConverter::InputCallback {
 public:
  MediaStreamAudioConverter(const media::AudioParameters& source_params,
                            const media::AudioParameters& sink_params)
     : source_params_(source_params),
       sink_params_(sink_params),
       audio_converter_(source_params, sink_params_, false) {
    // An instance of MediaStreamAudioConverter may be created on the main
    // render thread and used on the audio thread, as is the case for
    // |MediaStreamAudioProcessor::capture_converter_|.
    thread_checker_.DetachFromThread();
    audio_converter_.AddInput(this);
    // Create and initialize the audio FIFO and audio bus wrapper.
    // The size of the FIFO should be at least twice the source buffer size
    // or twice the sink buffer size, whichever is larger.
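    // Worked example (illustrative numbers, not taken from this file): a
    // 44.1 kHz source with 441 frames per buffer and a 16 kHz sink with 160
    // frames per buffer give max(2 * 441, 2 * 160) = 882 frames below.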
    int buffer_size = std::max(
        kMaxNumberOfBuffersInFifo * source_params_.frames_per_buffer(),
        kMaxNumberOfBuffersInFifo * sink_params_.frames_per_buffer());
    fifo_.reset(new media::AudioFifo(source_params_.channels(), buffer_size));
    // TODO(xians): Use CreateWrapper to save one memcpy.
    audio_wrapper_ = media::AudioBus::Create(sink_params_.channels(),
                                             sink_params_.frames_per_buffer());
  }

  virtual ~MediaStreamAudioConverter() {
    audio_converter_.RemoveInput(this);
  }

  void Push(media::AudioBus* audio_source) {
    // Called on the audio thread, which is the capture audio thread for
    // |MediaStreamAudioProcessor::capture_converter_| and the render audio
    // thread for |MediaStreamAudioProcessor::render_converter_|. It must be
    // the same thread that calls Convert().
    DCHECK(thread_checker_.CalledOnValidThread());
    fifo_->Push(audio_source);
  }

  bool Convert(webrtc::AudioFrame* out) {
    // Called on the audio thread, which is the capture audio thread for
    // |MediaStreamAudioProcessor::capture_converter_| and the render audio
    // thread for |MediaStreamAudioProcessor::render_converter_|.
    DCHECK(thread_checker_.CalledOnValidThread());
    // Return false if there is not enough data in the FIFO; this happens when
    // fifo_->frames() / source_params_.sample_rate() is less than
    // sink_params_.frames_per_buffer() / sink_params_.sample_rate().
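    // Equivalently, Convert() needs at least sink_params_.frames_per_buffer()
    // * source_params_.sample_rate() / sink_params_.sample_rate() source
    // frames; e.g. (illustrative numbers) a 160-frame, 16 kHz sink fed from a
    // 44.1 kHz source needs 160 * 44100 / 16000 = 441 frames in the FIFO.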
    if (fifo_->frames() * sink_params_.sample_rate() <
        sink_params_.frames_per_buffer() * source_params_.sample_rate()) {
      return false;
    }

    // Convert data to the output format; this will trigger ProvideInput().
    audio_converter_.Convert(audio_wrapper_.get());

    // TODO(xians): Figure out a better way to handle the interleaved and
    // deinterleaved format switching.
    DCHECK_EQ(audio_wrapper_->frames(), sink_params_.frames_per_buffer());
    audio_wrapper_->ToInterleaved(audio_wrapper_->frames(),
                                  sink_params_.bits_per_sample() / 8,
                                  out->data_);
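    // With the 16-bit sink format used here, ToInterleaved() writes
    // frames * channels * 2 bytes of interleaved PCM into |out->data_|.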

    out->samples_per_channel_ = sink_params_.frames_per_buffer();
    out->sample_rate_hz_ = sink_params_.sample_rate();
    out->speech_type_ = webrtc::AudioFrame::kNormalSpeech;
    out->vad_activity_ = webrtc::AudioFrame::kVadUnknown;
    out->num_channels_ = sink_params_.channels();

    return true;
  }

  const media::AudioParameters& source_parameters() const {
    return source_params_;
  }
  const media::AudioParameters& sink_parameters() const {
    return sink_params_;
  }

 private:
  // AudioConverter::InputCallback implementation.
  virtual double ProvideInput(media::AudioBus* audio_bus,
                              base::TimeDelta buffer_delay) OVERRIDE {
    // Called on the realtime audio thread.
    // TODO(xians): Figure out why the first Convert() triggers ProvideInput()
    // two times.
    if (fifo_->frames() < audio_bus->frames())
      return 0;

    fifo_->Consume(audio_bus, 0, audio_bus->frames());

    // Return 1.0 to indicate no volume scaling on the data.
    return 1.0;
  }

  base::ThreadChecker thread_checker_;
  const media::AudioParameters source_params_;
  const media::AudioParameters sink_params_;

  // TODO(xians): Consider using SincResampler to save some memcpy.
  // Handles mixing and resampling between input and output parameters.
  media::AudioConverter audio_converter_;
  scoped_ptr<media::AudioBus> audio_wrapper_;
  scoped_ptr<media::AudioFifo> fifo_;
};

MediaStreamAudioProcessor::MediaStreamAudioProcessor(
    const blink::WebMediaConstraints& constraints,
    int effects,
    MediaStreamType type,
    WebRtcPlayoutDataSource* playout_data_source)
    : render_delay_ms_(0),
      playout_data_source_(playout_data_source),
      audio_mirroring_(false),
      typing_detected_(false) {
  capture_thread_checker_.DetachFromThread();
  render_thread_checker_.DetachFromThread();
  InitializeAudioProcessingModule(constraints, effects, type);
}

MediaStreamAudioProcessor::~MediaStreamAudioProcessor() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  StopAudioProcessing();
}

void MediaStreamAudioProcessor::OnCaptureFormatChanged(
    const media::AudioParameters& source_params) {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  // There is no need to hold a lock here since the caller guarantees that
  // there are no more PushCaptureData() and ProcessAndConsumeData() callbacks
  // on the capture thread.
  InitializeCaptureConverter(source_params);

  // Reset the |capture_thread_checker_| since the capture data will come from
  // a new capture thread.
  capture_thread_checker_.DetachFromThread();
}

void MediaStreamAudioProcessor::PushCaptureData(media::AudioBus* audio_source) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  DCHECK_EQ(audio_source->channels(),
            capture_converter_->source_parameters().channels());
  DCHECK_EQ(audio_source->frames(),
            capture_converter_->source_parameters().frames_per_buffer());

  if (audio_mirroring_ &&
      capture_converter_->source_parameters().channel_layout() ==
          media::CHANNEL_LAYOUT_STEREO) {
    // Swap the first and second channels.
    audio_source->SwapChannels(0, 1);
  }

  capture_converter_->Push(audio_source);
}

bool MediaStreamAudioProcessor::ProcessAndConsumeData(
    base::TimeDelta capture_delay, int volume, bool key_pressed,
    int* new_volume, int16** out) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData");

  if (!capture_converter_->Convert(&capture_frame_))
    return false;

  *new_volume = ProcessData(&capture_frame_, capture_delay, volume,
                            key_pressed);
  *out = capture_frame_.data_;

  return true;
}

const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const {
  return capture_converter_->source_parameters();
}

const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const {
  return capture_converter_->sink_parameters();
}

void MediaStreamAudioProcessor::StartAecDump(
    const base::PlatformFile& aec_dump_file) {
  if (audio_processing_)
    StartEchoCancellationDump(audio_processing_.get(), aec_dump_file);
}

void MediaStreamAudioProcessor::StopAecDump() {
  if (audio_processing_)
    StopEchoCancellationDump(audio_processing_.get());
}

void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus,
                                              int sample_rate,
                                              int audio_delay_milliseconds) {
  DCHECK(render_thread_checker_.CalledOnValidThread());
#if defined(OS_ANDROID) || defined(OS_IOS)
  DCHECK(audio_processing_->echo_control_mobile()->is_enabled());
#else
  DCHECK(audio_processing_->echo_cancellation()->is_enabled());
#endif

  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData");
  DCHECK_LT(audio_delay_milliseconds,
            std::numeric_limits<base::subtle::Atomic32>::max());
  base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds);
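  // |render_delay_ms_| is written here on the render thread and read on the
  // capture thread in ProcessData() via Acquire_Load(); the Release/Acquire
  // pair makes the latest delay value visible across threads without a lock.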

  InitializeRenderConverterIfNeeded(sample_rate, audio_bus->channels(),
                                    audio_bus->frames());

  render_converter_->Push(audio_bus);
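  // Drain the FIFO in 10 ms chunks; each converted frame is fed to the APM as
  // far-end (reverse-stream) reference data for echo cancellation.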
  while (render_converter_->Convert(&render_frame_))
    audio_processing_->AnalyzeReverseStream(&render_frame_);
}

void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  // There is no need to hold a lock here since the caller guarantees that
  // there are no more OnPlayoutData() callbacks on the render thread.
  render_thread_checker_.DetachFromThread();
  render_converter_.reset();
}

void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) {
  stats->typing_noise_detected =
      (base::subtle::Acquire_Load(&typing_detected_) != false);
  GetAecStats(audio_processing_.get(), stats);
}

void MediaStreamAudioProcessor::InitializeAudioProcessingModule(
    const blink::WebMediaConstraints& constraints, int effects,
    MediaStreamType type) {
  DCHECK(!audio_processing_);

  RTCMediaConstraints native_constraints(constraints);

  // Audio mirroring can be enabled even when audio processing is otherwise
  // disabled.
  audio_mirroring_ = GetPropertyFromConstraints(
      &native_constraints, webrtc::MediaConstraintsInterface::kAudioMirroring);

  if (!IsAudioTrackProcessingEnabled()) {
    RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC);
    return;
  }

  // Only apply the fixed constraints for gUM of MEDIA_DEVICE_AUDIO_CAPTURE.
  DCHECK(IsAudioMediaType(type));
  if (type == MEDIA_DEVICE_AUDIO_CAPTURE)
    ApplyFixedAudioConstraints(&native_constraints);

  if (effects & media::AudioParameters::ECHO_CANCELLER) {
    // If the platform echo canceller is enabled, disable the software AEC.
    native_constraints.AddMandatory(
        MediaConstraintsInterface::kEchoCancellation,
        MediaConstraintsInterface::kValueFalse, true);
  }

#if defined(OS_IOS)
  // On iOS, VPIO provides built-in AEC and AGC.
  const bool enable_aec = false;
  const bool enable_agc = false;
#else
  const bool enable_aec = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kEchoCancellation);
  const bool enable_agc = GetPropertyFromConstraints(
      &native_constraints, webrtc::MediaConstraintsInterface::kAutoGainControl);
#endif

#if defined(OS_IOS) || defined(OS_ANDROID)
  const bool enable_experimental_aec = false;
  const bool enable_typing_detection = false;
#else
  const bool enable_experimental_aec = GetPropertyFromConstraints(
      &native_constraints,
      MediaConstraintsInterface::kExperimentalEchoCancellation);
  const bool enable_typing_detection = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kTypingNoiseDetection);
#endif

  const bool enable_ns = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kNoiseSuppression);
  const bool enable_experimental_ns = GetPropertyFromConstraints(
      &native_constraints,
      MediaConstraintsInterface::kExperimentalNoiseSuppression);
  const bool enable_high_pass_filter = GetPropertyFromConstraints(
      &native_constraints, MediaConstraintsInterface::kHighpassFilter);

  // Return immediately if no audio processing component is enabled.
  if (!enable_aec && !enable_experimental_aec && !enable_ns &&
      !enable_high_pass_filter && !enable_typing_detection && !enable_agc &&
      !enable_experimental_ns) {
    RecordProcessingState(AUDIO_PROCESSING_DISABLED);
    return;
  }

  // Create and configure the webrtc::AudioProcessing.
  audio_processing_.reset(webrtc::AudioProcessing::Create(0));

  // Enable the audio processing components.
  if (enable_aec) {
    EnableEchoCancellation(audio_processing_.get());
    if (enable_experimental_aec)
      EnableExperimentalEchoCancellation(audio_processing_.get());

    if (playout_data_source_)
      playout_data_source_->AddPlayoutSink(this);
  }

  if (enable_ns)
    EnableNoiseSuppression(audio_processing_.get());

  if (enable_experimental_ns)
    EnableExperimentalNoiseSuppression(audio_processing_.get());

  if (enable_high_pass_filter)
    EnableHighPassFilter(audio_processing_.get());

  if (enable_typing_detection) {
    // TODO(xians): Remove this |typing_detector_| after the typing suppression
    // is enabled by default.
    typing_detector_.reset(new webrtc::TypingDetection());
    EnableTypingDetection(audio_processing_.get(), typing_detector_.get());
  }

  if (enable_agc)
    EnableAutomaticGainControl(audio_processing_.get());

  // Configure the audio format that the audio processing runs on. This has to
  // be done after all the needed components are enabled.
  CHECK_EQ(0,
           audio_processing_->set_sample_rate_hz(kAudioProcessingSampleRate));
  CHECK_EQ(0, audio_processing_->set_num_channels(
      kAudioProcessingNumberOfChannels, kAudioProcessingNumberOfChannels));

  RecordProcessingState(AUDIO_PROCESSING_ENABLED);
}

void MediaStreamAudioProcessor::InitializeCaptureConverter(
    const media::AudioParameters& source_params) {
  DCHECK(main_thread_checker_.CalledOnValidThread());
  DCHECK(source_params.IsValid());

  // Create and initialize the audio converter for the source data.
  // When the webrtc AudioProcessing is enabled, the sink format of the
  // converter will be the same as the post-processed data format, which is
  // 32k mono for desktops and 16k mono for Android. When the AudioProcessing
  // is disabled, the sink format will be the same as the source format.
  const int sink_sample_rate = audio_processing_ ?
      kAudioProcessingSampleRate : source_params.sample_rate();
  const media::ChannelLayout sink_channel_layout = audio_processing_ ?
      media::GuessChannelLayout(kAudioProcessingNumberOfChannels) :
      source_params.channel_layout();

  // WebRtc AudioProcessing requires 10 ms as its packet size, so we use this
  // native size when processing is enabled. When processing is disabled and
  // the source is running with a buffer size smaller than 10 ms, we use the
  // source buffer size to avoid an extra FIFO for WebAudio.
  int sink_buffer_size = sink_sample_rate / 100;
  if (!audio_processing_ &&
      source_params.frames_per_buffer() < sink_buffer_size) {
    sink_buffer_size = source_params.frames_per_buffer();
  }

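  // Illustrative example: with processing enabled on desktop, a 48 kHz stereo
  // source is converted to 32 kHz mono in 320-frame (10 ms) buffers; with
  // processing disabled, a 48 kHz, 128-frames-per-buffer WebAudio source
  // keeps its original format.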
  media::AudioParameters sink_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY, sink_channel_layout,
      sink_sample_rate, 16, sink_buffer_size);
  capture_converter_.reset(
      new MediaStreamAudioConverter(source_params, sink_params));
}

void MediaStreamAudioProcessor::InitializeRenderConverterIfNeeded(
    int sample_rate, int number_of_channels, int frames_per_buffer) {
  DCHECK(render_thread_checker_.CalledOnValidThread());
  // TODO(xians): Figure out if we need to handle the buffer size change.
  if (render_converter_.get() &&
      render_converter_->source_parameters().sample_rate() == sample_rate &&
      render_converter_->source_parameters().channels() == number_of_channels) {
    // Do nothing if the |render_converter_| has been set up properly.
    return;
  }

  // Create and initialize the audio converter for the render data.
  // webrtc::AudioProcessing accepts the same format that it uses to process
  // capture data, which is 32k mono for desktops and 16k mono for Android.
  media::AudioParameters source_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::GuessChannelLayout(number_of_channels), sample_rate, 16,
      frames_per_buffer);
  media::AudioParameters sink_params(
      media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
      media::CHANNEL_LAYOUT_MONO, kAudioProcessingSampleRate, 16,
      kAudioProcessingSampleRate / 100);
  render_converter_.reset(
      new MediaStreamAudioConverter(source_params, sink_params));
  render_data_bus_ = media::AudioBus::Create(number_of_channels,
                                             frames_per_buffer);
}

int MediaStreamAudioProcessor::ProcessData(webrtc::AudioFrame* audio_frame,
                                           base::TimeDelta capture_delay,
                                           int volume,
                                           bool key_pressed) {
  DCHECK(capture_thread_checker_.CalledOnValidThread());
  if (!audio_processing_)
    return 0;

  TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData");
  DCHECK_EQ(audio_processing_->sample_rate_hz(),
            capture_converter_->sink_parameters().sample_rate());
  DCHECK_EQ(audio_processing_->num_input_channels(),
            capture_converter_->sink_parameters().channels());
  DCHECK_EQ(audio_processing_->num_output_channels(),
            capture_converter_->sink_parameters().channels());

  base::subtle::Atomic32 render_delay_ms =
      base::subtle::Acquire_Load(&render_delay_ms_);
  int64 capture_delay_ms = capture_delay.InMilliseconds();
  DCHECK_LT(capture_delay_ms,
            std::numeric_limits<base::subtle::Atomic32>::max());
  int total_delay_ms = capture_delay_ms + render_delay_ms;
  if (total_delay_ms > 300) {
    LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms
                 << "ms; render delay: " << render_delay_ms << "ms";
  }

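  // The APM uses this delay estimate to align the far-end (render) stream
  // with the near-end (capture) stream inside the echo canceller.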
  audio_processing_->set_stream_delay_ms(total_delay_ms);

  webrtc::GainControl* agc = audio_processing_->gain_control();
  int err = agc->set_stream_analog_level(volume);
  DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err;

  audio_processing_->set_stream_key_pressed(key_pressed);

  err = audio_processing_->ProcessStream(audio_frame);
  DCHECK_EQ(err, 0) << "ProcessStream() error: " << err;

  if (typing_detector_ &&
      audio_frame->vad_activity_ != webrtc::AudioFrame::kVadUnknown) {
    bool vad_active =
        (audio_frame->vad_activity_ == webrtc::AudioFrame::kVadActive);
    bool typing_detected = typing_detector_->Process(key_pressed, vad_active);
    base::subtle::Release_Store(&typing_detected_, typing_detected);
  }

  // Return 0 if the volume has not been changed; otherwise, return the new
  // volume.
  return (agc->stream_analog_level() == volume) ?
      0 : agc->stream_analog_level();
}

void MediaStreamAudioProcessor::StopAudioProcessing() {
  if (!audio_processing_.get())
    return;

  StopAecDump();

  if (playout_data_source_)
    playout_data_source_->RemovePlayoutSink(this);

  audio_processing_.reset();
}

bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() const {
  const std::string group_name =
      base::FieldTrialList::FindFullName("MediaStreamAudioTrackProcessing");
  return group_name == "Enabled" ||
         CommandLine::ForCurrentProcess()->HasSwitch(
             switches::kEnableAudioTrackProcessing);
}

}  // namespace content