Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / content / renderer / media / media_stream_audio_processor.cc
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/renderer/media/media_stream_audio_processor.h"
6
7 #include "base/command_line.h"
8 #include "base/debug/trace_event.h"
9 #if defined(OS_MACOSX)
10 #include "base/metrics/field_trial.h"
11 #endif
12 #include "base/metrics/histogram.h"
13 #include "content/public/common/content_switches.h"
14 #include "content/renderer/media/media_stream_audio_processor_options.h"
15 #include "content/renderer/media/rtc_media_constraints.h"
16 #include "content/renderer/media/webrtc_audio_device_impl.h"
17 #include "media/audio/audio_parameters.h"
18 #include "media/base/audio_converter.h"
19 #include "media/base/audio_fifo.h"
20 #include "media/base/channel_layout.h"
21 #include "third_party/WebKit/public/platform/WebMediaConstraints.h"
22 #include "third_party/libjingle/source/talk/app/webrtc/mediaconstraintsinterface.h"
23 #include "third_party/webrtc/modules/audio_processing/typing_detection.h"
24
25 namespace content {
26
27 namespace {
28
29 using webrtc::AudioProcessing;
30
// Fixed output format used whenever webrtc::AudioProcessing is active:
// a platform-dependent sample rate and a single channel.
#if !defined(OS_ANDROID)
const int kAudioProcessingSampleRate = 32000;
#else
const int kAudioProcessingSampleRate = 16000;
#endif
const int kAudioProcessingNumberOfChannels = 1;
37
38 AudioProcessing::ChannelLayout MapLayout(media::ChannelLayout media_layout) {
39   switch (media_layout) {
40     case media::CHANNEL_LAYOUT_MONO:
41       return AudioProcessing::kMono;
42     case media::CHANNEL_LAYOUT_STEREO:
43       return AudioProcessing::kStereo;
44     case media::CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
45       return AudioProcessing::kStereoAndKeyboard;
46     default:
47       NOTREACHED() << "Layout not supported: " << media_layout;
48       return AudioProcessing::kMono;
49   }
50 }
51
52 AudioProcessing::ChannelLayout ChannelsToLayout(int num_channels) {
53   switch (num_channels) {
54     case 1:
55       return AudioProcessing::kMono;
56     case 2:
57       return AudioProcessing::kStereo;
58     default:
59       NOTREACHED() << "Channels not supported: " << num_channels;
60       return AudioProcessing::kMono;
61   }
62 }
63
// Buckets reported to the "Media.AudioTrackProcessingStates" UMA histogram.
// The numeric values are part of the recorded data: never re-order or remove
// entries, only append new ones immediately before AUDIO_PROCESSING_MAX.
enum AudioTrackProcessingStates {
  AUDIO_PROCESSING_ENABLED = 0,
  AUDIO_PROCESSING_DISABLED = 1,
  AUDIO_PROCESSING_IN_WEBRTC = 2,
  AUDIO_PROCESSING_MAX = 3
};
71
72 void RecordProcessingState(AudioTrackProcessingStates state) {
73   UMA_HISTOGRAM_ENUMERATION("Media.AudioTrackProcessingStates",
74                             state, AUDIO_PROCESSING_MAX);
75 }
76
77 }  // namespace
78
79 // Wraps AudioBus to provide access to the array of channel pointers, since this
80 // is the type webrtc::AudioProcessing deals in. The array is refreshed on every
81 // channel_ptrs() call, and will be valid until the underlying AudioBus pointers
82 // are changed, e.g. through calls to SetChannelData() or SwapChannels().
83 //
84 // All methods are called on one of the capture or render audio threads
85 // exclusively.
86 class MediaStreamAudioBus {
87  public:
88   MediaStreamAudioBus(int channels, int frames)
89       : bus_(media::AudioBus::Create(channels, frames)),
90         channel_ptrs_(new float*[channels]) {
91     // May be created in the main render thread and used in the audio threads.
92     thread_checker_.DetachFromThread();
93   }
94
95   media::AudioBus* bus() {
96     DCHECK(thread_checker_.CalledOnValidThread());
97     return bus_.get();
98   }
99
100   float* const* channel_ptrs() {
101     DCHECK(thread_checker_.CalledOnValidThread());
102     for (int i = 0; i < bus_->channels(); ++i) {
103       channel_ptrs_[i] = bus_->channel(i);
104     }
105     return channel_ptrs_.get();
106   }
107
108  private:
109   base::ThreadChecker thread_checker_;
110   scoped_ptr<media::AudioBus> bus_;
111   scoped_ptr<float*[]> channel_ptrs_;
112 };
113
114 // Wraps AudioFifo to provide a cleaner interface to MediaStreamAudioProcessor.
115 // It avoids the FIFO when the source and destination frames match. All methods
116 // are called on one of the capture or render audio threads exclusively.
117 class MediaStreamAudioFifo {
118  public:
119   MediaStreamAudioFifo(int channels, int source_frames,
120                        int destination_frames)
121      : source_frames_(source_frames),
122        destination_(new MediaStreamAudioBus(channels, destination_frames)),
123        data_available_(false) {
124     if (source_frames != destination_frames) {
125       // Since we require every Push to be followed by as many Consumes as
126       // possible, twice the larger of the two is a (probably) loose upper bound
127       // on the FIFO size.
128       const int fifo_frames = 2 * std::max(source_frames, destination_frames);
129       fifo_.reset(new media::AudioFifo(channels, fifo_frames));
130     }
131
132     // May be created in the main render thread and used in the audio threads.
133     thread_checker_.DetachFromThread();
134   }
135
136   void Push(const media::AudioBus* source) {
137     DCHECK(thread_checker_.CalledOnValidThread());
138     DCHECK_EQ(source->channels(), destination_->bus()->channels());
139     DCHECK_EQ(source->frames(), source_frames_);
140
141     if (fifo_) {
142       fifo_->Push(source);
143     } else {
144       source->CopyTo(destination_->bus());
145       data_available_ = true;
146     }
147   }
148
149   // Returns true if there are destination_frames() of data available to be
150   // consumed, and otherwise false.
151   bool Consume(MediaStreamAudioBus** destination) {
152     DCHECK(thread_checker_.CalledOnValidThread());
153
154     if (fifo_) {
155       if (fifo_->frames() < destination_->bus()->frames())
156         return false;
157
158       fifo_->Consume(destination_->bus(), 0, destination_->bus()->frames());
159     } else {
160       if (!data_available_)
161         return false;
162
163       // The data was already copied to |destination_| in this case.
164       data_available_ = false;
165     }
166
167     *destination = destination_.get();
168     return true;
169   }
170
171  private:
172   base::ThreadChecker thread_checker_;
173   const int source_frames_;  // For a DCHECK.
174   scoped_ptr<MediaStreamAudioBus> destination_;
175   scoped_ptr<media::AudioFifo> fifo_;
176   // Only used when the FIFO is disabled;
177   bool data_available_;
178 };
179
180 bool MediaStreamAudioProcessor::IsAudioTrackProcessingEnabled() {
181   return !CommandLine::ForCurrentProcess()->HasSwitch(
182       switches::kDisableAudioTrackProcessing);
183 }
184
185 MediaStreamAudioProcessor::MediaStreamAudioProcessor(
186     const blink::WebMediaConstraints& constraints,
187     int effects,
188     WebRtcPlayoutDataSource* playout_data_source)
189     : render_delay_ms_(0),
190       playout_data_source_(playout_data_source),
191       audio_mirroring_(false),
192       typing_detected_(false),
193       stopped_(false) {
194   capture_thread_checker_.DetachFromThread();
195   render_thread_checker_.DetachFromThread();
196   InitializeAudioProcessingModule(constraints, effects);
197   if (IsAudioTrackProcessingEnabled()) {
198     aec_dump_message_filter_ = AecDumpMessageFilter::Get();
199     // In unit tests not creating a message filter, |aec_dump_message_filter_|
200     // will be NULL. We can just ignore that. Other unit tests and browser tests
201     // ensure that we do get the filter when we should.
202     if (aec_dump_message_filter_.get())
203       aec_dump_message_filter_->AddDelegate(this);
204   }
205 }
206
207 MediaStreamAudioProcessor::~MediaStreamAudioProcessor() {
208   DCHECK(main_thread_checker_.CalledOnValidThread());
209   Stop();
210 }
211
212 void MediaStreamAudioProcessor::OnCaptureFormatChanged(
213     const media::AudioParameters& input_format) {
214   DCHECK(main_thread_checker_.CalledOnValidThread());
215   // There is no need to hold a lock here since the caller guarantees that
216   // there is no more PushCaptureData() and ProcessAndConsumeData() callbacks
217   // on the capture thread.
218   InitializeCaptureFifo(input_format);
219
220   // Reset the |capture_thread_checker_| since the capture data will come from
221   // a new capture thread.
222   capture_thread_checker_.DetachFromThread();
223 }
224
225 void MediaStreamAudioProcessor::PushCaptureData(
226     const media::AudioBus* audio_source) {
227   DCHECK(capture_thread_checker_.CalledOnValidThread());
228
229   capture_fifo_->Push(audio_source);
230 }
231
232 bool MediaStreamAudioProcessor::ProcessAndConsumeData(
233     base::TimeDelta capture_delay, int volume, bool key_pressed,
234     int* new_volume, int16** out) {
235   DCHECK(capture_thread_checker_.CalledOnValidThread());
236   TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessAndConsumeData");
237
238   MediaStreamAudioBus* process_bus;
239   if (!capture_fifo_->Consume(&process_bus))
240     return false;
241
242   // Use the process bus directly if audio processing is disabled.
243   MediaStreamAudioBus* output_bus = process_bus;
244   *new_volume = 0;
245   if (audio_processing_) {
246     output_bus = output_bus_.get();
247     *new_volume = ProcessData(process_bus->channel_ptrs(),
248                               process_bus->bus()->frames(), capture_delay,
249                               volume, key_pressed, output_bus->channel_ptrs());
250   }
251
252   // Swap channels before interleaving the data.
253   if (audio_mirroring_ &&
254       output_format_.channel_layout() == media::CHANNEL_LAYOUT_STEREO) {
255     // Swap the first and second channels.
256     output_bus->bus()->SwapChannels(0, 1);
257   }
258
259   output_bus->bus()->ToInterleaved(output_bus->bus()->frames(),
260                                    sizeof(int16),
261                                    output_data_.get());
262   *out = output_data_.get();
263
264   return true;
265 }
266
267 void MediaStreamAudioProcessor::Stop() {
268   DCHECK(main_thread_checker_.CalledOnValidThread());
269   if (stopped_)
270     return;
271
272   stopped_ = true;
273
274   if (aec_dump_message_filter_.get()) {
275     aec_dump_message_filter_->RemoveDelegate(this);
276     aec_dump_message_filter_ = NULL;
277   }
278
279   if (!audio_processing_.get())
280     return;
281
282   StopEchoCancellationDump(audio_processing_.get());
283
284   if (playout_data_source_) {
285     playout_data_source_->RemovePlayoutSink(this);
286     playout_data_source_ = NULL;
287   }
288 }
289
290 const media::AudioParameters& MediaStreamAudioProcessor::InputFormat() const {
291   return input_format_;
292 }
293
294 const media::AudioParameters& MediaStreamAudioProcessor::OutputFormat() const {
295   return output_format_;
296 }
297
298 void MediaStreamAudioProcessor::OnAecDumpFile(
299     const IPC::PlatformFileForTransit& file_handle) {
300   DCHECK(main_thread_checker_.CalledOnValidThread());
301
302   base::File file = IPC::PlatformFileForTransitToFile(file_handle);
303   DCHECK(file.IsValid());
304
305   if (audio_processing_)
306     StartEchoCancellationDump(audio_processing_.get(), file.Pass());
307   else
308     file.Close();
309 }
310
311 void MediaStreamAudioProcessor::OnDisableAecDump() {
312   DCHECK(main_thread_checker_.CalledOnValidThread());
313   if (audio_processing_)
314     StopEchoCancellationDump(audio_processing_.get());
315 }
316
317 void MediaStreamAudioProcessor::OnIpcClosing() {
318   DCHECK(main_thread_checker_.CalledOnValidThread());
319   aec_dump_message_filter_ = NULL;
320 }
321
322 void MediaStreamAudioProcessor::OnPlayoutData(media::AudioBus* audio_bus,
323                                               int sample_rate,
324                                               int audio_delay_milliseconds) {
325   DCHECK(render_thread_checker_.CalledOnValidThread());
326   DCHECK(audio_processing_->echo_control_mobile()->is_enabled() ^
327          audio_processing_->echo_cancellation()->is_enabled());
328
329   TRACE_EVENT0("audio", "MediaStreamAudioProcessor::OnPlayoutData");
330   DCHECK_LT(audio_delay_milliseconds,
331             std::numeric_limits<base::subtle::Atomic32>::max());
332   base::subtle::Release_Store(&render_delay_ms_, audio_delay_milliseconds);
333
334   InitializeRenderFifoIfNeeded(sample_rate, audio_bus->channels(),
335                                audio_bus->frames());
336
337   render_fifo_->Push(audio_bus);
338   MediaStreamAudioBus* analysis_bus;
339   while (render_fifo_->Consume(&analysis_bus)) {
340     audio_processing_->AnalyzeReverseStream(
341         analysis_bus->channel_ptrs(),
342         analysis_bus->bus()->frames(),
343         sample_rate,
344         ChannelsToLayout(audio_bus->channels()));
345   }
346 }
347
348 void MediaStreamAudioProcessor::OnPlayoutDataSourceChanged() {
349   DCHECK(main_thread_checker_.CalledOnValidThread());
350   // There is no need to hold a lock here since the caller guarantees that
351   // there is no more OnPlayoutData() callback on the render thread.
352   render_thread_checker_.DetachFromThread();
353   render_fifo_.reset();
354 }
355
356 void MediaStreamAudioProcessor::GetStats(AudioProcessorStats* stats) {
357   stats->typing_noise_detected =
358       (base::subtle::Acquire_Load(&typing_detected_) != false);
359   GetAecStats(audio_processing_.get(), stats);
360 }
361
362 void MediaStreamAudioProcessor::InitializeAudioProcessingModule(
363     const blink::WebMediaConstraints& constraints, int effects) {
364   DCHECK(!audio_processing_);
365
366   MediaAudioConstraints audio_constraints(constraints, effects);
367
368   // Audio mirroring can be enabled even though audio processing is otherwise
369   // disabled.
370   audio_mirroring_ = audio_constraints.GetProperty(
371       MediaAudioConstraints::kGoogAudioMirroring);
372
373   if (!IsAudioTrackProcessingEnabled()) {
374     RecordProcessingState(AUDIO_PROCESSING_IN_WEBRTC);
375     return;
376   }
377
378 #if defined(OS_IOS)
379   // On iOS, VPIO provides built-in AGC and AEC.
380   const bool echo_cancellation = false;
381   const bool goog_agc = false;
382 #else
383   const bool echo_cancellation =
384       audio_constraints.GetEchoCancellationProperty();
385   const bool goog_agc = audio_constraints.GetProperty(
386       MediaAudioConstraints::kGoogAutoGainControl);
387 #endif
388
389 #if defined(OS_IOS) || defined(OS_ANDROID)
390   const bool goog_experimental_aec = false;
391   const bool goog_typing_detection = false;
392 #else
393   const bool goog_experimental_aec = audio_constraints.GetProperty(
394       MediaAudioConstraints::kGoogExperimentalEchoCancellation);
395   const bool goog_typing_detection = audio_constraints.GetProperty(
396       MediaAudioConstraints::kGoogTypingNoiseDetection);
397 #endif
398
399   const bool goog_ns = audio_constraints.GetProperty(
400       MediaAudioConstraints::kGoogNoiseSuppression);
401   const bool goog_experimental_ns = audio_constraints.GetProperty(
402       MediaAudioConstraints::kGoogExperimentalNoiseSuppression);
403  const bool goog_high_pass_filter = audio_constraints.GetProperty(
404      MediaAudioConstraints::kGoogHighpassFilter);
405
406   // Return immediately if no goog constraint is enabled.
407   if (!echo_cancellation && !goog_experimental_aec && !goog_ns &&
408       !goog_high_pass_filter && !goog_typing_detection &&
409       !goog_agc && !goog_experimental_ns) {
410     RecordProcessingState(AUDIO_PROCESSING_DISABLED);
411     return;
412   }
413
414   // Experimental options provided at creation.
415   webrtc::Config config;
416   if (goog_experimental_aec)
417     config.Set<webrtc::DelayCorrection>(new webrtc::DelayCorrection(true));
418   if (goog_experimental_ns)
419     config.Set<webrtc::ExperimentalNs>(new webrtc::ExperimentalNs(true));
420 #if defined(OS_MACOSX)
421   if (base::FieldTrialList::FindFullName("NoReportedDelayOnMac") == "Enabled")
422     config.Set<webrtc::ReportedDelay>(new webrtc::ReportedDelay(false));
423 #endif
424
425   // Create and configure the webrtc::AudioProcessing.
426   audio_processing_.reset(webrtc::AudioProcessing::Create(config));
427
428   // Enable the audio processing components.
429   if (echo_cancellation) {
430     EnableEchoCancellation(audio_processing_.get());
431
432     if (playout_data_source_)
433       playout_data_source_->AddPlayoutSink(this);
434   }
435
436   if (goog_ns)
437     EnableNoiseSuppression(audio_processing_.get());
438
439   if (goog_high_pass_filter)
440     EnableHighPassFilter(audio_processing_.get());
441
442   if (goog_typing_detection) {
443     // TODO(xians): Remove this |typing_detector_| after the typing suppression
444     // is enabled by default.
445     typing_detector_.reset(new webrtc::TypingDetection());
446     EnableTypingDetection(audio_processing_.get(), typing_detector_.get());
447   }
448
449   if (goog_agc)
450     EnableAutomaticGainControl(audio_processing_.get());
451
452   RecordProcessingState(AUDIO_PROCESSING_ENABLED);
453 }
454
455 void MediaStreamAudioProcessor::InitializeCaptureFifo(
456     const media::AudioParameters& input_format) {
457   DCHECK(main_thread_checker_.CalledOnValidThread());
458   DCHECK(input_format.IsValid());
459   input_format_ = input_format;
460
461   // TODO(ajm): For now, we assume fixed parameters for the output when audio
462   // processing is enabled, to match the previous behavior. We should either
463   // use the input parameters (in which case, audio processing will convert
464   // at output) or ideally, have a backchannel from the sink to know what
465   // format it would prefer.
466   const int output_sample_rate = audio_processing_ ?
467       kAudioProcessingSampleRate : input_format.sample_rate();
468   const media::ChannelLayout output_channel_layout = audio_processing_ ?
469       media::GuessChannelLayout(kAudioProcessingNumberOfChannels) :
470       input_format.channel_layout();
471
472   // webrtc::AudioProcessing requires a 10 ms chunk size. We use this native
473   // size when processing is enabled. When disabled we use the same size as
474   // the source if less than 10 ms.
475   //
476   // TODO(ajm): This conditional buffer size appears to be assuming knowledge of
477   // the sink based on the source parameters. PeerConnection sinks seem to want
478   // 10 ms chunks regardless, while WebAudio sinks want less, and we're assuming
479   // we can identify WebAudio sinks by the input chunk size. Less fragile would
480   // be to have the sink actually tell us how much it wants (as in the above
481   // TODO).
482   int processing_frames = input_format.sample_rate() / 100;
483   int output_frames = output_sample_rate / 100;
484   if (!audio_processing_ && input_format.frames_per_buffer() < output_frames) {
485     processing_frames = input_format.frames_per_buffer();
486     output_frames = processing_frames;
487   }
488
489   output_format_ = media::AudioParameters(
490       media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
491       output_channel_layout,
492       output_sample_rate,
493       16,
494       output_frames);
495
496   capture_fifo_.reset(
497       new MediaStreamAudioFifo(input_format.channels(),
498                                input_format.frames_per_buffer(),
499                                processing_frames));
500
501   if (audio_processing_) {
502     output_bus_.reset(new MediaStreamAudioBus(output_format_.channels(),
503                                               output_frames));
504   }
505   output_data_.reset(new int16[output_format_.GetBytesPerBuffer() /
506                                sizeof(int16)]);
507 }
508
509 void MediaStreamAudioProcessor::InitializeRenderFifoIfNeeded(
510     int sample_rate, int number_of_channels, int frames_per_buffer) {
511   DCHECK(render_thread_checker_.CalledOnValidThread());
512   if (render_fifo_.get() &&
513       render_format_.sample_rate() == sample_rate &&
514       render_format_.channels() == number_of_channels &&
515       render_format_.frames_per_buffer() == frames_per_buffer) {
516     // Do nothing if the |render_fifo_| has been setup properly.
517     return;
518   }
519
520   render_format_ = media::AudioParameters(
521       media::AudioParameters::AUDIO_PCM_LOW_LATENCY,
522       media::GuessChannelLayout(number_of_channels),
523       sample_rate,
524       16,
525       frames_per_buffer);
526
527   const int analysis_frames = sample_rate / 100;  // 10 ms chunks.
528   render_fifo_.reset(
529       new MediaStreamAudioFifo(number_of_channels,
530                                frames_per_buffer,
531                                analysis_frames));
532 }
533
534 int MediaStreamAudioProcessor::ProcessData(const float* const* process_ptrs,
535                                            int process_frames,
536                                            base::TimeDelta capture_delay,
537                                            int volume,
538                                            bool key_pressed,
539                                            float* const* output_ptrs) {
540   DCHECK(audio_processing_);
541   DCHECK(capture_thread_checker_.CalledOnValidThread());
542
543   TRACE_EVENT0("audio", "MediaStreamAudioProcessor::ProcessData");
544
545   base::subtle::Atomic32 render_delay_ms =
546       base::subtle::Acquire_Load(&render_delay_ms_);
547   int64 capture_delay_ms = capture_delay.InMilliseconds();
548   DCHECK_LT(capture_delay_ms,
549             std::numeric_limits<base::subtle::Atomic32>::max());
550   int total_delay_ms =  capture_delay_ms + render_delay_ms;
551   if (total_delay_ms > 300) {
552     LOG(WARNING) << "Large audio delay, capture delay: " << capture_delay_ms
553                  << "ms; render delay: " << render_delay_ms << "ms";
554   }
555
556   webrtc::AudioProcessing* ap = audio_processing_.get();
557   ap->set_stream_delay_ms(total_delay_ms);
558
559   DCHECK_LE(volume, WebRtcAudioDeviceImpl::kMaxVolumeLevel);
560   webrtc::GainControl* agc = ap->gain_control();
561   int err = agc->set_stream_analog_level(volume);
562   DCHECK_EQ(err, 0) << "set_stream_analog_level() error: " << err;
563
564   ap->set_stream_key_pressed(key_pressed);
565
566   err = ap->ProcessStream(process_ptrs,
567                           process_frames,
568                           input_format_.sample_rate(),
569                           MapLayout(input_format_.channel_layout()),
570                           output_format_.sample_rate(),
571                           MapLayout(output_format_.channel_layout()),
572                           output_ptrs);
573   DCHECK_EQ(err, 0) << "ProcessStream() error: " << err;
574
575   if (typing_detector_) {
576     webrtc::VoiceDetection* vad = ap->voice_detection();
577     DCHECK(vad->is_enabled());
578     bool detected = typing_detector_->Process(key_pressed,
579                                               vad->stream_has_voice());
580     base::subtle::Release_Store(&typing_detected_, detected);
581   }
582
583   // Return 0 if the volume hasn't been changed, and otherwise the new volume.
584   return (agc->stream_analog_level() == volume) ?
585       0 : agc->stream_analog_level();
586 }
587
588 }  // namespace content