Upstream version 10.39.225.0
[platform/framework/web/crosswalk.git] / src / third_party / webrtc / modules / audio_processing / audio_buffer.cc
1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10
11 #include "webrtc/modules/audio_processing/audio_buffer.h"
12
13 #include "webrtc/common_audio/include/audio_util.h"
14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
16
17 namespace webrtc {
18 namespace {
19
// Per-channel frame lengths, in samples, for the 8, 16 and 32 kHz rates named
// in the identifiers (each works out to 10 ms of audio at that rate).
enum {
  kSamplesPer8kHzChannel = 80,
  kSamplesPer16kHzChannel = 160,
  kSamplesPer32kHzChannel = 320
};
25
26 bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
27   switch (layout) {
28     case AudioProcessing::kMono:
29     case AudioProcessing::kStereo:
30       return false;
31     case AudioProcessing::kMonoAndKeyboard:
32     case AudioProcessing::kStereoAndKeyboard:
33       return true;
34   }
35   assert(false);
36   return false;
37 }
38
39 int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
40   switch (layout) {
41     case AudioProcessing::kMono:
42     case AudioProcessing::kStereo:
43       assert(false);
44       return -1;
45     case AudioProcessing::kMonoAndKeyboard:
46       return 1;
47     case AudioProcessing::kStereoAndKeyboard:
48       return 2;
49   }
50   assert(false);
51   return -1;
52 }
53
// Downmixes two float channels into one by averaging them sample by sample.
// |left|, |right| and |out| must each hold at least |samples_per_channel|
// samples; nothing is written when |samples_per_channel| is not positive.
void StereoToMono(const float* left, const float* right, float* out,
                  int samples_per_channel) {
  for (int frame = 0; frame < samples_per_channel; ++frame) {
    const float sum = left[frame] + right[frame];
    out[frame] = sum / 2;
  }
}
60
// Downmixes two int16_t channels into one. The two samples are added in int
// (so the sum cannot overflow int16_t) and then halved with a right shift.
// |left|, |right| and |out| must each hold at least |samples_per_channel|
// samples.
void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
                  int samples_per_channel) {
  for (int frame = 0; frame < samples_per_channel; ++frame) {
    const int sum = left[frame] + right[frame];
    out[frame] = sum >> 1;
  }
}
67
68 }  // namespace
69
70 // One int16_t and one float ChannelBuffer that are kept in sync. The sync is
71 // broken when someone requests write access to either ChannelBuffer, and
72 // reestablished when someone requests the outdated ChannelBuffer. It is
73 // therefore safe to use the return value of ibuf_const() and fbuf_const()
74 // until the next call to ibuf() or fbuf(), and the return value of ibuf() and
75 // fbuf() until the next call to any of the other functions.
76 class IFChannelBuffer {
77  public:
78   IFChannelBuffer(int samples_per_channel, int num_channels)
79       : ivalid_(true),
80         ibuf_(samples_per_channel, num_channels),
81         fvalid_(true),
82         fbuf_(samples_per_channel, num_channels) {}
83
84   ChannelBuffer<int16_t>* ibuf() { return ibuf(false); }
85   ChannelBuffer<float>* fbuf() { return fbuf(false); }
86   const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); }
87   const ChannelBuffer<float>* fbuf_const() { return fbuf(true); }
88
89  private:
90   ChannelBuffer<int16_t>* ibuf(bool readonly) {
91     RefreshI();
92     fvalid_ = readonly;
93     return &ibuf_;
94   }
95
96   ChannelBuffer<float>* fbuf(bool readonly) {
97     RefreshF();
98     ivalid_ = readonly;
99     return &fbuf_;
100   }
101
102   void RefreshF() {
103     if (!fvalid_) {
104       assert(ivalid_);
105       const int16_t* const int_data = ibuf_.data();
106       float* const float_data = fbuf_.data();
107       const int length = fbuf_.length();
108       for (int i = 0; i < length; ++i)
109         float_data[i] = int_data[i];
110       fvalid_ = true;
111     }
112   }
113
114   void RefreshI() {
115     if (!ivalid_) {
116       assert(fvalid_);
117       const float* const float_data = fbuf_.data();
118       int16_t* const int_data = ibuf_.data();
119       const int length = ibuf_.length();
120       for (int i = 0; i < length; ++i)
121         int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
122                                      float_data[i],
123                                      std::numeric_limits<int16_t>::min());
124       ivalid_ = true;
125     }
126   }
127
128   bool ivalid_;
129   ChannelBuffer<int16_t> ibuf_;
130   bool fvalid_;
131   ChannelBuffer<float> fbuf_;
132 };
133
134 AudioBuffer::AudioBuffer(int input_samples_per_channel,
135                          int num_input_channels,
136                          int process_samples_per_channel,
137                          int num_process_channels,
138                          int output_samples_per_channel)
139   : input_samples_per_channel_(input_samples_per_channel),
140     num_input_channels_(num_input_channels),
141     proc_samples_per_channel_(process_samples_per_channel),
142     num_proc_channels_(num_process_channels),
143     output_samples_per_channel_(output_samples_per_channel),
144     samples_per_split_channel_(proc_samples_per_channel_),
145     mixed_low_pass_valid_(false),
146     reference_copied_(false),
147     activity_(AudioFrame::kVadUnknown),
148     keyboard_data_(NULL),
149     channels_(new IFChannelBuffer(proc_samples_per_channel_,
150                                   num_proc_channels_)) {
151   assert(input_samples_per_channel_ > 0);
152   assert(proc_samples_per_channel_ > 0);
153   assert(output_samples_per_channel_ > 0);
154   assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
155   assert(num_proc_channels_ <= num_input_channels);
156
157   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
158     input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
159                                                  num_proc_channels_));
160   }
161
162   if (input_samples_per_channel_ != proc_samples_per_channel_ ||
163       output_samples_per_channel_ != proc_samples_per_channel_) {
164     // Create an intermediate buffer for resampling.
165     process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
166                                                    num_proc_channels_));
167   }
168
169   if (input_samples_per_channel_ != proc_samples_per_channel_) {
170     input_resamplers_.reserve(num_proc_channels_);
171     for (int i = 0; i < num_proc_channels_; ++i) {
172       input_resamplers_.push_back(
173           new PushSincResampler(input_samples_per_channel_,
174                                 proc_samples_per_channel_));
175     }
176   }
177
178   if (output_samples_per_channel_ != proc_samples_per_channel_) {
179     output_resamplers_.reserve(num_proc_channels_);
180     for (int i = 0; i < num_proc_channels_; ++i) {
181       output_resamplers_.push_back(
182           new PushSincResampler(proc_samples_per_channel_,
183                                 output_samples_per_channel_));
184     }
185   }
186
187   if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
188     samples_per_split_channel_ = kSamplesPer16kHzChannel;
189     split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_,
190                                                   num_proc_channels_));
191     split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_,
192                                                    num_proc_channels_));
193     filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
194   }
195 }
196
197 AudioBuffer::~AudioBuffer() {}
198
199 void AudioBuffer::CopyFrom(const float* const* data,
200                            int samples_per_channel,
201                            AudioProcessing::ChannelLayout layout) {
202   assert(samples_per_channel == input_samples_per_channel_);
203   assert(ChannelsFromLayout(layout) == num_input_channels_);
204   InitForNewData();
205
206   if (HasKeyboardChannel(layout)) {
207     keyboard_data_ = data[KeyboardChannelIndex(layout)];
208   }
209
210   // Downmix.
211   const float* const* data_ptr = data;
212   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
213     StereoToMono(data[0],
214                  data[1],
215                  input_buffer_->channel(0),
216                  input_samples_per_channel_);
217     data_ptr = input_buffer_->channels();
218   }
219
220   // Resample.
221   if (input_samples_per_channel_ != proc_samples_per_channel_) {
222     for (int i = 0; i < num_proc_channels_; ++i) {
223       input_resamplers_[i]->Resample(data_ptr[i],
224                                      input_samples_per_channel_,
225                                      process_buffer_->channel(i),
226                                      proc_samples_per_channel_);
227     }
228     data_ptr = process_buffer_->channels();
229   }
230
231   // Convert to int16.
232   for (int i = 0; i < num_proc_channels_; ++i) {
233     ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
234                          channels_->ibuf()->channel(i));
235   }
236 }
237
238 void AudioBuffer::CopyTo(int samples_per_channel,
239                          AudioProcessing::ChannelLayout layout,
240                          float* const* data) {
241   assert(samples_per_channel == output_samples_per_channel_);
242   assert(ChannelsFromLayout(layout) == num_proc_channels_);
243
244   // Convert to float.
245   float* const* data_ptr = data;
246   if (output_samples_per_channel_ != proc_samples_per_channel_) {
247     // Convert to an intermediate buffer for subsequent resampling.
248     data_ptr = process_buffer_->channels();
249   }
250   for (int i = 0; i < num_proc_channels_; ++i) {
251     ScaleToFloat(channels_->ibuf()->channel(i),
252                  proc_samples_per_channel_,
253                  data_ptr[i]);
254   }
255
256   // Resample.
257   if (output_samples_per_channel_ != proc_samples_per_channel_) {
258     for (int i = 0; i < num_proc_channels_; ++i) {
259       output_resamplers_[i]->Resample(data_ptr[i],
260                                       proc_samples_per_channel_,
261                                       data[i],
262                                       output_samples_per_channel_);
263     }
264   }
265 }
266
267 void AudioBuffer::InitForNewData() {
268   keyboard_data_ = NULL;
269   mixed_low_pass_valid_ = false;
270   reference_copied_ = false;
271   activity_ = AudioFrame::kVadUnknown;
272 }
273
274 const int16_t* AudioBuffer::data(int channel) const {
275   return channels_->ibuf_const()->channel(channel);
276 }
277
278 int16_t* AudioBuffer::data(int channel) {
279   mixed_low_pass_valid_ = false;
280   return channels_->ibuf()->channel(channel);
281 }
282
283 const float* AudioBuffer::data_f(int channel) const {
284   return channels_->fbuf_const()->channel(channel);
285 }
286
287 float* AudioBuffer::data_f(int channel) {
288   mixed_low_pass_valid_ = false;
289   return channels_->fbuf()->channel(channel);
290 }
291
292 const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
293   return split_channels_low_.get()
294       ? split_channels_low_->ibuf_const()->channel(channel)
295       : data(channel);
296 }
297
298 int16_t* AudioBuffer::low_pass_split_data(int channel) {
299   mixed_low_pass_valid_ = false;
300   return split_channels_low_.get()
301       ? split_channels_low_->ibuf()->channel(channel)
302       : data(channel);
303 }
304
305 const float* AudioBuffer::low_pass_split_data_f(int channel) const {
306   return split_channels_low_.get()
307       ? split_channels_low_->fbuf_const()->channel(channel)
308       : data_f(channel);
309 }
310
311 float* AudioBuffer::low_pass_split_data_f(int channel) {
312   mixed_low_pass_valid_ = false;
313   return split_channels_low_.get()
314       ? split_channels_low_->fbuf()->channel(channel)
315       : data_f(channel);
316 }
317
318 const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
319   return split_channels_high_.get()
320       ? split_channels_high_->ibuf_const()->channel(channel)
321       : NULL;
322 }
323
324 int16_t* AudioBuffer::high_pass_split_data(int channel) {
325   return split_channels_high_.get()
326       ? split_channels_high_->ibuf()->channel(channel)
327       : NULL;
328 }
329
330 const float* AudioBuffer::high_pass_split_data_f(int channel) const {
331   return split_channels_high_.get()
332       ? split_channels_high_->fbuf_const()->channel(channel)
333       : NULL;
334 }
335
336 float* AudioBuffer::high_pass_split_data_f(int channel) {
337   return split_channels_high_.get()
338       ? split_channels_high_->fbuf()->channel(channel)
339       : NULL;
340 }
341
342 const int16_t* AudioBuffer::mixed_low_pass_data() {
343   // Currently only mixing stereo to mono is supported.
344   assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
345
346   if (num_proc_channels_ == 1) {
347     return low_pass_split_data(0);
348   }
349
350   if (!mixed_low_pass_valid_) {
351     if (!mixed_low_pass_channels_.get()) {
352       mixed_low_pass_channels_.reset(
353           new ChannelBuffer<int16_t>(samples_per_split_channel_, 1));
354     }
355     StereoToMono(low_pass_split_data(0),
356                  low_pass_split_data(1),
357                  mixed_low_pass_channels_->data(),
358                  samples_per_split_channel_);
359     mixed_low_pass_valid_ = true;
360   }
361   return mixed_low_pass_channels_->data();
362 }
363
364 const int16_t* AudioBuffer::low_pass_reference(int channel) const {
365   if (!reference_copied_) {
366     return NULL;
367   }
368
369   return low_pass_reference_channels_->channel(channel);
370 }
371
372 const float* AudioBuffer::keyboard_data() const {
373   return keyboard_data_;
374 }
375
376 SplitFilterStates* AudioBuffer::filter_states(int channel) {
377   assert(channel >= 0 && channel < num_proc_channels_);
378   return &filter_states_[channel];
379 }
380
381 void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
382   activity_ = activity;
383 }
384
385 AudioFrame::VADActivity AudioBuffer::activity() const {
386   return activity_;
387 }
388
389 int AudioBuffer::num_channels() const {
390   return num_proc_channels_;
391 }
392
393 int AudioBuffer::samples_per_channel() const {
394   return proc_samples_per_channel_;
395 }
396
397 int AudioBuffer::samples_per_split_channel() const {
398   return samples_per_split_channel_;
399 }
400
401 int AudioBuffer::samples_per_keyboard_channel() const {
402   // We don't resample the keyboard channel.
403   return input_samples_per_channel_;
404 }
405
406 // TODO(andrew): Do deinterleaving and mixing in one step?
407 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
408   assert(proc_samples_per_channel_ == input_samples_per_channel_);
409   assert(frame->num_channels_ == num_input_channels_);
410   assert(frame->samples_per_channel_ ==  proc_samples_per_channel_);
411   InitForNewData();
412   activity_ = frame->vad_activity_;
413
414   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
415     // Downmix directly; no explicit deinterleaving needed.
416     int16_t* downmixed = channels_->ibuf()->channel(0);
417     for (int i = 0; i < input_samples_per_channel_; ++i) {
418       // HACK(ajm): The downmixing in the int16_t path is in practice never
419       // called from production code. We do this weird scaling to and from float
420       // to satisfy tests checking for bit-exactness with the float path.
421       float downmix_float = (ScaleToFloat(frame->data_[i * 2]) +
422                              ScaleToFloat(frame->data_[i * 2 + 1])) / 2;
423       downmixed[i] = ScaleAndRoundToInt16(downmix_float);
424     }
425   } else {
426     assert(num_proc_channels_ == num_input_channels_);
427     int16_t* interleaved = frame->data_;
428     for (int i = 0; i < num_proc_channels_; ++i) {
429       int16_t* deinterleaved = channels_->ibuf()->channel(i);
430       int interleaved_idx = i;
431       for (int j = 0; j < proc_samples_per_channel_; ++j) {
432         deinterleaved[j] = interleaved[interleaved_idx];
433         interleaved_idx += num_proc_channels_;
434       }
435     }
436   }
437 }
438
439 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
440   assert(proc_samples_per_channel_ == output_samples_per_channel_);
441   assert(num_proc_channels_ == num_input_channels_);
442   assert(frame->num_channels_ == num_proc_channels_);
443   assert(frame->samples_per_channel_ == proc_samples_per_channel_);
444   frame->vad_activity_ = activity_;
445
446   if (!data_changed) {
447     return;
448   }
449
450   int16_t* interleaved = frame->data_;
451   for (int i = 0; i < num_proc_channels_; i++) {
452     int16_t* deinterleaved = channels_->ibuf()->channel(i);
453     int interleaved_idx = i;
454     for (int j = 0; j < proc_samples_per_channel_; j++) {
455       interleaved[interleaved_idx] = deinterleaved[j];
456       interleaved_idx += num_proc_channels_;
457     }
458   }
459 }
460
461 void AudioBuffer::CopyLowPassToReference() {
462   reference_copied_ = true;
463   if (!low_pass_reference_channels_.get()) {
464     low_pass_reference_channels_.reset(
465         new ChannelBuffer<int16_t>(samples_per_split_channel_,
466                                    num_proc_channels_));
467   }
468   for (int i = 0; i < num_proc_channels_; i++) {
469     low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
470   }
471 }
472
473 }  // namespace webrtc