/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
11 #include "webrtc/modules/audio_processing/audio_buffer.h"
13 #include "webrtc/common_audio/include/audio_util.h"
14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
// Number of samples per channel in one frame at each named sample rate
// (80 samples at 8 kHz, 160 at 16 kHz, 320 at 32 kHz).
enum {
  kSamplesPer8kHzChannel = 80,
  kSamplesPer16kHzChannel = 160,
  kSamplesPer32kHzChannel = 320
};
26 bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
28 case AudioProcessing::kMono:
29 case AudioProcessing::kStereo:
31 case AudioProcessing::kMonoAndKeyboard:
32 case AudioProcessing::kStereoAndKeyboard:
39 int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
41 case AudioProcessing::kMono:
42 case AudioProcessing::kStereo:
45 case AudioProcessing::kMonoAndKeyboard:
47 case AudioProcessing::kStereoAndKeyboard:
// Downmixes a stereo pair to mono by averaging corresponding samples:
// out[i] = (left[i] + right[i]) / 2 for i in [0, samples_per_channel).
// All three arrays must hold at least |samples_per_channel| elements.
void StereoToMono(const float* left, const float* right, float* out,
                  int samples_per_channel) {
  for (int i = 0; i < samples_per_channel; ++i) {
    out[i] = (left[i] + right[i]) / 2;
  }
}
// Fixed-point stereo-to-mono downmix: out[i] = (left[i] + right[i]) >> 1.
// The sum is formed in int (integral promotion), so it cannot overflow the
// 16-bit range before the shift. The shift halves the sum; for negative sums
// this rounds toward negative infinity on arithmetic-shift platforms.
void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
                  int samples_per_channel) {
  for (int i = 0; i < samples_per_channel; ++i) {
    out[i] = (left[i] + right[i]) >> 1;
  }
}
70 // One int16_t and one float ChannelBuffer that are kept in sync. The sync is
71 // broken when someone requests write access to either ChannelBuffer, and
72 // reestablished when someone requests the outdated ChannelBuffer. It is
73 // therefore safe to use the return value of ibuf_const() and fbuf_const()
74 // until the next call to ibuf() or fbuf(), and the return value of ibuf() and
75 // fbuf() until the next call to any of the other functions.
76 class IFChannelBuffer {
78 IFChannelBuffer(int samples_per_channel, int num_channels)
80 ibuf_(samples_per_channel, num_channels),
82 fbuf_(samples_per_channel, num_channels) {}
84 ChannelBuffer<int16_t>* ibuf() { return ibuf(false); }
85 ChannelBuffer<float>* fbuf() { return fbuf(false); }
86 const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); }
87 const ChannelBuffer<float>* fbuf_const() { return fbuf(true); }
90 ChannelBuffer<int16_t>* ibuf(bool readonly) {
96 ChannelBuffer<float>* fbuf(bool readonly) {
105 const int16_t* const int_data = ibuf_.data();
106 float* const float_data = fbuf_.data();
107 const int length = fbuf_.length();
108 for (int i = 0; i < length; ++i)
109 float_data[i] = int_data[i];
117 const float* const float_data = fbuf_.data();
118 int16_t* const int_data = ibuf_.data();
119 const int length = ibuf_.length();
120 for (int i = 0; i < length; ++i)
121 int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
123 std::numeric_limits<int16_t>::min());
129 ChannelBuffer<int16_t> ibuf_;
131 ChannelBuffer<float> fbuf_;
134 AudioBuffer::AudioBuffer(int input_samples_per_channel,
135 int num_input_channels,
136 int process_samples_per_channel,
137 int num_process_channels,
138 int output_samples_per_channel)
139 : input_samples_per_channel_(input_samples_per_channel),
140 num_input_channels_(num_input_channels),
141 proc_samples_per_channel_(process_samples_per_channel),
142 num_proc_channels_(num_process_channels),
143 output_samples_per_channel_(output_samples_per_channel),
144 samples_per_split_channel_(proc_samples_per_channel_),
145 mixed_low_pass_valid_(false),
146 reference_copied_(false),
147 activity_(AudioFrame::kVadUnknown),
148 keyboard_data_(NULL),
149 channels_(new IFChannelBuffer(proc_samples_per_channel_,
150 num_proc_channels_)) {
151 assert(input_samples_per_channel_ > 0);
152 assert(proc_samples_per_channel_ > 0);
153 assert(output_samples_per_channel_ > 0);
154 assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
155 assert(num_proc_channels_ <= num_input_channels);
157 if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
158 input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
159 num_proc_channels_));
162 if (input_samples_per_channel_ != proc_samples_per_channel_ ||
163 output_samples_per_channel_ != proc_samples_per_channel_) {
164 // Create an intermediate buffer for resampling.
165 process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
166 num_proc_channels_));
169 if (input_samples_per_channel_ != proc_samples_per_channel_) {
170 input_resamplers_.reserve(num_proc_channels_);
171 for (int i = 0; i < num_proc_channels_; ++i) {
172 input_resamplers_.push_back(
173 new PushSincResampler(input_samples_per_channel_,
174 proc_samples_per_channel_));
178 if (output_samples_per_channel_ != proc_samples_per_channel_) {
179 output_resamplers_.reserve(num_proc_channels_);
180 for (int i = 0; i < num_proc_channels_; ++i) {
181 output_resamplers_.push_back(
182 new PushSincResampler(proc_samples_per_channel_,
183 output_samples_per_channel_));
187 if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
188 samples_per_split_channel_ = kSamplesPer16kHzChannel;
189 split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_,
190 num_proc_channels_));
191 split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_,
192 num_proc_channels_));
193 filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
197 AudioBuffer::~AudioBuffer() {}
199 void AudioBuffer::CopyFrom(const float* const* data,
200 int samples_per_channel,
201 AudioProcessing::ChannelLayout layout) {
202 assert(samples_per_channel == input_samples_per_channel_);
203 assert(ChannelsFromLayout(layout) == num_input_channels_);
206 if (HasKeyboardChannel(layout)) {
207 keyboard_data_ = data[KeyboardChannelIndex(layout)];
211 const float* const* data_ptr = data;
212 if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
213 StereoToMono(data[0],
215 input_buffer_->channel(0),
216 input_samples_per_channel_);
217 data_ptr = input_buffer_->channels();
221 if (input_samples_per_channel_ != proc_samples_per_channel_) {
222 for (int i = 0; i < num_proc_channels_; ++i) {
223 input_resamplers_[i]->Resample(data_ptr[i],
224 input_samples_per_channel_,
225 process_buffer_->channel(i),
226 proc_samples_per_channel_);
228 data_ptr = process_buffer_->channels();
232 for (int i = 0; i < num_proc_channels_; ++i) {
233 ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
234 channels_->ibuf()->channel(i));
238 void AudioBuffer::CopyTo(int samples_per_channel,
239 AudioProcessing::ChannelLayout layout,
240 float* const* data) {
241 assert(samples_per_channel == output_samples_per_channel_);
242 assert(ChannelsFromLayout(layout) == num_proc_channels_);
245 float* const* data_ptr = data;
246 if (output_samples_per_channel_ != proc_samples_per_channel_) {
247 // Convert to an intermediate buffer for subsequent resampling.
248 data_ptr = process_buffer_->channels();
250 for (int i = 0; i < num_proc_channels_; ++i) {
251 ScaleToFloat(channels_->ibuf()->channel(i),
252 proc_samples_per_channel_,
257 if (output_samples_per_channel_ != proc_samples_per_channel_) {
258 for (int i = 0; i < num_proc_channels_; ++i) {
259 output_resamplers_[i]->Resample(data_ptr[i],
260 proc_samples_per_channel_,
262 output_samples_per_channel_);
267 void AudioBuffer::InitForNewData() {
268 keyboard_data_ = NULL;
269 mixed_low_pass_valid_ = false;
270 reference_copied_ = false;
271 activity_ = AudioFrame::kVadUnknown;
274 const int16_t* AudioBuffer::data(int channel) const {
275 return channels_->ibuf_const()->channel(channel);
278 int16_t* AudioBuffer::data(int channel) {
279 mixed_low_pass_valid_ = false;
280 return channels_->ibuf()->channel(channel);
283 const float* AudioBuffer::data_f(int channel) const {
284 return channels_->fbuf_const()->channel(channel);
287 float* AudioBuffer::data_f(int channel) {
288 mixed_low_pass_valid_ = false;
289 return channels_->fbuf()->channel(channel);
292 const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
293 return split_channels_low_.get()
294 ? split_channels_low_->ibuf_const()->channel(channel)
298 int16_t* AudioBuffer::low_pass_split_data(int channel) {
299 mixed_low_pass_valid_ = false;
300 return split_channels_low_.get()
301 ? split_channels_low_->ibuf()->channel(channel)
305 const float* AudioBuffer::low_pass_split_data_f(int channel) const {
306 return split_channels_low_.get()
307 ? split_channels_low_->fbuf_const()->channel(channel)
311 float* AudioBuffer::low_pass_split_data_f(int channel) {
312 mixed_low_pass_valid_ = false;
313 return split_channels_low_.get()
314 ? split_channels_low_->fbuf()->channel(channel)
318 const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
319 return split_channels_high_.get()
320 ? split_channels_high_->ibuf_const()->channel(channel)
324 int16_t* AudioBuffer::high_pass_split_data(int channel) {
325 return split_channels_high_.get()
326 ? split_channels_high_->ibuf()->channel(channel)
330 const float* AudioBuffer::high_pass_split_data_f(int channel) const {
331 return split_channels_high_.get()
332 ? split_channels_high_->fbuf_const()->channel(channel)
336 float* AudioBuffer::high_pass_split_data_f(int channel) {
337 return split_channels_high_.get()
338 ? split_channels_high_->fbuf()->channel(channel)
342 const int16_t* AudioBuffer::mixed_low_pass_data() {
343 // Currently only mixing stereo to mono is supported.
344 assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
346 if (num_proc_channels_ == 1) {
347 return low_pass_split_data(0);
350 if (!mixed_low_pass_valid_) {
351 if (!mixed_low_pass_channels_.get()) {
352 mixed_low_pass_channels_.reset(
353 new ChannelBuffer<int16_t>(samples_per_split_channel_, 1));
355 StereoToMono(low_pass_split_data(0),
356 low_pass_split_data(1),
357 mixed_low_pass_channels_->data(),
358 samples_per_split_channel_);
359 mixed_low_pass_valid_ = true;
361 return mixed_low_pass_channels_->data();
364 const int16_t* AudioBuffer::low_pass_reference(int channel) const {
365 if (!reference_copied_) {
369 return low_pass_reference_channels_->channel(channel);
372 const float* AudioBuffer::keyboard_data() const {
373 return keyboard_data_;
376 SplitFilterStates* AudioBuffer::filter_states(int channel) {
377 assert(channel >= 0 && channel < num_proc_channels_);
378 return &filter_states_[channel];
381 void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
382 activity_ = activity;
385 AudioFrame::VADActivity AudioBuffer::activity() const {
389 int AudioBuffer::num_channels() const {
390 return num_proc_channels_;
393 int AudioBuffer::samples_per_channel() const {
394 return proc_samples_per_channel_;
397 int AudioBuffer::samples_per_split_channel() const {
398 return samples_per_split_channel_;
401 int AudioBuffer::samples_per_keyboard_channel() const {
402 // We don't resample the keyboard channel.
403 return input_samples_per_channel_;
406 // TODO(andrew): Do deinterleaving and mixing in one step?
407 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
408 assert(proc_samples_per_channel_ == input_samples_per_channel_);
409 assert(frame->num_channels_ == num_input_channels_);
410 assert(frame->samples_per_channel_ == proc_samples_per_channel_);
412 activity_ = frame->vad_activity_;
414 if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
415 // Downmix directly; no explicit deinterleaving needed.
416 int16_t* downmixed = channels_->ibuf()->channel(0);
417 for (int i = 0; i < input_samples_per_channel_; ++i) {
418 // HACK(ajm): The downmixing in the int16_t path is in practice never
419 // called from production code. We do this weird scaling to and from float
420 // to satisfy tests checking for bit-exactness with the float path.
421 float downmix_float = (ScaleToFloat(frame->data_[i * 2]) +
422 ScaleToFloat(frame->data_[i * 2 + 1])) / 2;
423 downmixed[i] = ScaleAndRoundToInt16(downmix_float);
426 assert(num_proc_channels_ == num_input_channels_);
427 int16_t* interleaved = frame->data_;
428 for (int i = 0; i < num_proc_channels_; ++i) {
429 int16_t* deinterleaved = channels_->ibuf()->channel(i);
430 int interleaved_idx = i;
431 for (int j = 0; j < proc_samples_per_channel_; ++j) {
432 deinterleaved[j] = interleaved[interleaved_idx];
433 interleaved_idx += num_proc_channels_;
439 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
440 assert(proc_samples_per_channel_ == output_samples_per_channel_);
441 assert(num_proc_channels_ == num_input_channels_);
442 assert(frame->num_channels_ == num_proc_channels_);
443 assert(frame->samples_per_channel_ == proc_samples_per_channel_);
444 frame->vad_activity_ = activity_;
450 int16_t* interleaved = frame->data_;
451 for (int i = 0; i < num_proc_channels_; i++) {
452 int16_t* deinterleaved = channels_->ibuf()->channel(i);
453 int interleaved_idx = i;
454 for (int j = 0; j < proc_samples_per_channel_; j++) {
455 interleaved[interleaved_idx] = deinterleaved[j];
456 interleaved_idx += num_proc_channels_;
461 void AudioBuffer::CopyLowPassToReference() {
462 reference_copied_ = true;
463 if (!low_pass_reference_channels_.get()) {
464 low_pass_reference_channels_.reset(
465 new ChannelBuffer<int16_t>(samples_per_split_channel_,
466 num_proc_channels_));
468 for (int i = 0; i < num_proc_channels_; i++) {
469 low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
473 } // namespace webrtc