src/third_party/webrtc/modules/audio_processing/audio_buffer.cc

   1 /*
   2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11 #include "webrtc/modules/audio_processing/audio_buffer.h"
  12
  13 #include "webrtc/common_audio/include/audio_util.h"
  14 #include "webrtc/common_audio/resampler/push_sinc_resampler.h"
  15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
  16
  17 namespace webrtc {
  18 namespace {
  19
  20 enum {
  21   kSamplesPer8kHzChannel = 80,
  22   kSamplesPer16kHzChannel = 160,
  23   kSamplesPer32kHzChannel = 320
  24 };
  25
  26 bool HasKeyboardChannel(AudioProcessing::ChannelLayout layout) {
  27   switch (layout) {
  28     case AudioProcessing::kMono:
  29     case AudioProcessing::kStereo:
  30       return false;
  31     case AudioProcessing::kMonoAndKeyboard:
  32     case AudioProcessing::kStereoAndKeyboard:
  33       return true;
  34   }
  35   assert(false);
  36   return false;
  37 }
  38
  39 int KeyboardChannelIndex(AudioProcessing::ChannelLayout layout) {
  40   switch (layout) {
  41     case AudioProcessing::kMono:
  42     case AudioProcessing::kStereo:
  43       assert(false);
  44       return -1;
  45     case AudioProcessing::kMonoAndKeyboard:
  46       return 1;
  47     case AudioProcessing::kStereoAndKeyboard:
  48       return 2;
  49   }
  50   assert(false);
  51   return -1;
  52 }
  53
  54 void StereoToMono(const float* left, const float* right, float* out,
  55                   int samples_per_channel) {
  56   for (int i = 0; i < samples_per_channel; ++i) {
  57     out[i] = (left[i] + right[i]) / 2;
  58   }
  59 }
  60
  61 void StereoToMono(const int16_t* left, const int16_t* right, int16_t* out,
  62                   int samples_per_channel) {
  63   for (int i = 0; i < samples_per_channel; ++i) {
  64     out[i] = (left[i] + right[i]) >> 1;
  65   }
  66 }
  67
  68 }  // namespace
  69
  70 // One int16_t and one float ChannelBuffer that are kept in sync. The sync is
  71 // broken when someone requests write access to either ChannelBuffer, and
  72 // reestablished when someone requests the outdated ChannelBuffer. It is
  73 // therefore safe to use the return value of ibuf_const() and fbuf_const()
  74 // until the next call to ibuf() or fbuf(), and the return value of ibuf() and
  75 // fbuf() until the next call to any of the other functions.
  76 class IFChannelBuffer {
  77  public:
  78   IFChannelBuffer(int samples_per_channel, int num_channels)
  79       : ivalid_(true),
  80         ibuf_(samples_per_channel, num_channels),
  81         fvalid_(true),
  82         fbuf_(samples_per_channel, num_channels) {}
  83
  84   ChannelBuffer<int16_t>* ibuf() { return ibuf(false); }
  85   ChannelBuffer<float>* fbuf() { return fbuf(false); }
  86   const ChannelBuffer<int16_t>* ibuf_const() { return ibuf(true); }
  87   const ChannelBuffer<float>* fbuf_const() { return fbuf(true); }
  88
  89  private:
  90   ChannelBuffer<int16_t>* ibuf(bool readonly) {
  91     RefreshI();
  92     fvalid_ = readonly;
  93     return &ibuf_;
  94   }
  95
  96   ChannelBuffer<float>* fbuf(bool readonly) {
  97     RefreshF();
  98     ivalid_ = readonly;
  99     return &fbuf_;
 100   }
 101
 102   void RefreshF() {
 103     if (!fvalid_) {
 104       assert(ivalid_);
 105       const int16_t* const int_data = ibuf_.data();
 106       float* const float_data = fbuf_.data();
 107       const int length = fbuf_.length();
 108       for (int i = 0; i < length; ++i)
 109         float_data[i] = int_data[i];
 110       fvalid_ = true;
 111     }
 112   }
 113
 114   void RefreshI() {
 115     if (!ivalid_) {
 116       assert(fvalid_);
 117       const float* const float_data = fbuf_.data();
 118       int16_t* const int_data = ibuf_.data();
 119       const int length = ibuf_.length();
 120       for (int i = 0; i < length; ++i)
 121         int_data[i] = WEBRTC_SPL_SAT(std::numeric_limits<int16_t>::max(),
 122                                      float_data[i],
 123                                      std::numeric_limits<int16_t>::min());
 124       ivalid_ = true;
 125     }
 126   }
 127
 128   bool ivalid_;
 129   ChannelBuffer<int16_t> ibuf_;
 130   bool fvalid_;
 131   ChannelBuffer<float> fbuf_;
 132 };
 133
 134 AudioBuffer::AudioBuffer(int input_samples_per_channel,
 135                          int num_input_channels,
 136                          int process_samples_per_channel,
 137                          int num_process_channels,
 138                          int output_samples_per_channel)
 139   : input_samples_per_channel_(input_samples_per_channel),
 140     num_input_channels_(num_input_channels),
 141     proc_samples_per_channel_(process_samples_per_channel),
 142     num_proc_channels_(num_process_channels),
 143     output_samples_per_channel_(output_samples_per_channel),
 144     samples_per_split_channel_(proc_samples_per_channel_),
 145     mixed_low_pass_valid_(false),
 146     reference_copied_(false),
 147     activity_(AudioFrame::kVadUnknown),
 148     keyboard_data_(NULL),
 149     channels_(new IFChannelBuffer(proc_samples_per_channel_,
 150                                   num_proc_channels_)) {
 151   assert(input_samples_per_channel_ > 0);
 152   assert(proc_samples_per_channel_ > 0);
 153   assert(output_samples_per_channel_ > 0);
 154   assert(num_input_channels_ > 0 && num_input_channels_ <= 2);
 155   assert(num_proc_channels_ <= num_input_channels);
 156
 157   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
 158     input_buffer_.reset(new ChannelBuffer<float>(input_samples_per_channel_,
 159                                                  num_proc_channels_));
 160   }
 161
 162   if (input_samples_per_channel_ != proc_samples_per_channel_ ||
 163       output_samples_per_channel_ != proc_samples_per_channel_) {
 164     // Create an intermediate buffer for resampling.
 165     process_buffer_.reset(new ChannelBuffer<float>(proc_samples_per_channel_,
 166                                                    num_proc_channels_));
 167   }
 168
 169   if (input_samples_per_channel_ != proc_samples_per_channel_) {
 170     input_resamplers_.reserve(num_proc_channels_);
 171     for (int i = 0; i < num_proc_channels_; ++i) {
 172       input_resamplers_.push_back(
 173           new PushSincResampler(input_samples_per_channel_,
 174                                 proc_samples_per_channel_));
 175     }
 176   }
 177
 178   if (output_samples_per_channel_ != proc_samples_per_channel_) {
 179     output_resamplers_.reserve(num_proc_channels_);
 180     for (int i = 0; i < num_proc_channels_; ++i) {
 181       output_resamplers_.push_back(
 182           new PushSincResampler(proc_samples_per_channel_,
 183                                 output_samples_per_channel_));
 184     }
 185   }
 186
 187   if (proc_samples_per_channel_ == kSamplesPer32kHzChannel) {
 188     samples_per_split_channel_ = kSamplesPer16kHzChannel;
 189     split_channels_low_.reset(new IFChannelBuffer(samples_per_split_channel_,
 190                                                   num_proc_channels_));
 191     split_channels_high_.reset(new IFChannelBuffer(samples_per_split_channel_,
 192                                                    num_proc_channels_));
 193     filter_states_.reset(new SplitFilterStates[num_proc_channels_]);
 194   }
 195 }
 196
 197 AudioBuffer::~AudioBuffer() {}
 198
 199 void AudioBuffer::CopyFrom(const float* const* data,
 200                            int samples_per_channel,
 201                            AudioProcessing::ChannelLayout layout) {
 202   assert(samples_per_channel == input_samples_per_channel_);
 203   assert(ChannelsFromLayout(layout) == num_input_channels_);
 204   InitForNewData();
 205
 206   if (HasKeyboardChannel(layout)) {
 207     keyboard_data_ = data[KeyboardChannelIndex(layout)];
 208   }
 209
 210   // Downmix.
 211   const float* const* data_ptr = data;
 212   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
 213     StereoToMono(data[0],
 214                  data[1],
 215                  input_buffer_->channel(0),
 216                  input_samples_per_channel_);
 217     data_ptr = input_buffer_->channels();
 218   }
 219
 220   // Resample.
 221   if (input_samples_per_channel_ != proc_samples_per_channel_) {
 222     for (int i = 0; i < num_proc_channels_; ++i) {
 223       input_resamplers_[i]->Resample(data_ptr[i],
 224                                      input_samples_per_channel_,
 225                                      process_buffer_->channel(i),
 226                                      proc_samples_per_channel_);
 227     }
 228     data_ptr = process_buffer_->channels();
 229   }
 230
 231   // Convert to int16.
 232   for (int i = 0; i < num_proc_channels_; ++i) {
 233     ScaleAndRoundToInt16(data_ptr[i], proc_samples_per_channel_,
 234                          channels_->ibuf()->channel(i));
 235   }
 236 }
 237
 238 void AudioBuffer::CopyTo(int samples_per_channel,
 239                          AudioProcessing::ChannelLayout layout,
 240                          float* const* data) {
 241   assert(samples_per_channel == output_samples_per_channel_);
 242   assert(ChannelsFromLayout(layout) == num_proc_channels_);
 243
 244   // Convert to float.
 245   float* const* data_ptr = data;
 246   if (output_samples_per_channel_ != proc_samples_per_channel_) {
 247     // Convert to an intermediate buffer for subsequent resampling.
 248     data_ptr = process_buffer_->channels();
 249   }
 250   for (int i = 0; i < num_proc_channels_; ++i) {
 251     ScaleToFloat(channels_->ibuf()->channel(i),
 252                  proc_samples_per_channel_,
 253                  data_ptr[i]);
 254   }
 255
 256   // Resample.
 257   if (output_samples_per_channel_ != proc_samples_per_channel_) {
 258     for (int i = 0; i < num_proc_channels_; ++i) {
 259       output_resamplers_[i]->Resample(data_ptr[i],
 260                                       proc_samples_per_channel_,
 261                                       data[i],
 262                                       output_samples_per_channel_);
 263     }
 264   }
 265 }
 266
 267 void AudioBuffer::InitForNewData() {
 268   keyboard_data_ = NULL;
 269   mixed_low_pass_valid_ = false;
 270   reference_copied_ = false;
 271   activity_ = AudioFrame::kVadUnknown;
 272 }
 273
 274 const int16_t* AudioBuffer::data(int channel) const {
 275   return channels_->ibuf_const()->channel(channel);
 276 }
 277
 278 int16_t* AudioBuffer::data(int channel) {
 279   mixed_low_pass_valid_ = false;
 280   return channels_->ibuf()->channel(channel);
 281 }
 282
 283 const float* AudioBuffer::data_f(int channel) const {
 284   return channels_->fbuf_const()->channel(channel);
 285 }
 286
 287 float* AudioBuffer::data_f(int channel) {
 288   mixed_low_pass_valid_ = false;
 289   return channels_->fbuf()->channel(channel);
 290 }
 291
 292 const int16_t* AudioBuffer::low_pass_split_data(int channel) const {
 293   return split_channels_low_.get()
 294       ? split_channels_low_->ibuf_const()->channel(channel)
 295       : data(channel);
 296 }
 297
 298 int16_t* AudioBuffer::low_pass_split_data(int channel) {
 299   mixed_low_pass_valid_ = false;
 300   return split_channels_low_.get()
 301       ? split_channels_low_->ibuf()->channel(channel)
 302       : data(channel);
 303 }
 304
 305 const float* AudioBuffer::low_pass_split_data_f(int channel) const {
 306   return split_channels_low_.get()
 307       ? split_channels_low_->fbuf_const()->channel(channel)
 308       : data_f(channel);
 309 }
 310
 311 float* AudioBuffer::low_pass_split_data_f(int channel) {
 312   mixed_low_pass_valid_ = false;
 313   return split_channels_low_.get()
 314       ? split_channels_low_->fbuf()->channel(channel)
 315       : data_f(channel);
 316 }
 317
 318 const int16_t* AudioBuffer::high_pass_split_data(int channel) const {
 319   return split_channels_high_.get()
 320       ? split_channels_high_->ibuf_const()->channel(channel)
 321       : NULL;
 322 }
 323
 324 int16_t* AudioBuffer::high_pass_split_data(int channel) {
 325   return split_channels_high_.get()
 326       ? split_channels_high_->ibuf()->channel(channel)
 327       : NULL;
 328 }
 329
 330 const float* AudioBuffer::high_pass_split_data_f(int channel) const {
 331   return split_channels_high_.get()
 332       ? split_channels_high_->fbuf_const()->channel(channel)
 333       : NULL;
 334 }
 335
 336 float* AudioBuffer::high_pass_split_data_f(int channel) {
 337   return split_channels_high_.get()
 338       ? split_channels_high_->fbuf()->channel(channel)
 339       : NULL;
 340 }
 341
 342 const int16_t* AudioBuffer::mixed_low_pass_data() {
 343   // Currently only mixing stereo to mono is supported.
 344   assert(num_proc_channels_ == 1 || num_proc_channels_ == 2);
 345
 346   if (num_proc_channels_ == 1) {
 347     return low_pass_split_data(0);
 348   }
 349
 350   if (!mixed_low_pass_valid_) {
 351     if (!mixed_low_pass_channels_.get()) {
 352       mixed_low_pass_channels_.reset(
 353           new ChannelBuffer<int16_t>(samples_per_split_channel_, 1));
 354     }
 355     StereoToMono(low_pass_split_data(0),
 356                  low_pass_split_data(1),
 357                  mixed_low_pass_channels_->data(),
 358                  samples_per_split_channel_);
 359     mixed_low_pass_valid_ = true;
 360   }
 361   return mixed_low_pass_channels_->data();
 362 }
 363
 364 const int16_t* AudioBuffer::low_pass_reference(int channel) const {
 365   if (!reference_copied_) {
 366     return NULL;
 367   }
 368
 369   return low_pass_reference_channels_->channel(channel);
 370 }
 371
 372 const float* AudioBuffer::keyboard_data() const {
 373   return keyboard_data_;
 374 }
 375
 376 SplitFilterStates* AudioBuffer::filter_states(int channel) {
 377   assert(channel >= 0 && channel < num_proc_channels_);
 378   return &filter_states_[channel];
 379 }
 380
 381 void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
 382   activity_ = activity;
 383 }
 384
 385 AudioFrame::VADActivity AudioBuffer::activity() const {
 386   return activity_;
 387 }
 388
 389 int AudioBuffer::num_channels() const {
 390   return num_proc_channels_;
 391 }
 392
 393 int AudioBuffer::samples_per_channel() const {
 394   return proc_samples_per_channel_;
 395 }
 396
 397 int AudioBuffer::samples_per_split_channel() const {
 398   return samples_per_split_channel_;
 399 }
 400
 401 int AudioBuffer::samples_per_keyboard_channel() const {
 402   // We don't resample the keyboard channel.
 403   return input_samples_per_channel_;
 404 }
 405
 406 // TODO(andrew): Do deinterleaving and mixing in one step?
 407 void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
 408   assert(proc_samples_per_channel_ == input_samples_per_channel_);
 409   assert(frame->num_channels_ == num_input_channels_);
 410   assert(frame->samples_per_channel_ ==  proc_samples_per_channel_);
 411   InitForNewData();
 412   activity_ = frame->vad_activity_;
 413
 414   if (num_input_channels_ == 2 && num_proc_channels_ == 1) {
 415     // Downmix directly; no explicit deinterleaving needed.
 416     int16_t* downmixed = channels_->ibuf()->channel(0);
 417     for (int i = 0; i < input_samples_per_channel_; ++i) {
 418       // HACK(ajm): The downmixing in the int16_t path is in practice never
 419       // called from production code. We do this weird scaling to and from float
 420       // to satisfy tests checking for bit-exactness with the float path.
 421       float downmix_float = (ScaleToFloat(frame->data_[i * 2]) +
 422                              ScaleToFloat(frame->data_[i * 2 + 1])) / 2;
 423       downmixed[i] = ScaleAndRoundToInt16(downmix_float);
 424     }
 425   } else {
 426     assert(num_proc_channels_ == num_input_channels_);
 427     int16_t* interleaved = frame->data_;
 428     for (int i = 0; i < num_proc_channels_; ++i) {
 429       int16_t* deinterleaved = channels_->ibuf()->channel(i);
 430       int interleaved_idx = i;
 431       for (int j = 0; j < proc_samples_per_channel_; ++j) {
 432         deinterleaved[j] = interleaved[interleaved_idx];
 433         interleaved_idx += num_proc_channels_;
 434       }
 435     }
 436   }
 437 }
 438
 439 void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) const {
 440   assert(proc_samples_per_channel_ == output_samples_per_channel_);
 441   assert(num_proc_channels_ == num_input_channels_);
 442   assert(frame->num_channels_ == num_proc_channels_);
 443   assert(frame->samples_per_channel_ == proc_samples_per_channel_);
 444   frame->vad_activity_ = activity_;
 445
 446   if (!data_changed) {
 447     return;
 448   }
 449
 450   int16_t* interleaved = frame->data_;
 451   for (int i = 0; i < num_proc_channels_; i++) {
 452     int16_t* deinterleaved = channels_->ibuf()->channel(i);
 453     int interleaved_idx = i;
 454     for (int j = 0; j < proc_samples_per_channel_; j++) {
 455       interleaved[interleaved_idx] = deinterleaved[j];
 456       interleaved_idx += num_proc_channels_;
 457     }
 458   }
 459 }
 460
 461 void AudioBuffer::CopyLowPassToReference() {
 462   reference_copied_ = true;
 463   if (!low_pass_reference_channels_.get()) {
 464     low_pass_reference_channels_.reset(
 465         new ChannelBuffer<int16_t>(samples_per_split_channel_,
 466                                    num_proc_channels_));
 467   }
 468   for (int i = 0; i < num_proc_channels_; i++) {
 469     low_pass_reference_channels_->CopyFrom(low_pass_split_data(i), i);
 470   }
 471 }
 472
 473 }  // namespace webrtc