src/media/filters/audio_renderer_algorithm.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/filters/audio_renderer_algorithm.h"
   6
   7 #include <algorithm>
   8 #include <cmath>
   9
  10 #include "base/logging.h"
  11 #include "base/memory/scoped_ptr.h"
  12 #include "media/base/audio_buffer.h"
  13 #include "media/base/audio_bus.h"
  14 #include "media/base/limits.h"
  15 #include "media/filters/wsola_internals.h"
  16
  17 namespace media {
  18
  19
  20 // Waveform Similarity Overlap-and-add (WSOLA).
  21 //
  22 // One WSOLA iteration
  23 //
  24 // 1) Extract |target_block_| as input frames at indices
  25 //    [|target_block_index_|, |target_block_index_| + |ola_window_size_|).
  26 //    Note that |target_block_| is the "natural" continuation of the output.
  27 //
  28 // 2) Extract |search_block_| as input frames at indices
  29 //    [|search_block_index_|,
  30 //     |search_block_index_| + |num_candidate_blocks_| + |ola_window_size_|).
  31 //
  32 // 3) Find a block within the |search_block_| that is most similar
  33 //    to |target_block_|. Let |optimal_index| be the index of such block and
  34 //    write it to |optimal_block_|.
  35 //
  36 // 4) Update:
  37 //    |optimal_block_| = |transition_window_| * |target_block_| +
  38 //    (1 - |transition_window_|) * |optimal_block_|.
  39 //
  40 // 5) Overlap-and-add |optimal_block_| to the |wsola_output_|.
  41 //
  42 // 6) Update:
  43 //    |target_block_| = |optimal_index| + |ola_window_size_| / 2.
  44 //    |output_index_| = |output_index_| + |ola_window_size_| / 2,
  45 //    |search_block_center_offset_| = |output_index_| * |playback_rate_|, and
  46 //    |search_block_index_| = |search_block_center_offset_| -
  47 //        |search_block_center_offset_|.
  48
  49 // Overlap-and-add window size in milliseconds.
  50 static const int kOlaWindowSizeMs = 20;
  51
  52 // Size of search interval in milliseconds. The search interval is
  53 // [-delta delta] around |output_index_| * |playback_rate_|. So the search
  54 // interval is 2 * delta.
  55 static const int kWsolaSearchIntervalMs = 30;
  56
  57 // The maximum size in seconds for the |audio_buffer_|. Arbitrarily determined.
  58 static const int kMaxCapacityInSeconds = 3;
  59
  60 // The starting size in frames for |audio_buffer_|. Previous usage maintained a
  61 // queue of 16 AudioBuffers, each of 512 frames. This worked well, so we
  62 // maintain this number of frames.
  63 static const int kStartingBufferSizeInFrames = 16 * 512;
  64
  65 COMPILE_ASSERT(kStartingBufferSizeInFrames <
  66                (kMaxCapacityInSeconds * limits::kMinSampleRate),
  67                max_capacity_smaller_than_starting_buffer_size);
  68
  69 AudioRendererAlgorithm::AudioRendererAlgorithm()
  70     : channels_(0),
  71       samples_per_second_(0),
  72       playback_rate_(0),
  73       capacity_(kStartingBufferSizeInFrames),
  74       output_time_(0.0),
  75       search_block_center_offset_(0),
  76       search_block_index_(0),
  77       num_candidate_blocks_(0),
  78       target_block_index_(0),
  79       ola_window_size_(0),
  80       ola_hop_size_(0),
  81       num_complete_frames_(0) {
  82 }
  83
  84 AudioRendererAlgorithm::~AudioRendererAlgorithm() {}
  85
  86 void AudioRendererAlgorithm::Initialize(float initial_playback_rate,
  87                                         const AudioParameters& params) {
  88   CHECK(params.IsValid());
  89
  90   channels_ = params.channels();
  91   samples_per_second_ = params.sample_rate();
  92   SetPlaybackRate(initial_playback_rate);
  93   num_candidate_blocks_ = (kWsolaSearchIntervalMs * samples_per_second_) / 1000;
  94   ola_window_size_ = kOlaWindowSizeMs * samples_per_second_ / 1000;
  95
  96   // Make sure window size in an even number.
  97   ola_window_size_ += ola_window_size_ & 1;
  98   ola_hop_size_ = ola_window_size_ / 2;
  99
 100   // |num_candidate_blocks_| / 2 is the offset of the center of the search
 101   // block to the center of the first (left most) candidate block. The offset
 102   // of the center of a candidate block to its left most point is
 103   // |ola_window_size_| / 2 - 1. Note that |ola_window_size_| is even and in
 104   // our convention the center belongs to the left half, so we need to subtract
 105   // one frame to get the correct offset.
 106   //
 107   //                             Search Block
 108   //              <------------------------------------------->
 109   //
 110   //   |ola_window_size_| / 2 - 1
 111   //              <----
 112   //
 113   //             |num_candidate_blocks_| / 2
 114   //                   <----------------
 115   //                                 center
 116   //              X----X----------------X---------------X-----X
 117   //              <---------->                     <---------->
 118   //                Candidate      ...               Candidate
 119   //                   1,          ...         |num_candidate_blocks_|
 120   search_block_center_offset_ = num_candidate_blocks_ / 2 +
 121       (ola_window_size_ / 2 - 1);
 122
 123   ola_window_.reset(new float[ola_window_size_]);
 124   internal::GetSymmetricHanningWindow(ola_window_size_, ola_window_.get());
 125
 126   transition_window_.reset(new float[ola_window_size_ * 2]);
 127   internal::GetSymmetricHanningWindow(2 * ola_window_size_,
 128                                       transition_window_.get());
 129
 130   wsola_output_ = AudioBus::Create(channels_, ola_window_size_ + ola_hop_size_);
 131   wsola_output_->Zero();  // Initialize for overlap-and-add of the first block.
 132
 133   // Auxiliary containers.
 134   optimal_block_ = AudioBus::Create(channels_, ola_window_size_);
 135   search_block_ = AudioBus::Create(
 136       channels_, num_candidate_blocks_ + (ola_window_size_ - 1));
 137   target_block_ = AudioBus::Create(channels_, ola_window_size_);
 138 }
 139
 140 int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, int requested_frames) {
 141   if (playback_rate_ == 0)
 142     return 0;
 143
 144   DCHECK_EQ(channels_, dest->channels());
 145
 146   int slower_step = ceil(ola_window_size_ * playback_rate_);
 147   int faster_step = ceil(ola_window_size_ / playback_rate_);
 148
 149   // Optimize the most common |playback_rate_| ~= 1 case to use a single copy
 150   // instead of copying frame by frame.
 151   if (ola_window_size_ <= faster_step && slower_step >= ola_window_size_) {
 152     const int frames_to_copy =
 153         std::min(audio_buffer_.frames(), requested_frames);
 154     const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest);
 155     DCHECK_EQ(frames_read, frames_to_copy);
 156     return frames_read;
 157   }
 158
 159   int rendered_frames = 0;
 160   do {
 161     rendered_frames += WriteCompletedFramesTo(
 162         requested_frames - rendered_frames, rendered_frames, dest);
 163   } while (rendered_frames < requested_frames && RunOneWsolaIteration());
 164   return rendered_frames;
 165 }
 166
 167 void AudioRendererAlgorithm::SetPlaybackRate(float new_rate) {
 168   DCHECK_GE(new_rate, 0);
 169   playback_rate_ = new_rate;
 170 }
 171
 172 void AudioRendererAlgorithm::FlushBuffers() {
 173   // Clear the queue of decoded packets (releasing the buffers).
 174   audio_buffer_.Clear();
 175   output_time_ = 0.0;
 176   search_block_index_ = 0;
 177   target_block_index_ = 0;
 178   wsola_output_->Zero();
 179   num_complete_frames_ = 0;
 180
 181   // Reset |capacity_| so growth triggered by underflows doesn't penalize
 182   // seek time.
 183   capacity_ = kStartingBufferSizeInFrames;
 184 }
 185
 186 base::TimeDelta AudioRendererAlgorithm::GetTime() {
 187   return audio_buffer_.current_time();
 188 }
 189
 190 void AudioRendererAlgorithm::EnqueueBuffer(
 191     const scoped_refptr<AudioBuffer>& buffer_in) {
 192   DCHECK(!buffer_in->end_of_stream());
 193   audio_buffer_.Append(buffer_in);
 194 }
 195
 196 bool AudioRendererAlgorithm::IsQueueFull() {
 197   return audio_buffer_.frames() >= capacity_;
 198 }
 199
 200 void AudioRendererAlgorithm::IncreaseQueueCapacity() {
 201   int max_capacity = kMaxCapacityInSeconds * samples_per_second_;
 202   DCHECK_LE(capacity_, max_capacity);
 203
 204   capacity_ = std::min(2 * capacity_, max_capacity);
 205 }
 206
 207 bool AudioRendererAlgorithm::CanPerformWsola() const {
 208   const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1);
 209   const int frames = audio_buffer_.frames();
 210   return target_block_index_ + ola_window_size_ <= frames &&
 211       search_block_index_ + search_block_size <= frames;
 212 }
 213
 214 bool AudioRendererAlgorithm::RunOneWsolaIteration() {
 215   if (!CanPerformWsola())
 216     return false;
 217
 218   GetOptimalBlock();
 219
 220   // Overlap-and-add.
 221   for (int k = 0; k < channels_; ++k) {
 222     const float* const ch_opt_frame = optimal_block_->channel(k);
 223     float* ch_output = wsola_output_->channel(k) + num_complete_frames_;
 224     for (int n = 0; n < ola_hop_size_; ++n) {
 225       ch_output[n] = ch_output[n] * ola_window_[ola_hop_size_ + n] +
 226           ch_opt_frame[n] * ola_window_[n];
 227     }
 228
 229     // Copy the second half to the output.
 230     memcpy(&ch_output[ola_hop_size_], &ch_opt_frame[ola_hop_size_],
 231            sizeof(*ch_opt_frame) * ola_hop_size_);
 232   }
 233
 234   num_complete_frames_ += ola_hop_size_;
 235   UpdateOutputTime(ola_hop_size_);
 236   RemoveOldInputFrames();
 237   return true;
 238 }
 239
 240 void AudioRendererAlgorithm::UpdateOutputTime(double time_change) {
 241   output_time_ += time_change;
 242   // Center of the search region, in frames.
 243   const int search_block_center_index = static_cast<int>(
 244       output_time_ * playback_rate_ + 0.5);
 245   search_block_index_ = search_block_center_index - search_block_center_offset_;
 246 }
 247
 248 void AudioRendererAlgorithm::RemoveOldInputFrames() {
 249   const int earliest_used_index = std::min(target_block_index_,
 250                                            search_block_index_);
 251   if (earliest_used_index <= 0)
 252     return;  // Nothing to remove.
 253
 254   // Remove frames from input and adjust indices accordingly.
 255   audio_buffer_.SeekFrames(earliest_used_index);
 256   target_block_index_ -= earliest_used_index;
 257
 258   // Adjust output index.
 259   double output_time_change = static_cast<double>(earliest_used_index) /
 260       playback_rate_;
 261   CHECK_GE(output_time_, output_time_change);
 262   UpdateOutputTime(-output_time_change);
 263 }
 264
 265 int AudioRendererAlgorithm::WriteCompletedFramesTo(
 266     int requested_frames, int dest_offset, AudioBus* dest) {
 267   int rendered_frames = std::min(num_complete_frames_, requested_frames);
 268
 269   if (rendered_frames == 0)
 270     return 0;  // There is nothing to read from |wsola_output_|, return.
 271
 272   wsola_output_->CopyPartialFramesTo(0, rendered_frames, dest_offset, dest);
 273
 274   // Remove the frames which are read.
 275   int frames_to_move = wsola_output_->frames() - rendered_frames;
 276   for (int k = 0; k < channels_; ++k) {
 277     float* ch = wsola_output_->channel(k);
 278     memmove(ch, &ch[rendered_frames], sizeof(*ch) * frames_to_move);
 279   }
 280   num_complete_frames_ -= rendered_frames;
 281   return rendered_frames;
 282 }
 283
 284 bool AudioRendererAlgorithm::TargetIsWithinSearchRegion() const {
 285   const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1);
 286
 287   return target_block_index_ >= search_block_index_ &&
 288       target_block_index_ + ola_window_size_ <=
 289       search_block_index_ + search_block_size;
 290 }
 291
 292 void AudioRendererAlgorithm::GetOptimalBlock() {
 293   int optimal_index = 0;
 294
 295   // An interval around last optimal block which is excluded from the search.
 296   // This is to reduce the buzzy sound. The number 160 is rather arbitrary and
 297   // derived heuristically.
 298   const int kExcludeIntervalLengthFrames = 160;
 299   if (TargetIsWithinSearchRegion()) {
 300     optimal_index = target_block_index_;
 301     PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get());
 302   } else {
 303     PeekAudioWithZeroPrepend(target_block_index_, target_block_.get());
 304     PeekAudioWithZeroPrepend(search_block_index_, search_block_.get());
 305     int last_optimal = target_block_index_ - ola_hop_size_ -
 306         search_block_index_;
 307     internal::Interval exclude_iterval = std::make_pair(
 308         last_optimal - kExcludeIntervalLengthFrames / 2,
 309         last_optimal + kExcludeIntervalLengthFrames / 2);
 310
 311     // |optimal_index| is in frames and it is relative to the beginning of the
 312     // |search_block_|.
 313     optimal_index = internal::OptimalIndex(
 314         search_block_.get(), target_block_.get(), exclude_iterval);
 315
 316     // Translate |index| w.r.t. the beginning of |audio_buffer_| and extract the
 317     // optimal block.
 318     optimal_index += search_block_index_;
 319     PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get());
 320
 321     // Make a transition from target block to the optimal block if different.
 322     // Target block has the best continuation to the current output.
 323     // Optimal block is the most similar block to the target, however, it might
 324     // introduce some discontinuity when over-lap-added. Therefore, we combine
 325     // them for a smoother transition. The length of transition window is twice
 326     // as that of the optimal-block which makes it like a weighting function
 327     // where target-block has higher weight close to zero (weight of 1 at index
 328     // 0) and lower weight close the end.
 329     for (int k = 0; k < channels_; ++k) {
 330       float* ch_opt = optimal_block_->channel(k);
 331       const float* const ch_target = target_block_->channel(k);
 332       for (int n = 0; n < ola_window_size_; ++n) {
 333         ch_opt[n] = ch_opt[n] * transition_window_[n] + ch_target[n] *
 334             transition_window_[ola_window_size_ + n];
 335       }
 336     }
 337   }
 338
 339   // Next target is one hop ahead of the current optimal.
 340   target_block_index_ = optimal_index + ola_hop_size_;
 341 }
 342
 343 void AudioRendererAlgorithm::PeekAudioWithZeroPrepend(
 344     int read_offset_frames, AudioBus* dest) {
 345   CHECK_LE(read_offset_frames + dest->frames(), audio_buffer_.frames());
 346
 347   int write_offset = 0;
 348   int num_frames_to_read = dest->frames();
 349   if (read_offset_frames < 0) {
 350     int num_zero_frames_appended = std::min(-read_offset_frames,
 351                                             num_frames_to_read);
 352     read_offset_frames = 0;
 353     num_frames_to_read -= num_zero_frames_appended;
 354     write_offset = num_zero_frames_appended;
 355     dest->ZeroFrames(num_zero_frames_appended);
 356   }
 357   audio_buffer_.PeekFrames(num_frames_to_read, read_offset_frames,
 358                            write_offset, dest);
 359 }
 360
 361 }  // namespace media