1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/filters/audio_renderer_algorithm.h"
10 #include "base/logging.h"
11 #include "base/memory/scoped_ptr.h"
12 #include "media/base/audio_buffer.h"
13 #include "media/base/audio_bus.h"
14 #include "media/base/limits.h"
15 #include "media/filters/wsola_internals.h"
20 // Waveform Similarity Overlap-and-add (WSOLA).
22 // One WSOLA iteration
24 // 1) Extract |target_block_| as input frames at indices
25 // [|target_block_index_|, |target_block_index_| + |ola_window_size_|).
26 // Note that |target_block_| is the "natural" continuation of the output.
28 // 2) Extract |search_block_| as input frames at indices
29 // [|search_block_index_|,
30 // |search_block_index_| + |num_candidate_blocks_| + |ola_window_size_|).
32 // 3) Find a block within the |search_block_| that is most similar
33 // to |target_block_|. Let |optimal_index| be the index of such block and
34 // write it to |optimal_block_|.
37 // |optimal_block_| = |transition_window_| * |target_block_| +
38 // (1 - |transition_window_|) * |optimal_block_|.
40 // 5) Overlap-and-add |optimal_block_| to the |wsola_output_|.
43 // |target_block_| = |optimal_index| + |ola_window_size_| / 2.
44 // |output_index_| = |output_index_| + |ola_window_size_| / 2,
45 // |search_block_center_offset_| = |output_index_| * |playback_rate_|, and
46 // |search_block_index_| = |search_block_center_offset_| -
47 // |search_block_center_offset_|.
49 // Overlap-and-add window size in milliseconds.
50 static const int kOlaWindowSizeMs = 20;
52 // Size of search interval in milliseconds. The search interval is
53 // [-delta delta] around |output_index_| * |playback_rate_|. So the search
54 // interval is 2 * delta.
55 static const int kWsolaSearchIntervalMs = 30;
57 // The maximum size in seconds for the |audio_buffer_|. Arbitrarily determined.
58 static const int kMaxCapacityInSeconds = 3;
60 // The starting size in frames for |audio_buffer_|. Previous usage maintained a
61 // queue of 16 AudioBuffers, each of 512 frames. This worked well, so we
62 // maintain this number of frames.
63 static const int kStartingBufferSizeInFrames = 16 * 512;
65 COMPILE_ASSERT(kStartingBufferSizeInFrames <
66 (kMaxCapacityInSeconds * limits::kMinSampleRate),
67 max_capacity_smaller_than_starting_buffer_size);
69 AudioRendererAlgorithm::AudioRendererAlgorithm()
71 samples_per_second_(0),
73 capacity_(kStartingBufferSizeInFrames),
75 search_block_center_offset_(0),
76 search_block_index_(0),
77 num_candidate_blocks_(0),
78 target_block_index_(0),
81 num_complete_frames_(0) {
84 AudioRendererAlgorithm::~AudioRendererAlgorithm() {}
86 void AudioRendererAlgorithm::Initialize(float initial_playback_rate,
87 const AudioParameters& params) {
88 CHECK(params.IsValid());
90 channels_ = params.channels();
91 samples_per_second_ = params.sample_rate();
92 SetPlaybackRate(initial_playback_rate);
93 num_candidate_blocks_ = (kWsolaSearchIntervalMs * samples_per_second_) / 1000;
94 ola_window_size_ = kOlaWindowSizeMs * samples_per_second_ / 1000;
96 // Make sure window size in an even number.
97 ola_window_size_ += ola_window_size_ & 1;
98 ola_hop_size_ = ola_window_size_ / 2;
100 // |num_candidate_blocks_| / 2 is the offset of the center of the search
101 // block to the center of the first (left most) candidate block. The offset
102 // of the center of a candidate block to its left most point is
103 // |ola_window_size_| / 2 - 1. Note that |ola_window_size_| is even and in
104 // our convention the center belongs to the left half, so we need to subtract
105 // one frame to get the correct offset.
108 // <------------------------------------------->
110 // |ola_window_size_| / 2 - 1
113 // |num_candidate_blocks_| / 2
116 // X----X----------------X---------------X-----X
117 // <----------> <---------->
118 // Candidate ... Candidate
119 // 1, ... |num_candidate_blocks_|
120 search_block_center_offset_ = num_candidate_blocks_ / 2 +
121 (ola_window_size_ / 2 - 1);
123 ola_window_.reset(new float[ola_window_size_]);
124 internal::GetSymmetricHanningWindow(ola_window_size_, ola_window_.get());
126 transition_window_.reset(new float[ola_window_size_ * 2]);
127 internal::GetSymmetricHanningWindow(2 * ola_window_size_,
128 transition_window_.get());
130 wsola_output_ = AudioBus::Create(channels_, ola_window_size_ + ola_hop_size_);
131 wsola_output_->Zero(); // Initialize for overlap-and-add of the first block.
133 // Auxiliary containers.
134 optimal_block_ = AudioBus::Create(channels_, ola_window_size_);
135 search_block_ = AudioBus::Create(
136 channels_, num_candidate_blocks_ + (ola_window_size_ - 1));
137 target_block_ = AudioBus::Create(channels_, ola_window_size_);
140 int AudioRendererAlgorithm::FillBuffer(AudioBus* dest, int requested_frames) {
141 if (playback_rate_ == 0)
144 DCHECK_EQ(channels_, dest->channels());
146 int slower_step = ceil(ola_window_size_ * playback_rate_);
147 int faster_step = ceil(ola_window_size_ / playback_rate_);
149 // Optimize the most common |playback_rate_| ~= 1 case to use a single copy
150 // instead of copying frame by frame.
151 if (ola_window_size_ <= faster_step && slower_step >= ola_window_size_) {
152 const int frames_to_copy =
153 std::min(audio_buffer_.frames(), requested_frames);
154 const int frames_read = audio_buffer_.ReadFrames(frames_to_copy, 0, dest);
155 DCHECK_EQ(frames_read, frames_to_copy);
159 int rendered_frames = 0;
161 rendered_frames += WriteCompletedFramesTo(
162 requested_frames - rendered_frames, rendered_frames, dest);
163 } while (rendered_frames < requested_frames && RunOneWsolaIteration());
164 return rendered_frames;
167 void AudioRendererAlgorithm::SetPlaybackRate(float new_rate) {
168 DCHECK_GE(new_rate, 0);
169 playback_rate_ = new_rate;
172 void AudioRendererAlgorithm::FlushBuffers() {
173 // Clear the queue of decoded packets (releasing the buffers).
174 audio_buffer_.Clear();
176 search_block_index_ = 0;
177 target_block_index_ = 0;
178 wsola_output_->Zero();
179 num_complete_frames_ = 0;
181 // Reset |capacity_| so growth triggered by underflows doesn't penalize
183 capacity_ = kStartingBufferSizeInFrames;
186 base::TimeDelta AudioRendererAlgorithm::GetTime() {
187 return audio_buffer_.current_time();
190 void AudioRendererAlgorithm::EnqueueBuffer(
191 const scoped_refptr<AudioBuffer>& buffer_in) {
192 DCHECK(!buffer_in->end_of_stream());
193 audio_buffer_.Append(buffer_in);
196 bool AudioRendererAlgorithm::IsQueueFull() {
197 return audio_buffer_.frames() >= capacity_;
200 void AudioRendererAlgorithm::IncreaseQueueCapacity() {
201 int max_capacity = kMaxCapacityInSeconds * samples_per_second_;
202 DCHECK_LE(capacity_, max_capacity);
204 capacity_ = std::min(2 * capacity_, max_capacity);
207 bool AudioRendererAlgorithm::CanPerformWsola() const {
208 const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1);
209 const int frames = audio_buffer_.frames();
210 return target_block_index_ + ola_window_size_ <= frames &&
211 search_block_index_ + search_block_size <= frames;
214 bool AudioRendererAlgorithm::RunOneWsolaIteration() {
215 if (!CanPerformWsola())
221 for (int k = 0; k < channels_; ++k) {
222 const float* const ch_opt_frame = optimal_block_->channel(k);
223 float* ch_output = wsola_output_->channel(k) + num_complete_frames_;
224 for (int n = 0; n < ola_hop_size_; ++n) {
225 ch_output[n] = ch_output[n] * ola_window_[ola_hop_size_ + n] +
226 ch_opt_frame[n] * ola_window_[n];
229 // Copy the second half to the output.
230 memcpy(&ch_output[ola_hop_size_], &ch_opt_frame[ola_hop_size_],
231 sizeof(*ch_opt_frame) * ola_hop_size_);
234 num_complete_frames_ += ola_hop_size_;
235 UpdateOutputTime(ola_hop_size_);
236 RemoveOldInputFrames();
240 void AudioRendererAlgorithm::UpdateOutputTime(double time_change) {
241 output_time_ += time_change;
242 // Center of the search region, in frames.
243 const int search_block_center_index = static_cast<int>(
244 output_time_ * playback_rate_ + 0.5);
245 search_block_index_ = search_block_center_index - search_block_center_offset_;
248 void AudioRendererAlgorithm::RemoveOldInputFrames() {
249 const int earliest_used_index = std::min(target_block_index_,
250 search_block_index_);
251 if (earliest_used_index <= 0)
252 return; // Nothing to remove.
254 // Remove frames from input and adjust indices accordingly.
255 audio_buffer_.SeekFrames(earliest_used_index);
256 target_block_index_ -= earliest_used_index;
258 // Adjust output index.
259 double output_time_change = static_cast<double>(earliest_used_index) /
261 CHECK_GE(output_time_, output_time_change);
262 UpdateOutputTime(-output_time_change);
265 int AudioRendererAlgorithm::WriteCompletedFramesTo(
266 int requested_frames, int dest_offset, AudioBus* dest) {
267 int rendered_frames = std::min(num_complete_frames_, requested_frames);
269 if (rendered_frames == 0)
270 return 0; // There is nothing to read from |wsola_output_|, return.
272 wsola_output_->CopyPartialFramesTo(0, rendered_frames, dest_offset, dest);
274 // Remove the frames which are read.
275 int frames_to_move = wsola_output_->frames() - rendered_frames;
276 for (int k = 0; k < channels_; ++k) {
277 float* ch = wsola_output_->channel(k);
278 memmove(ch, &ch[rendered_frames], sizeof(*ch) * frames_to_move);
280 num_complete_frames_ -= rendered_frames;
281 return rendered_frames;
284 bool AudioRendererAlgorithm::TargetIsWithinSearchRegion() const {
285 const int search_block_size = num_candidate_blocks_ + (ola_window_size_ - 1);
287 return target_block_index_ >= search_block_index_ &&
288 target_block_index_ + ola_window_size_ <=
289 search_block_index_ + search_block_size;
292 void AudioRendererAlgorithm::GetOptimalBlock() {
293 int optimal_index = 0;
295 // An interval around last optimal block which is excluded from the search.
296 // This is to reduce the buzzy sound. The number 160 is rather arbitrary and
297 // derived heuristically.
298 const int kExcludeIntervalLengthFrames = 160;
299 if (TargetIsWithinSearchRegion()) {
300 optimal_index = target_block_index_;
301 PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get());
303 PeekAudioWithZeroPrepend(target_block_index_, target_block_.get());
304 PeekAudioWithZeroPrepend(search_block_index_, search_block_.get());
305 int last_optimal = target_block_index_ - ola_hop_size_ -
307 internal::Interval exclude_iterval = std::make_pair(
308 last_optimal - kExcludeIntervalLengthFrames / 2,
309 last_optimal + kExcludeIntervalLengthFrames / 2);
311 // |optimal_index| is in frames and it is relative to the beginning of the
313 optimal_index = internal::OptimalIndex(
314 search_block_.get(), target_block_.get(), exclude_iterval);
316 // Translate |index| w.r.t. the beginning of |audio_buffer_| and extract the
318 optimal_index += search_block_index_;
319 PeekAudioWithZeroPrepend(optimal_index, optimal_block_.get());
321 // Make a transition from target block to the optimal block if different.
322 // Target block has the best continuation to the current output.
323 // Optimal block is the most similar block to the target, however, it might
324 // introduce some discontinuity when over-lap-added. Therefore, we combine
325 // them for a smoother transition. The length of transition window is twice
326 // as that of the optimal-block which makes it like a weighting function
327 // where target-block has higher weight close to zero (weight of 1 at index
328 // 0) and lower weight close the end.
329 for (int k = 0; k < channels_; ++k) {
330 float* ch_opt = optimal_block_->channel(k);
331 const float* const ch_target = target_block_->channel(k);
332 for (int n = 0; n < ola_window_size_; ++n) {
333 ch_opt[n] = ch_opt[n] * transition_window_[n] + ch_target[n] *
334 transition_window_[ola_window_size_ + n];
339 // Next target is one hop ahead of the current optimal.
340 target_block_index_ = optimal_index + ola_hop_size_;
343 void AudioRendererAlgorithm::PeekAudioWithZeroPrepend(
344 int read_offset_frames, AudioBus* dest) {
345 CHECK_LE(read_offset_frames + dest->frames(), audio_buffer_.frames());
347 int write_offset = 0;
348 int num_frames_to_read = dest->frames();
349 if (read_offset_frames < 0) {
350 int num_zero_frames_appended = std::min(-read_offset_frames,
352 read_offset_frames = 0;
353 num_frames_to_read -= num_zero_frames_appended;
354 write_offset = num_zero_frames_appended;
355 dest->ZeroFrames(num_zero_frames_appended);
357 audio_buffer_.PeekFrames(num_frames_to_read, read_offset_frames,