1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // AudioRendererAlgorithm buffers and transforms audio data. The owner of
6 // this object provides audio data to the object through EnqueueBuffer() and
7 // requests data from the buffer via FillBuffer().
9 // This class is *not* thread-safe. Calls to enqueue and retrieve data must be
10 // locked if called from multiple threads.
12 // AudioRendererAlgorithm uses the Waveform Similarity Overlap and Add (WSOLA)
13 // algorithm to stretch or compress audio data to meet playback speeds less than
14 // or greater than the natural playback of the audio stream. The algorithm
15 // preserves local properties of the audio, therefore, pitch and harmonics are
16 // preserved. See audio_renderer_algorithm.cc for a more elaborate
17 // description of the algorithm.
19 // Audio at very low or very high playback rates is muted to preserve quality.
21 #ifndef MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_
22 #define MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_
24 #include "base/memory/ref_counted.h"
25 #include "base/memory/scoped_ptr.h"
26 #include "media/audio/audio_parameters.h"
27 #include "media/base/audio_buffer.h"
28 #include "media/base/audio_buffer_queue.h"
34 class MEDIA_EXPORT AudioRendererAlgorithm {
36 AudioRendererAlgorithm();
37 ~AudioRendererAlgorithm();
39 // Initializes this object with information about the audio stream.
40 void Initialize(const AudioParameters& params);
42 // Tries to fill |requested_frames| frames into |dest| with possibly scaled
43 // data from our |audio_buffer_|. Data is scaled based on |playback_rate|,
44 // using a variation of the Overlap-Add method to combine sample windows.
46 // Data from |audio_buffer_| is consumed in proportion to the playback rate.
48 // Returns the number of frames copied into |dest|.
49 int FillBuffer(AudioBus* dest, int requested_frames, float playback_rate);
51 // Clears |audio_buffer_|.
54 // Enqueues |buffer_in|. It is called by the owner of the algorithm.
56 void EnqueueBuffer(const scoped_refptr<AudioBuffer>& buffer_in);
58 // Returns true if |audio_buffer_| is at or exceeds capacity.
61 // Returns the current capacity of |audio_buffer_|, in frames.
62 int QueueCapacity() const { return capacity_; }
64 // Increase the capacity of |audio_buffer_| if possible.
65 void IncreaseQueueCapacity();
67 // Returns the number of frames left in |audio_buffer_|, which may be larger
68 // than QueueCapacity() in the event that EnqueueBuffer() delivered more data
69 // than |audio_buffer_| was intended to hold.
70 int frames_buffered() { return audio_buffer_.frames(); }
72 // Returns the sample rate of this audio stream, in samples per second.
73 int samples_per_second() const { return samples_per_second_; }
76 // Within |search_block_|, find the block of data that is most similar to
77 // |target_block_|, and write it in |optimal_block_|. This method assumes that
78 // there is enough data to perform a search, i.e. |search_block_| and
79 // |target_block_| can be extracted from the available frames.
80 void GetOptimalBlock();
82 // Read a maximum of |requested_frames| frames from |wsola_output_|. Returns
83 // number of frames actually read.
84 int WriteCompletedFramesTo(
85 int requested_frames, int output_offset, AudioBus* dest);
87 // Fill |dest| with frames from |audio_buffer_| starting from frame
88 // |read_offset_frames|. |dest| is expected to have the same number of
89 // channels as |audio_buffer_|. A negative offset, i.e.
90 // |read_offset_frames| < 0, is accepted assuming that |audio_buffer_| is zero
91 // for negative indices. This might happen for the first few frames. This method
92 // assumes there are enough frames to fill |dest|, i.e. |read_offset_frames| +
93 // |dest->frames()| does not extend into the future.
94 void PeekAudioWithZeroPrepend(int read_offset_frames, AudioBus* dest);
96 // Run one iteration of WSOLA, if there are sufficient frames. This will
97 // overlap-and-add one block to |wsola_output_|, hence, |num_complete_frames_|
98 // is incremented by |ola_hop_size_|.
99 bool RunOneWsolaIteration(float playback_rate);
101 // Seek |audio_buffer_| forward to remove frames from input that are not used
102 // any more. State of the WSOLA will be updated accordingly.
103 void RemoveOldInputFrames(float playback_rate);
105 // Update |output_time_| by |time_change| and, in turn, |search_block_index_|.
107 void UpdateOutputTime(float playback_rate, double time_change);
109 // Is |target_block_| fully within |search_block_|? If so, we don't need to
110 // perform the search.
111 bool TargetIsWithinSearchRegion() const;
113 // Do we have enough data to perform one round of WSOLA?
114 bool CanPerformWsola() const;
116 // Number of channels in audio stream.
119 // Sample rate of audio stream.
120 int samples_per_second_;
122 // Buffered audio data.
123 AudioBufferQueue audio_buffer_;
125 // If muted, keep track of partial frames that should have been skipped over.
126 double muted_partial_frame_;
128 // How many frames to have in the queue before we report the queue is full.
131 // Bookkeeping of the current time of generated audio, in frames. This
132 // should be appropriately updated when output samples are generated, regardless
133 // of whether we push samples out when FillBuffer() is called or we store
134 // audio in |wsola_output_| for the subsequent calls to FillBuffer().
135 // Furthermore, if samples from |audio_buffer_| are evicted then this
136 // member variable should be updated based on |playback_rate_|.
137 // Note that this member should be updated ONLY by calling UpdateOutputTime(),
138 // so that |search_block_index_| is updated accordingly.
141 // The offset of the center frame of |search_block_| w.r.t. its first frame.
142 int search_block_center_offset_;
144 // Index of the beginning of the |search_block_|, in frames.
145 int search_block_index_;
147 // Number of blocks to search to find the one most similar to |target_block_|.
149 int num_candidate_blocks_;
151 // Index of the beginning of the target block, counted in frames.
152 int target_block_index_;
154 // Overlap-and-add window size in frames.
155 int ola_window_size_;
157 // Overlap-and-add hop size, in frames. This implementation assumes 50% overlap.
161 // Number of frames in |wsola_output_| for which overlap-and-add is completed
162 // and which can be copied to output if FillBuffer() is called. It also
163 // specifies the index where the next WSOLA window has to overlap-and-add.
164 int num_complete_frames_;
166 // This stores a part of the output that is created but couldn't be rendered.
167 // Output is generated frame-by-frame which at some point might exceed the
168 // number of requested samples. Furthermore, due to overlap-and-add,
169 // the last half-window of the output is incomplete; it is also stored here.
171 scoped_ptr<AudioBus> wsola_output_;
173 // Overlap-and-add window.
174 scoped_ptr<float[]> ola_window_;
176 // Transition window, used to update |optimal_block_| by a weighted sum of
177 // |optimal_block_| and |target_block_|.
178 scoped_ptr<float[]> transition_window_;
180 // Auxiliary variables to avoid allocation in every iteration.
182 // Stores the optimal block in every iteration. This is the most
183 // similar block to |target_block_| within |search_block_| and it is
184 // overlap-and-added to |wsola_output_|.
185 scoped_ptr<AudioBus> optimal_block_;
187 // A block of data that search is performed over to find the |optimal_block_|.
188 scoped_ptr<AudioBus> search_block_;
190 // Stores the target block, denoted as |target| above. |search_block_| is
191 // searched for the block (|optimal_block_|) most similar to |target_block_|.
193 scoped_ptr<AudioBus> target_block_;
195 DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm);
200 #endif // MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_