src/media/filters/audio_renderer_algorithm.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 // AudioRendererAlgorithm buffers and transforms audio data. The owner of
   6 // this object provides audio data to the object through EnqueueBuffer() and
   7 // requests data from the buffer via FillBuffer().
   8 //
   9 // This class is *not* thread-safe. Calls to enqueue and retrieve data must be
  10 // locked if called from multiple threads.
  11 //
  12 // AudioRendererAlgorithm uses the Waveform Similarity Overlap and Add (WSOLA)
  13 // algorithm to stretch or compress audio data to meet playback speeds less than
  14 // or greater than the natural playback of the audio stream. The algorithm
  15 // preserves local properties of the audio; therefore, pitch and harmonics
  16 // are preserved. See audio_renderer_algorithm.cc for a more elaborate
  17 // description of the algorithm.
  18 //
  19 // Audio at very low or very high playback rates is muted to preserve quality.
  20
  21 #ifndef MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_
  22 #define MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_
  23
  24 #include "base/memory/ref_counted.h"
  25 #include "base/memory/scoped_ptr.h"
  26 #include "media/audio/audio_parameters.h"
  27 #include "media/base/audio_buffer.h"
  28 #include "media/base/audio_buffer_queue.h"
  29
  30 namespace media {
  31
  32 class AudioBus;
  33
  34 class MEDIA_EXPORT AudioRendererAlgorithm {
  35  public:
  36   AudioRendererAlgorithm();
  37   ~AudioRendererAlgorithm();
  38
  39   // Initializes this object from |params|, which describes the audio stream.
  40   void Initialize(const AudioParameters& params);
  41
  42   // Tries to fill |requested_frames| frames into |dest| with possibly scaled
  43   // data from our |audio_buffer_|. Data is scaled based on |playback_rate|,
  44   // using a variation of the Overlap-Add method to combine sample windows.
  45   //
  46   // Data from |audio_buffer_| is consumed in proportion to the playback rate.
  47   //
  48   // Returns the number of frames copied into |dest|.
  49   int FillBuffer(AudioBus* dest, int requested_frames, float playback_rate);
  50
  51   // Clears |audio_buffer_|.
  52   void FlushBuffers();
  53
  54   // Enqueues |buffer_in|. It is called by the owner of the algorithm after a
  55   // read completes.
  56   void EnqueueBuffer(const scoped_refptr<AudioBuffer>& buffer_in);
  57
  58   // Returns true if |audio_buffer_| is at or above capacity.
  59   bool IsQueueFull();
  60
  61   // Returns the capacity of |audio_buffer_| in frames, i.e. |capacity_|.
  62   int QueueCapacity() const { return capacity_; }
  63
  64   // Increases the capacity of |audio_buffer_| if possible.
  65   void IncreaseQueueCapacity();
  66
  67   // Returns the number of frames left in |audio_buffer_|, which may be larger
  68   // than QueueCapacity() in the event that EnqueueBuffer() delivered more data
  69   // than |audio_buffer_| was intending to hold.
  70   int frames_buffered() { return audio_buffer_.frames(); }
  71
  72   // Returns the samples per second for this audio stream.
  73   int samples_per_second() { return samples_per_second_; }
  74
  75  private:
  76   // Within |search_block_|, find the block of data that is most similar to
  77   // |target_block_|, and write it to |optimal_block_|. This method assumes that
  78   // there is enough data to perform a search, i.e. |search_block_| and
  79   // |target_block_| can be extracted from the available frames.
  80   void GetOptimalBlock();
  81
  82   // Read a maximum of |requested_frames| frames from |wsola_output_|. Returns
  83   // the number of frames actually read.
  84   int WriteCompletedFramesTo(
  85       int requested_frames, int output_offset, AudioBus* dest);
  86
  87   // Fill |dest| with frames from |audio_buffer_| starting from frame
  88   // |read_offset_frames|. |dest| is expected to have the same number of
  89   // channels as |audio_buffer_|. A negative offset, i.e.
  90   // |read_offset_frames| < 0, is accepted assuming that |audio_buffer_| is
  91   // zero for negative indices. This might happen for the first few frames.
  92   // This method assumes there are enough frames to fill |dest|, i.e.
  93   // |read_offset_frames| + |dest->frames()| does not extend into the future.
  94   void PeekAudioWithZeroPrepend(int read_offset_frames, AudioBus* dest);
  95
  96   // Run one iteration of WSOLA, if there are sufficient frames. This will
  97   // overlap-and-add one block to |wsola_output_|, hence, |num_complete_frames_|
  98   // is incremented by |ola_hop_size_|. NOTE(review): return value undocumented.
  99   bool RunOneWsolaIteration(float playback_rate);
 100
 101   // Seek |audio_buffer_| forward to remove frames from input that are no
 102   // longer used. State of the WSOLA will be updated accordingly.
 103   void RemoveOldInputFrames(float playback_rate);
 104
 105   // Update |output_time_| by |time_change|. In turn, |search_block_index_| is
 106   // updated.
 107   void UpdateOutputTime(float playback_rate, double time_change);
 108
 109   // Is |target_block_| fully within |search_block_|? If so, we don't need to
 110   // perform the search.
 111   bool TargetIsWithinSearchRegion() const;
 112
 113   // Do we have enough data to perform one round of WSOLA?
 114   bool CanPerformWsola() const;
 115
 116   // Number of channels in the audio stream.
 117   int channels_;
 118
 119   // Sample rate of the audio stream.
 120   int samples_per_second_;
 121
 122   // Buffered audio data.
 123   AudioBufferQueue audio_buffer_;
 124
 125   // If muted, keep track of partial frames that should have been skipped over.
 126   double muted_partial_frame_;
 127
 128   // How many frames to have in the queue before we report the queue is full.
 129   int capacity_;
 130
 131   // Book keeping of the current time of generated audio, in frames. This
 132   // should be appropriately updated when output samples are generated,
 133   // regardless of whether we push samples out when FillBuffer() is called or we
 134   // store audio in |wsola_output_| for the subsequent calls to FillBuffer().
 135   // Furthermore, if samples from |audio_buffer_| are evicted then this
 136   // member variable should be updated based on the playback rate.
 137   // Note that this member should be updated ONLY by calling UpdateOutputTime(),
 138   // so that |search_block_index_| is updated accordingly.
 139   double output_time_;
 140
 141   // The offset of the center frame of |search_block_| w.r.t. its first frame.
 142   int search_block_center_offset_;
 143
 144   // Index of the beginning of the |search_block_|, in frames.
 145   int search_block_index_;
 146
 147   // Number of blocks to search to find the most similar one to the target
 148   // frame.
 149   int num_candidate_blocks_;
 150
 151   // Index of the beginning of the target block, counted in frames.
 152   int target_block_index_;
 153
 154   // Overlap-and-add window size in frames.
 155   int ola_window_size_;
 156
 157   // The hop size of overlap-and-add in frames. This implementation assumes 50%
 158   // overlap-and-add.
 159   int ola_hop_size_;
 160
 161   // Number of frames in |wsola_output_| for which overlap-and-add is completed
 162   // and which can be copied to output if FillBuffer() is called. It also
 163   // specifies the index where the next WSOLA window has to overlap-and-add.
 164   int num_complete_frames_;
 165
 166   // This stores a part of the output that is created but couldn't be rendered.
 167   // Output is generated frame-by-frame which at some point might exceed the
 168   // number of requested samples. Furthermore, due to overlap-and-add,
 169   // the last half-window of the output is incomplete, which is stored in this
 170   // buffer.
 171   scoped_ptr<AudioBus> wsola_output_;
 172
 173   // Overlap-and-add window.
 174   scoped_ptr<float[]> ola_window_;
 175
 176   // Transition window, used to update |optimal_block_| by a weighted sum of
 177   // |optimal_block_| and |target_block_|.
 178   scoped_ptr<float[]> transition_window_;
 179
 180   // Auxiliary variables to avoid allocation in every iteration.
 181
 182   // Stores the optimal block in every iteration. This is the most
 183   // similar block to |target_block_| within |search_block_| and it is
 184   // overlap-and-added to |wsola_output_|.
 185   scoped_ptr<AudioBus> optimal_block_;
 186
 187   // A block of data that search is performed over to find the |optimal_block_|.
 188   scoped_ptr<AudioBus> search_block_;
 189
 190   // Stores the target block. |search_block_| is searched for the block
 191   // (stored in |optimal_block_|) that is most similar to
 192   // |target_block_|.
 193   scoped_ptr<AudioBus> target_block_;
 194
 195   DISALLOW_COPY_AND_ASSIGN(AudioRendererAlgorithm);
 196 };
 197
 198 }  // namespace media
 199
 200 #endif  // MEDIA_FILTERS_AUDIO_RENDERER_ALGORITHM_H_