src/media/audio/win/audio_unified_win.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
   6 #define MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_
   7
   8 #include <Audioclient.h>
   9 #include <MMDeviceAPI.h>
  10
  11 #include <string>
  12
  13 #include "base/compiler_specific.h"
  14 #include "base/gtest_prod_util.h"
  15 #include "base/threading/platform_thread.h"
  16 #include "base/threading/simple_thread.h"
  17 #include "base/win/scoped_co_mem.h"
  18 #include "base/win/scoped_comptr.h"
  19 #include "base/win/scoped_handle.h"
  20 #include "media/audio/audio_io.h"
  21 #include "media/audio/audio_parameters.h"
  22 #include "media/base/audio_fifo.h"
  23 #include "media/base/channel_mixer.h"
  24 #include "media/base/media_export.h"
  25 #include "media/base/multi_channel_resampler.h"
  26
  27 namespace media {
  28
  29 class AudioManagerWin;
  30
  31 // Implementation of AudioOutputStream for Windows using the Core Audio API
  32 // where both capturing and rendering take place on the same thread to enable
  33 // audio I/O. This class allows arbitrary combinations of input and output
  34 // devices running off different clocks and using different drivers, with
  35 // potentially differing sample-rates.
  36 //
  37 // It is required to first acquire the native sample rate of the selected
  38 // output device and then use the same rate when creating this object.
  39 // The inner operation depends on the input sample rate which is determined
  40 // during construction. Three different main modes are supported:
  41 //
  42 //  1)  input rate == output rate => input side drives output side directly.
  43 //  2)  input rate != output rate => both sides are driven independently by
  44 //      events and a FIFO plus a resampling unit is used to compensate for
  45 //      differences in sample rates between the two sides.
  46 //  3)  input rate == output rate but native buffer sizes are not identical =>
  47 //      same inner functionality as in (2) to compensate for the differences
  48 //      in buffer sizes and also compensate for any potential clock drift
  49 //      between the two devices.
  50 //
  51 // Mode detection is done at construction and using mode (1) will lead to
  52 // best performance (lower delay and no "varispeed distortion"), i.e., it is
  53 // recommended to use same sample rates for input and output. Mode (2) uses a
  54 // resampler which supports rate adjustments to fine tune for things like
  55 // clock drift and differences in sample rates between different devices.
  56 // Mode (2) - which uses a FIFO and an adjustable multi-channel resampler -
  57 // is also called the varispeed mode and it is used for case (3) as well to
  58 // compensate for the difference in buffer sizes mainly.
  59 // Mode (3) can happen if two different audio devices are used.
  60 // As an example: some devices need a buffer size of 441 @ 44.1kHz and others
  61 // 448 @ 44.1kHz. This is a rare case and will only happen for sample rates
  62 // which are integer multiples of 11025 Hz (11025, 22050, 44100, 88200 etc.).
  63 //
  64 // Implementation notes:
  65 //
  66 //  - Open() can fail if the input and output parameters do not fulfill
  67 //    certain conditions. See source for Open() for more details.
  68 //  - Channel mixing will be performed if the client asks for a larger
  69 //    number of channels than the native audio layer provides.
  70 //    Example: client wants stereo but audio layer provides mono. In this case
  71 //    upmixing from mono to stereo (1->2) will be done.
  72 //
  73 // TODO(henrika):
  74 //
  75 //  - Add support for exclusive mode.
  76 //  - Add support for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, i.e., 32-bit float
  77 //    as internal sample-value representation.
  78 //  - Perform fine-tuning for non-matching sample rates to reduce latency.
  79 //
  80 class MEDIA_EXPORT WASAPIUnifiedStream
  81     : public AudioOutputStream,
  82       public base::DelegateSimpleThread::Delegate {
  83  public:
  84   // The ctor takes all the usual parameters, plus |manager| which is the
  85   // audio manager that is creating this object.
  86   WASAPIUnifiedStream(AudioManagerWin* manager,
  87                       const AudioParameters& params,
  88                       const std::string& input_device_id);
  89
  90   // The dtor is typically called by the AudioManager only and it is usually
  91   // triggered by calling AudioOutputStream::Close().
  92   virtual ~WASAPIUnifiedStream();
  93
  94   // Implementation of AudioOutputStream.
  95   virtual bool Open() OVERRIDE;
  96   virtual void Start(AudioSourceCallback* callback) OVERRIDE;
  97   virtual void Stop() OVERRIDE;
  98   virtual void Close() OVERRIDE;
  99   virtual void SetVolume(double volume) OVERRIDE;
 100   virtual void GetVolume(double* volume) OVERRIDE;
 101
 102   bool started() const {  // True while |audio_io_thread_| is non-NULL.
 103     return audio_io_thread_.get() != NULL;
 104   }
 105
 106   // Returns true when both |fifo_| and |resampler_| have been allocated, i.e.,
 107   // when the stream operates in varispeed mode (FIFO plus resampler).
 108   bool VarispeedMode() const { return (fifo_ && resampler_); }
 109
 110  private:
 111   enum {
 112     // Time in milliseconds between two successive delay measurements.
 113     // We save resources by not updating the delay estimates for each capture
 114     // event (typically 100Hz rate).
 115     kTimeDiffInMillisecondsBetweenDelayMeasurements = 1000,
 116
 117     // Max possible FIFO size.
 118     kFifoSize = 16384,
 119
 120     // This value was determined empirically for minimum latency while still
 121     // guarding against FIFO under-runs. The actual target size will be equal
 122     // to kTargetFifoSafetyFactor * (native input buffer size).
 123     // TODO(henrika): tune this value for lowest possible latency for all
 124     // possible sample rate combinations.
 125     kTargetFifoSafetyFactor = 2
 126   };
 127
 128   // Additional initialization required when input and output sample rates
 129   // differ. Allocates resources for |fifo_|, |resampler_|, |render_event_|,
 130   // and the |capture_bus_| and configures the |input_format_| structure
 131   // given the provided input and output audio parameters.
 132   void DoVarispeedInitialization(const AudioParameters& input_params,
 133                                  const AudioParameters& output_params);
 134
 135   // Clears varispeed related components such as the FIFO and the resampler.
 136   void ResetVarispeed();
 137
 138   // Builds WAVEFORMATEX structures for input and output based on input and
 139   // output audio parameters.
 140   void SetIOFormats(const AudioParameters& input_params,
 141                     const AudioParameters& output_params);
 142
 143   // DelegateSimpleThread::Delegate implementation.
 144   virtual void Run() OVERRIDE;
 145
 146   // MultiChannelResampler::MultiChannelAudioSourceProvider implementation.
 147   // Callback for providing more data into the resampler.
 148   // Only used in varispeed mode, i.e., when input rate != output rate.
 149   virtual void ProvideInput(int frame_delay, AudioBus* audio_bus);
 150
 151   // Issues the OnError() callback to the |source_|.
 152   void HandleError(HRESULT err);
 153
 154   // Stops and joins the audio thread in case of an error.
 155   void StopAndJoinThread(HRESULT err);
 156
 157   // Converts unique endpoint ID to user-friendly device name.
 158   std::string GetDeviceName(LPCWSTR device_id) const;
 159
 160   // Called on the audio IO thread for each capture event.
 161   // Buffers captured audio into a FIFO if varispeed is used or into an audio
 162   // bus if input and output sample rates are identical.
 163   void ProcessInputAudio();
 164
 165   // Called on the audio IO thread for each render event when varispeed is
 166   // active or for each capture event when varispeed is not used.
 167   // In varispeed mode, it triggers a resampling callback, which reads from the
 168   // FIFO, and calls AudioSourceCallback::OnMoreIOData using the resampled
 169   // input signal and at the same time asks for data to play out.
 170   // If input and output rates are the same - instead of reading from the FIFO
 171   // and resampling - we read directly from the audio bus used to store
 172   // captured data in ProcessInputAudio.
 173   void ProcessOutputAudio(IAudioClock* audio_output_clock);
 174
 175   // Contains the thread ID of the creating thread.
 176   base::PlatformThreadId creating_thread_id_;
 177
 178   // Our creator, the audio manager needs to be notified when we close.
 179   AudioManagerWin* manager_;
 180
 181   // Contains the audio parameter structure provided at construction.
 182   AudioParameters params_;
 183   // For convenience, same as in params_.
 184   int input_channels_;
 185   int output_channels_;
 186
 187   // Unique ID of the input device to be opened.
 188   const std::string input_device_id_;
 189
 190   // The sharing mode for the streams.
 191   // Valid values are AUDCLNT_SHAREMODE_SHARED and AUDCLNT_SHAREMODE_EXCLUSIVE
 192   // where AUDCLNT_SHAREMODE_SHARED is the default.
 193   AUDCLNT_SHAREMODE share_mode_;
 194
 195   // Rendering and capturing are driven by this thread (no message loop).
 196   // All OnMoreIOData() callbacks will be called from this thread.
 197   scoped_ptr<base::DelegateSimpleThread> audio_io_thread_;
 198
 199   // Contains the desired audio output format which is set up at construction.
 200   // It is required to first acquire the native sample rate of the selected
 201   // output device and then use the same rate when creating this object.
 202   WAVEFORMATPCMEX output_format_;
 203
 204   // Contains the native audio input format which is set up at construction
 205   // if varispeed mode is utilized.
 206   WAVEFORMATPCMEX input_format_;
 207
 208   // True when successfully opened.
 209   bool opened_;
 210
 211   // Volume level from 0 to 1 used for output scaling.
 212   double volume_;
 213
 214   // Size in audio frames of each audio packet where an audio packet
 215   // is defined as the block of data which the destination is expected to
 216   // receive in each OnMoreIOData() callback.
 217   size_t output_buffer_size_frames_;
 218
 219   // Size in audio frames of each audio packet where an audio packet
 220   // is defined as the block of data which the source is expected to
 221   // deliver in each OnMoreIOData() callback.
 222   size_t input_buffer_size_frames_;
 223
 224   // Length of the audio endpoint buffer.
 225   uint32 endpoint_render_buffer_size_frames_;
 226   uint32 endpoint_capture_buffer_size_frames_;
 227
 228   // Counts the number of audio frames written to the endpoint buffer.
 229   uint64 num_written_frames_;
 230
 231   // Time stamp for last delay measurement.
 232   base::TimeTicks last_delay_sample_time_;
 233
 234   // Contains the total (sum of render and capture) delay in milliseconds.
 235   double total_delay_ms_;
 236
 237   // Contains the total (sum of render and capture and possibly FIFO) delay
 238   // in bytes. The update frequency is set by a constant called
 239   // |kTimeDiffInMillisecondsBetweenDelayMeasurements|.
 240   int total_delay_bytes_;
 241
 242   // Pointer to the client that will deliver audio samples to be played out.
 243   AudioSourceCallback* source_;
 244
 245   // IMMDevice interfaces which represent audio endpoint devices.
 246   base::win::ScopedComPtr<IMMDevice> endpoint_render_device_;
 247   base::win::ScopedComPtr<IMMDevice> endpoint_capture_device_;
 248
 249   // IAudioClient interfaces which enable a client to create and initialize
 250   // an audio stream between an audio application and the audio engine.
 251   base::win::ScopedComPtr<IAudioClient> audio_output_client_;
 252   base::win::ScopedComPtr<IAudioClient> audio_input_client_;
 253
 254   // IAudioRenderClient interface enables a client to write output
 255   // data to a rendering endpoint buffer.
 256   base::win::ScopedComPtr<IAudioRenderClient> audio_render_client_;
 257
 258   // IAudioCaptureClient interface enables a client to read input
 259   // data from a capturing endpoint buffer.
 260   base::win::ScopedComPtr<IAudioCaptureClient> audio_capture_client_;
 261
 262   // The audio engine will signal this event each time a buffer has been
 263   // recorded.
 264   base::win::ScopedHandle capture_event_;
 265
 266   // The audio engine will signal this event each time it needs a new
 267   // audio buffer to play out.
 268   // Only utilized in varispeed mode.
 269   base::win::ScopedHandle render_event_;
 270
 271   // This event will be signaled when streaming shall stop.
 272   base::win::ScopedHandle stop_streaming_event_;
 273
 274   // Container for retrieving data from AudioSourceCallback::OnMoreIOData().
 275   scoped_ptr<AudioBus> output_bus_;
 276
 277   // Container for sending data to AudioSourceCallback::OnMoreIOData().
 278   scoped_ptr<AudioBus> input_bus_;
 279
 280   // Container for storing output from the channel mixer.
 281   scoped_ptr<AudioBus> channel_bus_;
 282
 283   // All members below are only allocated, or used, in varispeed mode:
 284
 285   // Temporary storage of resampled input audio data.
 286   scoped_ptr<AudioBus> resampled_bus_;
 287
 288   // Set to true the first time a capture event has been received in varispeed
 289   // mode.
 290   bool input_callback_received_;
 291
 292   // MultiChannelResampler is a multi channel wrapper for SincResampler;
 293   // allowing high quality sample rate conversion of multiple channels at once.
 294   scoped_ptr<MultiChannelResampler> resampler_;
 295
 296   // Resampler I/O ratio.
 297   double io_sample_rate_ratio_;
 298
 299   // Used for input to output buffering.
 300   scoped_ptr<AudioFifo> fifo_;
 301
 302   // The channel mixer is only created and utilized if the number of input
 303   // channels is larger than the native number of input channels (e.g., the
 304   // client wants stereo but the audio device only supports mono).
 305   scoped_ptr<ChannelMixer> channel_mixer_;
 306
 307   // The optimal number of frames we'd like to keep in the FIFO at all times.
 308   int target_fifo_frames_;
 309
 310   // A running average of the measured delta between actual number of frames
 311   // in the FIFO versus |target_fifo_frames_|.
 312   double average_delta_;
 313
 314   // A varispeed rate scalar which is calculated based on FIFO drift.
 315   double fifo_rate_compensation_;
 316
 317   // Set to true when input side signals output side that a new delay
 318   // estimate is needed.
 319   bool update_output_delay_;
 320
 321   // Capture side stores its delay estimate so the sum can be derived in
 322   // the render side.
 323   double capture_delay_ms_;
 324
 325   // TODO(henrika): possibly remove these members once the performance is
 326   // properly tuned. Only used for off-line debugging.
 327 #ifndef NDEBUG
 328   enum LogElementNames {
 329     INPUT_TIME_STAMP,
 330     NUM_FRAMES_IN_FIFO,
 331     RESAMPLER_MARGIN,
 332     RATE_COMPENSATION
 333   };
 334
 335   scoped_ptr<int64[]> input_time_stamps_;
 336   scoped_ptr<int[]> num_frames_in_fifo_;
 337   scoped_ptr<int[]> resampler_margin_;
 338   scoped_ptr<double[]> fifo_rate_comps_;
 339   scoped_ptr<int[]> num_elements_;
 340   scoped_ptr<int[]> input_params_;
 341   scoped_ptr<int[]> output_params_;
 342
 343   FILE* data_file_;
 344   FILE* param_file_;
 345 #endif
 346
 347   DISALLOW_COPY_AND_ASSIGN(WASAPIUnifiedStream);
 348 };
 349
 350 }  // namespace media
 351
 352 #endif  // MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_