src/media/audio/win/audio_low_latency_input_win.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "media/audio/win/audio_low_latency_input_win.h"
   6
   7 #include "base/logging.h"
   8 #include "base/memory/scoped_ptr.h"
   9 #include "base/strings/utf_string_conversions.h"
  10 #include "media/audio/win/audio_manager_win.h"
  11 #include "media/audio/win/avrt_wrapper_win.h"
  12
  13 using base::win::ScopedComPtr;
  14 using base::win::ScopedCOMInitializer;
  15
  16 namespace media {
  17
  18 WASAPIAudioInputStream::WASAPIAudioInputStream(
  19     AudioManagerWin* manager, const AudioParameters& params,
  20     const std::string& device_id)
  21     : manager_(manager),
  22       capture_thread_(NULL),
  23       opened_(false),
  24       started_(false),
  25       endpoint_buffer_size_frames_(0),
  26       device_id_(device_id),
  27       sink_(NULL) {
  28   DCHECK(manager_);
  29
  30   // Load the Avrt DLL if not already loaded. Required to support MMCSS.
  31   bool avrt_init = avrt::Initialize();
  32   DCHECK(avrt_init) << "Failed to load the Avrt.dll";
  33
  34   // Set up the desired capture format specified by the client.
  35   format_.nSamplesPerSec = params.sample_rate();
  36   format_.wFormatTag = WAVE_FORMAT_PCM;
  37   format_.wBitsPerSample = params.bits_per_sample();
  38   format_.nChannels = params.channels();
  39   format_.nBlockAlign = (format_.wBitsPerSample / 8) * format_.nChannels;
  40   format_.nAvgBytesPerSec = format_.nSamplesPerSec * format_.nBlockAlign;
  41   format_.cbSize = 0;
  42
  43   // Size in bytes of each audio frame.
  44   frame_size_ = format_.nBlockAlign;
  45   // Store size of audio packets which we expect to get from the audio
  46   // endpoint device in each capture event.
  47   packet_size_frames_ = params.GetBytesPerBuffer() / format_.nBlockAlign;
  48   packet_size_bytes_ = params.GetBytesPerBuffer();
  49   DVLOG(1) << "Number of bytes per audio frame  : " << frame_size_;
  50   DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_;
  51
  52   // All events are auto-reset events and non-signaled initially.
  53
  54   // Create the event which the audio engine will signal each time
  55   // a buffer becomes ready to be processed by the client.
  56   audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  57   DCHECK(audio_samples_ready_event_.IsValid());
  58
  59   // Create the event which will be set in Stop() when capturing shall stop.
  60   stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
  61   DCHECK(stop_capture_event_.IsValid());
  62
  63   ms_to_frame_count_ = static_cast<double>(params.sample_rate()) / 1000.0;
  64
  65   LARGE_INTEGER performance_frequency;
  66   if (QueryPerformanceFrequency(&performance_frequency)) {
  67     perf_count_to_100ns_units_ =
  68         (10000000.0 / static_cast<double>(performance_frequency.QuadPart));
  69   } else {
  70     LOG(ERROR) <<  "High-resolution performance counters are not supported.";
  71     perf_count_to_100ns_units_ = 0.0;
  72   }
  73 }
  74
  75 WASAPIAudioInputStream::~WASAPIAudioInputStream() {}
  76
  77 bool WASAPIAudioInputStream::Open() {
  78   DCHECK(CalledOnValidThread());
  79   // Verify that we are not already opened.
  80   if (opened_)
  81     return false;
  82
  83   // Obtain a reference to the IMMDevice interface of the capturing
  84   // device with the specified unique identifier or role which was
  85   // set at construction.
  86   HRESULT hr = SetCaptureDevice();
  87   if (FAILED(hr))
  88     return false;
  89
  90   // Obtain an IAudioClient interface which enables us to create and initialize
  91   // an audio stream between an audio application and the audio engine.
  92   hr = ActivateCaptureDevice();
  93   if (FAILED(hr))
  94     return false;
  95
  96   // Retrieve the stream format which the audio engine uses for its internal
  97   // processing/mixing of shared-mode streams. This function call is for
  98   // diagnostic purposes only and only in debug mode.
  99 #ifndef NDEBUG
 100   hr = GetAudioEngineStreamFormat();
 101 #endif
 102
 103   // Verify that the selected audio endpoint supports the specified format
 104   // set during construction.
 105   if (!DesiredFormatIsSupported())
 106     return false;
 107
 108   // Initialize the audio stream between the client and the device using
 109   // shared mode and a lowest possible glitch-free latency.
 110   hr = InitializeAudioEngine();
 111
 112   opened_ = SUCCEEDED(hr);
 113   return opened_;
 114 }
 115
 116 void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {
 117   DCHECK(CalledOnValidThread());
 118   DCHECK(callback);
 119   DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
 120   if (!opened_)
 121     return;
 122
 123   if (started_)
 124     return;
 125
 126   sink_ = callback;
 127
 128   // Starts periodic AGC microphone measurements if the AGC has been enabled
 129   // using SetAutomaticGainControl().
 130   StartAgc();
 131
 132   // Create and start the thread that will drive the capturing by waiting for
 133   // capture events.
 134   capture_thread_ =
 135       new base::DelegateSimpleThread(this, "wasapi_capture_thread");
 136   capture_thread_->Start();
 137
 138   // Start streaming data between the endpoint buffer and the audio engine.
 139   HRESULT hr = audio_client_->Start();
 140   DLOG_IF(ERROR, FAILED(hr)) << "Failed to start input streaming.";
 141
 142   if (SUCCEEDED(hr) && audio_render_client_for_loopback_)
 143     hr = audio_render_client_for_loopback_->Start();
 144
 145   started_ = SUCCEEDED(hr);
 146 }
 147
 148 void WASAPIAudioInputStream::Stop() {
 149   DCHECK(CalledOnValidThread());
 150   DVLOG(1) << "WASAPIAudioInputStream::Stop()";
 151   if (!started_)
 152     return;
 153
 154   // Stops periodic AGC microphone measurements.
 155   StopAgc();
 156
 157   // Shut down the capture thread.
 158   if (stop_capture_event_.IsValid()) {
 159     SetEvent(stop_capture_event_.Get());
 160   }
 161
 162   // Stop the input audio streaming.
 163   HRESULT hr = audio_client_->Stop();
 164   if (FAILED(hr)) {
 165     LOG(ERROR) << "Failed to stop input streaming.";
 166   }
 167
 168   // Wait until the thread completes and perform cleanup.
 169   if (capture_thread_) {
 170     SetEvent(stop_capture_event_.Get());
 171     capture_thread_->Join();
 172     capture_thread_ = NULL;
 173   }
 174
 175   started_ = false;
 176 }
 177
 178 void WASAPIAudioInputStream::Close() {
 179   DVLOG(1) << "WASAPIAudioInputStream::Close()";
 180   // It is valid to call Close() before calling open or Start().
 181   // It is also valid to call Close() after Start() has been called.
 182   Stop();
 183   if (sink_) {
 184     sink_->OnClose(this);
 185     sink_ = NULL;
 186   }
 187
 188   // Inform the audio manager that we have been closed. This will cause our
 189   // destruction.
 190   manager_->ReleaseInputStream(this);
 191 }
 192
 193 double WASAPIAudioInputStream::GetMaxVolume() {
 194   // Verify that Open() has been called succesfully, to ensure that an audio
 195   // session exists and that an ISimpleAudioVolume interface has been created.
 196   DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
 197   if (!opened_)
 198     return 0.0;
 199
 200   // The effective volume value is always in the range 0.0 to 1.0, hence
 201   // we can return a fixed value (=1.0) here.
 202   return 1.0;
 203 }
 204
 205 void WASAPIAudioInputStream::SetVolume(double volume) {
 206   DVLOG(1) << "SetVolume(volume=" << volume << ")";
 207   DCHECK(CalledOnValidThread());
 208   DCHECK_GE(volume, 0.0);
 209   DCHECK_LE(volume, 1.0);
 210
 211   DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
 212   if (!opened_)
 213     return;
 214
 215   // Set a new master volume level. Valid volume levels are in the range
 216   // 0.0 to 1.0. Ignore volume-change events.
 217   HRESULT hr = simple_audio_volume_->SetMasterVolume(static_cast<float>(volume),
 218       NULL);
 219   DLOG_IF(WARNING, FAILED(hr)) << "Failed to set new input master volume.";
 220
 221   // Update the AGC volume level based on the last setting above. Note that,
 222   // the volume-level resolution is not infinite and it is therefore not
 223   // possible to assume that the volume provided as input parameter can be
 224   // used directly. Instead, a new query to the audio hardware is required.
 225   // This method does nothing if AGC is disabled.
 226   UpdateAgcVolume();
 227 }
 228
 229 double WASAPIAudioInputStream::GetVolume() {
 230   DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
 231   if (!opened_)
 232     return 0.0;
 233
 234   // Retrieve the current volume level. The value is in the range 0.0 to 1.0.
 235   float level = 0.0f;
 236   HRESULT hr = simple_audio_volume_->GetMasterVolume(&level);
 237   DLOG_IF(WARNING, FAILED(hr)) << "Failed to get input master volume.";
 238
 239   return static_cast<double>(level);
 240 }
 241
 242 // static
 243 int WASAPIAudioInputStream::HardwareSampleRate(
 244     const std::string& device_id) {
 245   base::win::ScopedCoMem<WAVEFORMATEX> audio_engine_mix_format;
 246   HRESULT hr = GetMixFormat(device_id, &audio_engine_mix_format);
 247   if (FAILED(hr))
 248     return 0;
 249
 250   return static_cast<int>(audio_engine_mix_format->nSamplesPerSec);
 251 }
 252
 253 // static
 254 uint32 WASAPIAudioInputStream::HardwareChannelCount(
 255     const std::string& device_id) {
 256   base::win::ScopedCoMem<WAVEFORMATEX> audio_engine_mix_format;
 257   HRESULT hr = GetMixFormat(device_id, &audio_engine_mix_format);
 258   if (FAILED(hr))
 259     return 0;
 260
 261   return static_cast<uint32>(audio_engine_mix_format->nChannels);
 262 }
 263
 264 // static
 265 HRESULT WASAPIAudioInputStream::GetMixFormat(const std::string& device_id,
 266                                              WAVEFORMATEX** device_format) {
 267   // It is assumed that this static method is called from a COM thread, i.e.,
 268   // CoInitializeEx() is not called here to avoid STA/MTA conflicts.
 269   ScopedComPtr<IMMDeviceEnumerator> enumerator;
 270   HRESULT hr = enumerator.CreateInstance(__uuidof(MMDeviceEnumerator), NULL,
 271                                          CLSCTX_INPROC_SERVER);
 272   if (FAILED(hr))
 273     return hr;
 274
 275   ScopedComPtr<IMMDevice> endpoint_device;
 276   if (device_id == AudioManagerBase::kDefaultDeviceId) {
 277     // Retrieve the default capture audio endpoint.
 278     hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole,
 279                                              endpoint_device.Receive());
 280   } else if (device_id == AudioManagerBase::kLoopbackInputDeviceId) {
 281     // Capture the default playback stream.
 282     hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole,
 283                                              endpoint_device.Receive());
 284   } else {
 285     // Retrieve a capture endpoint device that is specified by an endpoint
 286     // device-identification string.
 287     hr = enumerator->GetDevice(UTF8ToUTF16(device_id).c_str(),
 288                                endpoint_device.Receive());
 289   }
 290   if (FAILED(hr))
 291     return hr;
 292
 293   ScopedComPtr<IAudioClient> audio_client;
 294   hr = endpoint_device->Activate(__uuidof(IAudioClient),
 295                                  CLSCTX_INPROC_SERVER,
 296                                  NULL,
 297                                  audio_client.ReceiveVoid());
 298   return SUCCEEDED(hr) ? audio_client->GetMixFormat(device_format) : hr;
 299 }
 300
 301 void WASAPIAudioInputStream::Run() {
 302   ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);
 303
 304   // Increase the thread priority.
 305   capture_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);
 306
 307   // Enable MMCSS to ensure that this thread receives prioritized access to
 308   // CPU resources.
 309   DWORD task_index = 0;
 310   HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
 311                                                       &task_index);
 312   bool mmcss_is_ok =
 313       (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
 314   if (!mmcss_is_ok) {
 315     // Failed to enable MMCSS on this thread. It is not fatal but can lead
 316     // to reduced QoS at high load.
 317     DWORD err = GetLastError();
 318     LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
 319   }
 320
 321   // Allocate a buffer with a size that enables us to take care of cases like:
 322   // 1) The recorded buffer size is smaller, or does not match exactly with,
 323   //    the selected packet size used in each callback.
 324   // 2) The selected buffer size is larger than the recorded buffer size in
 325   //    each event.
 326   size_t buffer_frame_index = 0;
 327   size_t capture_buffer_size = std::max(
 328       2 * endpoint_buffer_size_frames_ * frame_size_,
 329       2 * packet_size_frames_ * frame_size_);
 330   scoped_ptr<uint8[]> capture_buffer(new uint8[capture_buffer_size]);
 331
 332   LARGE_INTEGER now_count;
 333   bool recording = true;
 334   bool error = false;
 335   double volume = GetVolume();
 336   HANDLE wait_array[2] = {stop_capture_event_, audio_samples_ready_event_};
 337
 338   while (recording && !error) {
 339     HRESULT hr = S_FALSE;
 340
 341     // Wait for a close-down event or a new capture event.
 342     DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE);
 343     switch (wait_result) {
 344       case WAIT_FAILED:
 345         error = true;
 346         break;
 347       case WAIT_OBJECT_0 + 0:
 348         // |stop_capture_event_| has been set.
 349         recording = false;
 350         break;
 351       case WAIT_OBJECT_0 + 1:
 352         {
 353           // |audio_samples_ready_event_| has been set.
 354           BYTE* data_ptr = NULL;
 355           UINT32 num_frames_to_read = 0;
 356           DWORD flags = 0;
 357           UINT64 device_position = 0;
 358           UINT64 first_audio_frame_timestamp = 0;
 359
 360           // Retrieve the amount of data in the capture endpoint buffer,
 361           // replace it with silence if required, create callbacks for each
 362           // packet and store non-delivered data for the next event.
 363           hr = audio_capture_client_->GetBuffer(&data_ptr,
 364                                                 &num_frames_to_read,
 365                                                 &flags,
 366                                                 &device_position,
 367                                                 &first_audio_frame_timestamp);
 368           if (FAILED(hr)) {
 369             DLOG(ERROR) << "Failed to get data from the capture buffer";
 370             continue;
 371           }
 372
 373           if (num_frames_to_read != 0) {
 374             size_t pos = buffer_frame_index * frame_size_;
 375             size_t num_bytes = num_frames_to_read * frame_size_;
 376             DCHECK_GE(capture_buffer_size, pos + num_bytes);
 377
 378             if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
 379               // Clear out the local buffer since silence is reported.
 380               memset(&capture_buffer[pos], 0, num_bytes);
 381             } else {
 382               // Copy captured data from audio engine buffer to local buffer.
 383               memcpy(&capture_buffer[pos], data_ptr, num_bytes);
 384             }
 385
 386             buffer_frame_index += num_frames_to_read;
 387           }
 388
 389           hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read);
 390           DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";
 391
 392           // Derive a delay estimate for the captured audio packet.
 393           // The value contains two parts (A+B), where A is the delay of the
 394           // first audio frame in the packet and B is the extra delay
 395           // contained in any stored data. Unit is in audio frames.
 396           QueryPerformanceCounter(&now_count);
 397           double audio_delay_frames =
 398               ((perf_count_to_100ns_units_ * now_count.QuadPart -
 399                 first_audio_frame_timestamp) / 10000.0) * ms_to_frame_count_ +
 400                 buffer_frame_index - num_frames_to_read;
 401
 402           // Get a cached AGC volume level which is updated once every second
 403           // on the audio manager thread. Note that, |volume| is also updated
 404           // each time SetVolume() is called through IPC by the render-side AGC.
 405           GetAgcVolume(&volume);
 406
 407           // Deliver captured data to the registered consumer using a packet
 408           // size which was specified at construction.
 409           uint32 delay_frames = static_cast<uint32>(audio_delay_frames + 0.5);
 410           while (buffer_frame_index >= packet_size_frames_) {
 411             uint8* audio_data =
 412                 reinterpret_cast<uint8*>(capture_buffer.get());
 413
 414             // Deliver data packet, delay estimation and volume level to
 415             // the user.
 416             sink_->OnData(this,
 417                           audio_data,
 418                           packet_size_bytes_,
 419                           delay_frames * frame_size_,
 420                           volume);
 421
 422             // Store parts of the recorded data which can't be delivered
 423             // using the current packet size. The stored section will be used
 424             // either in the next while-loop iteration or in the next
 425             // capture event.
 426             memmove(&capture_buffer[0],
 427                     &capture_buffer[packet_size_bytes_],
 428                     (buffer_frame_index - packet_size_frames_) * frame_size_);
 429
 430             buffer_frame_index -= packet_size_frames_;
 431             delay_frames -= packet_size_frames_;
 432           }
 433         }
 434         break;
 435       default:
 436         error = true;
 437         break;
 438     }
 439   }
 440
 441   if (recording && error) {
 442     // TODO(henrika): perhaps it worth improving the cleanup here by e.g.
 443     // stopping the audio client, joining the thread etc.?
 444     NOTREACHED() << "WASAPI capturing failed with error code "
 445                  << GetLastError();
 446   }
 447
 448   // Disable MMCSS.
 449   if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
 450     PLOG(WARNING) << "Failed to disable MMCSS";
 451   }
 452 }
 453
 454 void WASAPIAudioInputStream::HandleError(HRESULT err) {
 455   NOTREACHED() << "Error code: " << err;
 456   if (sink_)
 457     sink_->OnError(this);
 458 }
 459
 460 HRESULT WASAPIAudioInputStream::SetCaptureDevice() {
 461   ScopedComPtr<IMMDeviceEnumerator> enumerator;
 462   HRESULT hr = enumerator.CreateInstance(__uuidof(MMDeviceEnumerator),
 463                                          NULL, CLSCTX_INPROC_SERVER);
 464   if (FAILED(hr))
 465     return hr;
 466
 467   // Retrieve the IMMDevice by using the specified role or the specified
 468   // unique endpoint device-identification string.
 469   // TODO(henrika): possibly add support for the eCommunications as well.
 470   if (device_id_ == AudioManagerBase::kDefaultDeviceId) {
 471     // Retrieve the default capture audio endpoint for the specified role.
 472     // Note that, in Windows Vista, the MMDevice API supports device roles
 473     // but the system-supplied user interface programs do not.
 474     hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole,
 475                                              endpoint_device_.Receive());
 476   } else if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
 477     // Capture the default playback stream.
 478     hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole,
 479                                              endpoint_device_.Receive());
 480   } else {
 481     // Retrieve a capture endpoint device that is specified by an endpoint
 482     // device-identification string.
 483     hr = enumerator->GetDevice(UTF8ToUTF16(device_id_).c_str(),
 484                                endpoint_device_.Receive());
 485   }
 486
 487   if (FAILED(hr))
 488     return hr;
 489
 490   // Verify that the audio endpoint device is active, i.e., the audio
 491   // adapter that connects to the endpoint device is present and enabled.
 492   DWORD state = DEVICE_STATE_DISABLED;
 493   hr = endpoint_device_->GetState(&state);
 494   if (FAILED(hr))
 495     return hr;
 496
 497   if (!(state & DEVICE_STATE_ACTIVE)) {
 498     DLOG(ERROR) << "Selected capture device is not active.";
 499     hr = E_ACCESSDENIED;
 500   }
 501
 502   return hr;
 503 }
 504
 505 HRESULT WASAPIAudioInputStream::ActivateCaptureDevice() {
 506   // Creates and activates an IAudioClient COM object given the selected
 507   // capture endpoint device.
 508   HRESULT hr = endpoint_device_->Activate(__uuidof(IAudioClient),
 509                                           CLSCTX_INPROC_SERVER,
 510                                           NULL,
 511                                           audio_client_.ReceiveVoid());
 512   return hr;
 513 }
 514
 515 HRESULT WASAPIAudioInputStream::GetAudioEngineStreamFormat() {
 516   HRESULT hr = S_OK;
 517 #ifndef NDEBUG
 518   // The GetMixFormat() method retrieves the stream format that the
 519   // audio engine uses for its internal processing of shared-mode streams.
 520   // The method always uses a WAVEFORMATEXTENSIBLE structure, instead
 521   // of a stand-alone WAVEFORMATEX structure, to specify the format.
 522   // An WAVEFORMATEXTENSIBLE structure can specify both the mapping of
 523   // channels to speakers and the number of bits of precision in each sample.
 524   base::win::ScopedCoMem<WAVEFORMATEXTENSIBLE> format_ex;
 525   hr = audio_client_->GetMixFormat(
 526       reinterpret_cast<WAVEFORMATEX**>(&format_ex));
 527
 528   // See http://msdn.microsoft.com/en-us/windows/hardware/gg463006#EFH
 529   // for details on the WAVE file format.
 530   WAVEFORMATEX format = format_ex->Format;
 531   DVLOG(2) << "WAVEFORMATEX:";
 532   DVLOG(2) << "  wFormatTags    : 0x" << std::hex << format.wFormatTag;
 533   DVLOG(2) << "  nChannels      : " << format.nChannels;
 534   DVLOG(2) << "  nSamplesPerSec : " << format.nSamplesPerSec;
 535   DVLOG(2) << "  nAvgBytesPerSec: " << format.nAvgBytesPerSec;
 536   DVLOG(2) << "  nBlockAlign    : " << format.nBlockAlign;
 537   DVLOG(2) << "  wBitsPerSample : " << format.wBitsPerSample;
 538   DVLOG(2) << "  cbSize         : " << format.cbSize;
 539
 540   DVLOG(2) << "WAVEFORMATEXTENSIBLE:";
 541   DVLOG(2) << " wValidBitsPerSample: " <<
 542       format_ex->Samples.wValidBitsPerSample;
 543   DVLOG(2) << " dwChannelMask      : 0x" << std::hex <<
 544       format_ex->dwChannelMask;
 545   if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
 546     DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_PCM";
 547   else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
 548     DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_IEEE_FLOAT";
 549   else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_WAVEFORMATEX)
 550     DVLOG(2) << " SubFormat          : KSDATAFORMAT_SUBTYPE_WAVEFORMATEX";
 551 #endif
 552   return hr;
 553 }
 554
 555 bool WASAPIAudioInputStream::DesiredFormatIsSupported() {
 556   // An application that uses WASAPI to manage shared-mode streams can rely
 557   // on the audio engine to perform only limited format conversions. The audio
 558   // engine can convert between a standard PCM sample size used by the
 559   // application and the floating-point samples that the engine uses for its
 560   // internal processing. However, the format for an application stream
 561   // typically must have the same number of channels and the same sample
 562   // rate as the stream format used by the device.
 563   // Many audio devices support both PCM and non-PCM stream formats. However,
 564   // the audio engine can mix only PCM streams.
 565   base::win::ScopedCoMem<WAVEFORMATEX> closest_match;
 566   HRESULT hr = audio_client_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,
 567                                                 &format_,
 568                                                 &closest_match);
 569   DLOG_IF(ERROR, hr == S_FALSE) << "Format is not supported "
 570                                 << "but a closest match exists.";
 571   return (hr == S_OK);
 572 }
 573
 574 HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {
 575   DWORD flags;
 576   // Use event-driven mode only fo regular input devices. For loopback the
 577   // EVENTCALLBACK flag is specified when intializing
 578   // |audio_render_client_for_loopback_|.
 579   if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
 580     flags = AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
 581   } else {
 582     flags =
 583       AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
 584   }
 585
 586   // Initialize the audio stream between the client and the device.
 587   // We connect indirectly through the audio engine by using shared mode.
 588   // Note that, |hnsBufferDuration| is set of 0, which ensures that the
 589   // buffer is never smaller than the minimum buffer size needed to ensure
 590   // that glitches do not occur between the periodic processing passes.
 591   // This setting should lead to lowest possible latency.
 592   HRESULT hr = audio_client_->Initialize(AUDCLNT_SHAREMODE_SHARED,
 593                                          flags,
 594                                          0,  // hnsBufferDuration
 595                                          0,
 596                                          &format_,
 597                                          NULL);
 598   if (FAILED(hr))
 599     return hr;
 600
 601   // Retrieve the length of the endpoint buffer shared between the client
 602   // and the audio engine. The buffer length determines the maximum amount
 603   // of capture data that the audio engine can read from the endpoint buffer
 604   // during a single processing pass.
 605   // A typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.
 606   hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);
 607   if (FAILED(hr))
 608     return hr;
 609
 610   DVLOG(1) << "endpoint buffer size: " << endpoint_buffer_size_frames_
 611            << " [frames]";
 612
 613 #ifndef NDEBUG
 614   // The period between processing passes by the audio engine is fixed for a
 615   // particular audio endpoint device and represents the smallest processing
 616   // quantum for the audio engine. This period plus the stream latency between
 617   // the buffer and endpoint device represents the minimum possible latency
 618   // that an audio application can achieve.
 619   // TODO(henrika): possibly remove this section when all parts are ready.
 620   REFERENCE_TIME device_period_shared_mode = 0;
 621   REFERENCE_TIME device_period_exclusive_mode = 0;
 622   HRESULT hr_dbg = audio_client_->GetDevicePeriod(
 623       &device_period_shared_mode, &device_period_exclusive_mode);
 624   if (SUCCEEDED(hr_dbg)) {
 625     DVLOG(1) << "device period: "
 626              << static_cast<double>(device_period_shared_mode / 10000.0)
 627              << " [ms]";
 628   }
 629
 630   REFERENCE_TIME latency = 0;
 631   hr_dbg = audio_client_->GetStreamLatency(&latency);
 632   if (SUCCEEDED(hr_dbg)) {
 633     DVLOG(1) << "stream latency: " << static_cast<double>(latency / 10000.0)
 634              << " [ms]";
 635   }
 636 #endif
 637
 638   // Set the event handle that the audio engine will signal each time a buffer
 639   // becomes ready to be processed by the client.
 640   //
 641   // In loopback case the capture device doesn't receive any events, so we
 642   // need to create a separate playback client to get notifications. According
 643   // to MSDN:
 644   //
 645   //   A pull-mode capture client does not receive any events when a stream is
 646   //   initialized with event-driven buffering and is loopback-enabled. To
 647   //   work around this, initialize a render stream in event-driven mode. Each
 648   //   time the client receives an event for the render stream, it must signal
 649   //   the capture client to run the capture thread that reads the next set of
 650   //   samples from the capture endpoint buffer.
 651   //
 652   // http://msdn.microsoft.com/en-us/library/windows/desktop/dd316551(v=vs.85).aspx
 653   if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
 654     hr = endpoint_device_->Activate(
 655         __uuidof(IAudioClient), CLSCTX_INPROC_SERVER, NULL,
 656         audio_render_client_for_loopback_.ReceiveVoid());
 657     if (FAILED(hr))
 658       return hr;
 659
 660     hr = audio_render_client_for_loopback_->Initialize(
 661         AUDCLNT_SHAREMODE_SHARED,
 662         AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST,
 663         0, 0, &format_, NULL);
 664     if (FAILED(hr))
 665       return hr;
 666
 667     hr = audio_render_client_for_loopback_->SetEventHandle(
 668         audio_samples_ready_event_.Get());
 669   } else {
 670     hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get());
 671   }
 672
 673   if (FAILED(hr))
 674     return hr;
 675
 676   // Get access to the IAudioCaptureClient interface. This interface
 677   // enables us to read input data from the capture endpoint buffer.
 678   hr = audio_client_->GetService(__uuidof(IAudioCaptureClient),
 679                                  audio_capture_client_.ReceiveVoid());
 680   if (FAILED(hr))
 681     return hr;
 682
 683   // Obtain a reference to the ISimpleAudioVolume interface which enables
 684   // us to control the master volume level of an audio session.
 685   hr = audio_client_->GetService(__uuidof(ISimpleAudioVolume),
 686                                  simple_audio_volume_.ReceiveVoid());
 687   return hr;
 688 }
 689
 690 }  // namespace media