1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "media/audio/win/audio_low_latency_output_win.h"
7 #include <Functiondiscoverykeys_devpkey.h>
9 #include "base/command_line.h"
10 #include "base/debug/trace_event.h"
11 #include "base/logging.h"
12 #include "base/memory/scoped_ptr.h"
13 #include "base/metrics/histogram.h"
14 #include "base/strings/utf_string_conversions.h"
15 #include "base/win/scoped_propvariant.h"
16 #include "media/audio/win/audio_manager_win.h"
17 #include "media/audio/win/avrt_wrapper_win.h"
18 #include "media/audio/win/core_audio_util_win.h"
19 #include "media/base/limits.h"
20 #include "media/base/media_switches.h"
22 using base::win::ScopedComPtr;
23 using base::win::ScopedCOMInitializer;
24 using base::win::ScopedCoMem;
28 // Compare two sets of audio parameters and return true if they are equal.
// "Equal" here means that format(), sample_rate() and frames_per_buffer() all
// match; no other field is inspected.
29 // Note that bits_per_sample() is excluded from this comparison since Core
30 // Audio can deal with most bit depths. As an example, if the native/mixing
31 // bit depth is 32 bits (default), opening at 16 or 24 still works fine and
32 // the audio engine will do the required conversion for us. Channel count is
33 // excluded since Open() will fail anyways and it doesn't impact buffering.
34 static bool CompareAudioParametersNoBitDepthOrChannels(
35 const media::AudioParameters& a, const media::AudioParameters& b) {
36 return (a.format() == b.format() &&
37 a.sample_rate() == b.sample_rate() &&
38 a.frames_per_buffer() == b.frames_per_buffer());
// Selects the WASAPI share mode for the current process: exclusive mode when
// the switches::kEnableExclusiveAudio command-line switch is present, shared
// mode otherwise.
42 AUDCLNT_SHAREMODE WASAPIAudioOutputStream::GetShareMode() {
43 const CommandLine* cmd_line = CommandLine::ForCurrentProcess();
44 if (cmd_line->HasSwitch(switches::kEnableExclusiveAudio))
45 return AUDCLNT_SHAREMODE_EXCLUSIVE;
46 return AUDCLNT_SHAREMODE_SHARED;
// Returns the sample rate (nSamplesPerSec) of the shared-mode mix format of
// the render device named by |device_id|. An empty |device_id| selects the
// default render device for the eConsole role.
// NOTE(review): the else line, the device check and the statement guarded by
// the failure check on |client| are not part of this listing (embedded
// numbers skip 55, 57-58 and 63); presumably the failure paths return early -
// confirm against the full file.
50 int WASAPIAudioOutputStream::HardwareSampleRate(const std::string& device_id) {
51 WAVEFORMATPCMEX format;
52 ScopedComPtr<IAudioClient> client;
53 if (device_id.empty()) {
54 client = CoreAudioUtil::CreateDefaultClient(eRender, eConsole);
// Explicit device: resolve the IMMDevice first, then create its client.
56 ScopedComPtr<IMMDevice> device(CoreAudioUtil::CreateDevice(device_id));
59 client = CoreAudioUtil::CreateClient(device);
// Failure condition: no client, or the mix format could not be queried.
62 if (!client || FAILED(CoreAudioUtil::GetSharedModeMixFormat(client, &format)))
65 return static_cast<int>(format.Format.nSamplesPerSec);
// Constructs an output stream for |device_id| using |params|.
// The constructor
//  - records the creating thread id (DCHECKed by Open/Start/Stop/Close),
//  - in shared mode, compares |params| with the device's preferred parameters
//    and stores the outcome in |audio_parameters_are_valid_|,
//  - loads avrt.dll,
//  - fills in |format_| as WAVE_FORMAT_EXTENSIBLE / PCM from |params|,
//  - caches the packet size in frames, bytes and milliseconds, and
//  - creates the two auto-reset events the render thread waits on.
// NOTE(review): part of the parameter list and of the initializer list is not
// visible in this listing (embedded numbers skip 71, 73-74, 76 and 82).
68 WASAPIAudioOutputStream::WASAPIAudioOutputStream(AudioManagerWin* manager,
69 const std::string& device_id,
70 const AudioParameters& params,
72 : creating_thread_id_(base::PlatformThread::CurrentId()),
75 audio_parameters_are_valid_(false),
77 endpoint_buffer_size_frames_(0),
78 device_id_(device_id),
79 device_role_(device_role),
80 share_mode_(GetShareMode()),
81 num_written_frames_(0),
83 audio_bus_(AudioBus::Create(params)) {
85 VLOG(1) << "WASAPIAudioOutputStream::WASAPIAudioOutputStream()";
86 VLOG_IF(1, share_mode_ == AUDCLNT_SHAREMODE_EXCLUSIVE)
87 << "Core Audio (WASAPI) EXCLUSIVE MODE is enabled.";
89 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
90 // Verify that the input audio parameters are identical (bit depth and
91 // channel count are excluded) to the preferred (native) audio parameters.
92 // Open() will fail if this is not the case.
93 AudioParameters preferred_params;
// An empty |device_id_| queries by data flow and role, otherwise by id.
94 HRESULT hr = device_id_.empty() ?
95 CoreAudioUtil::GetPreferredAudioParameters(eRender, device_role,
97 CoreAudioUtil::GetPreferredAudioParameters(device_id_,
// Valid only if the query succeeded AND format, sample rate and buffer size
// match the preferred parameters.
99 audio_parameters_are_valid_ = SUCCEEDED(hr) &&
100 CompareAudioParametersNoBitDepthOrChannels(params, preferred_params);
101 LOG_IF(WARNING, !audio_parameters_are_valid_)
102 << "Input and preferred parameters are not identical. "
103 << "Device id: " << device_id_;
106 // Load the Avrt DLL if not already loaded. Required to support MMCSS.
// The result is only DCHECKed in the visible code.
107 bool avrt_init = avrt::Initialize();
108 DCHECK(avrt_init) << "Failed to load the avrt.dll";
110 // Set up the desired render format specified by the client. We use the
111 // WAVE_FORMAT_EXTENSIBLE structure to ensure that multiple channel ordering
112 // and high precision data can be supported.
114 // Begin with the WAVEFORMATEX structure that specifies the basic format.
115 WAVEFORMATEX* format = &format_.Format;
116 format->wFormatTag = WAVE_FORMAT_EXTENSIBLE;
117 format->nChannels = params.channels();
118 format->nSamplesPerSec = params.sample_rate();
119 format->wBitsPerSample = params.bits_per_sample();
// Bytes per audio frame = bytes per sample * number of channels.
120 format->nBlockAlign = (format->wBitsPerSample / 8) * format->nChannels;
121 format->nAvgBytesPerSec = format->nSamplesPerSec * format->nBlockAlign;
// Size of the extension that follows the base WAVEFORMATEX structure.
122 format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
124 // Add the parts which are unique to WAVE_FORMAT_EXTENSIBLE.
125 format_.Samples.wValidBitsPerSample = params.bits_per_sample();
126 format_.dwChannelMask = CoreAudioUtil::GetChannelConfig(device_id, eRender);
127 format_.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
129 // Store size (in different units) of audio packets which we expect to
130 // get from the audio endpoint device in each render event.
131 packet_size_frames_ = params.frames_per_buffer();
132 packet_size_bytes_ = params.GetBytesPerBuffer();
// Packet duration in milliseconds: 1000 * frames / (frames per second).
133 packet_size_ms_ = (1000.0 * packet_size_frames_) / params.sample_rate();
134 VLOG(1) << "Number of bytes per audio frame : " << format->nBlockAlign;
135 VLOG(1) << "Number of audio frames per packet: " << packet_size_frames_;
136 VLOG(1) << "Number of bytes per packet : " << packet_size_bytes_;
137 VLOG(1) << "Number of milliseconds per packet: " << packet_size_ms_;
139 // All events are auto-reset events and non-signaled initially.
141 // Create the event which the audio engine will signal each time
142 // a buffer becomes ready to be processed by the client.
143 audio_samples_render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
144 DCHECK(audio_samples_render_event_.IsValid());
146 // Create the event which will be set via StopThread() when rendering shall
// stop.
147 stop_render_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
148 DCHECK(stop_render_event_.IsValid());
// Destructor with an empty body.
151 WASAPIAudioOutputStream::~WASAPIAudioOutputStream() {}
// Opens the stream. Obtains an IAudioClient for the default or the requested
// render device, checks that |format_| is supported, initializes the client
// in shared or exclusive mode and creates the IAudioRenderClient. The COM
// interfaces are stored in |audio_client_| and |audio_render_client_| once all
// steps have been passed. Must be called on the creating thread (DCHECKed).
// NOTE(review): the statements executed on the failure paths and the final
// return are not part of this listing (the embedded numbers skip them);
// presumably every failed check returns false - confirm against the full file.
153 bool WASAPIAudioOutputStream::Open() {
154 VLOG(1) << "WASAPIAudioOutputStream::Open()";
155 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
159 // Audio parameters must be identical to the preferred set of parameters
160 // if shared mode (default) is utilized.
// |audio_parameters_are_valid_| was computed in the constructor.
161 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
162 if (!audio_parameters_are_valid_) {
163 LOG(ERROR) << "Audio parameters are not valid.";
168 // Create an IAudioClient interface for the default rendering IMMDevice, or
// for the device named by |device_id_| when that id is not empty.
169 ScopedComPtr<IAudioClient> audio_client;
170 if (device_id_.empty()) {
171 audio_client = CoreAudioUtil::CreateDefaultClient(eRender, device_role_);
173 ScopedComPtr<IMMDevice> device(CoreAudioUtil::CreateDevice(device_id_));
174 DLOG_IF(ERROR, !device) << "Failed to open device: " << device_id_;
176 audio_client = CoreAudioUtil::CreateClient(device);
182 // Extra sanity to ensure that the provided device format is still valid.
183 if (!CoreAudioUtil::IsFormatSupported(audio_client,
189 HRESULT hr = S_FALSE;
190 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
191 // Initialize the audio stream between the client and the device in shared
192 // mode and using event-driven buffer handling.
// |endpoint_buffer_size_frames_| is an output argument of this call.
193 hr = CoreAudioUtil::SharedModeInitialize(
194 audio_client, &format_, audio_samples_render_event_.Get(),
195 &endpoint_buffer_size_frames_);
199 // We know from experience that the best possible callback sequence is
200 // achieved when the endpoint buffer size is an integer multiple of the
201 // packet size (given by the native device period).
202 // Examples: 48kHz => 960 % 480, 44.1kHz => 896 % 448 or 882 % 441.
203 if (endpoint_buffer_size_frames_ % packet_size_frames_ != 0) {
204 LOG(ERROR) << "Bailing out due to non-perfect timing.";
208 // TODO(henrika): break out to CoreAudioUtil::ExclusiveModeInitialize()
209 // when removing the enable-exclusive-audio flag.
210 hr = ExclusiveModeInitialization(audio_client,
211 audio_samples_render_event_.Get(),
212 &endpoint_buffer_size_frames_);
216 // The buffer scheme for exclusive mode streams is not designed for max
217 // flexibility. We only allow a "perfect match" between the packet size set
218 // by the user and the actual endpoint buffer size.
219 if (endpoint_buffer_size_frames_ != packet_size_frames_) {
220 LOG(ERROR) << "Bailing out due to non-perfect timing.";
225 // Create an IAudioRenderClient client for an initialized IAudioClient.
226 // The IAudioRenderClient interface enables us to write output data to
227 // a rendering endpoint buffer.
228 ScopedComPtr<IAudioRenderClient> audio_render_client =
229 CoreAudioUtil::CreateRenderClient(audio_client);
230 if (!audio_render_client)
233 // Store valid COM interfaces.
// The members are only assigned here, after the checks above.
234 audio_client_ = audio_client;
235 audio_render_client_ = audio_render_client;
// Starts rendering. Creates and starts the "wasapi_render_thread" (whose
// delegate is this object), pre-fills the endpoint buffer with silence in
// shared mode and then starts the audio client. Failures are reported through
// callback->OnError(this). Must be called on the creating thread (DCHECKed).
// NOTE(review): the statements that follow each error report (and the
// assignment of |source_|) are not part of this listing; presumably the error
// paths stop the thread and return - confirm against the full file.
241 void WASAPIAudioOutputStream::Start(AudioSourceCallback* callback) {
242 VLOG(1) << "WASAPIAudioOutputStream::Start()";
243 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
// A render thread already exists: the callback must be the one in use.
247 if (render_thread_) {
248 CHECK_EQ(callback, source_);
254 // Create and start the thread that will drive the rendering by waiting for
// (remainder of this sentence is not part of this listing; see Run() for the
// events the thread waits on).
256 render_thread_.reset(
257 new base::DelegateSimpleThread(this, "wasapi_render_thread"));
258 render_thread_->Start();
259 if (!render_thread_->HasBeenStarted()) {
260 LOG(ERROR) << "Failed to start WASAPI render thread.";
262 callback->OnError(this);
266 // Ensure that the endpoint buffer is prepared with silence.
267 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
268 if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
269 audio_client_, audio_render_client_)) {
270 LOG(ERROR) << "Failed to prepare endpoint buffers with silence.";
272 callback->OnError(this);
// Start the written-frame counter at one full endpoint buffer.
276 num_written_frames_ = endpoint_buffer_size_frames_;
278 // Start streaming data between the endpoint buffer and the audio engine.
279 HRESULT hr = audio_client_->Start();
281 LOG_GETLASTERROR(ERROR)
282 << "Failed to start output streaming: " << std::hex << hr;
284 callback->OnError(this);
// Stops rendering. Stops the audio client, resets it (flushing pending data
// and the clock position) and, in shared mode, DCHECKs that no frames remain
// queued. Errors are reported through OnError(). Must be called on the
// creating thread (DCHECKed).
// NOTE(review): the conditions guarding the error reports and the call that
// stops the render thread are not part of this listing (embedded numbers skip
// 291-293, 296, 304-305 and 308) - confirm against the full file.
288 void WASAPIAudioOutputStream::Stop() {
289 VLOG(1) << "WASAPIAudioOutputStream::Stop()";
290 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
294 // Stop output audio streaming.
295 HRESULT hr = audio_client_->Stop();
297 LOG_GETLASTERROR(ERROR)
298 << "Failed to stop output streaming: " << std::hex << hr;
299 source_->OnError(this);
302 // Make a local copy of |source_| since StopThread() will clear it.
303 AudioSourceCallback* callback = source_;
306 // Flush all pending data and reset the audio clock stream position to 0.
307 hr = audio_client_->Reset();
309 LOG_GETLASTERROR(ERROR)
310 << "Failed to reset streaming: " << std::hex << hr;
311 callback->OnError(this);
314 // Extra safety check to ensure that the buffers are cleared.
315 // If the buffers are not cleared correctly, the next call to Start()
316 // would fail with AUDCLNT_E_BUFFER_ERROR at IAudioRenderClient::GetBuffer().
317 // This check is only needed for shared-mode streams.
318 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
319 UINT32 num_queued_frames = 0;
// NOTE(review): the HRESULT of GetCurrentPadding() is ignored here; on
// failure |num_queued_frames| keeps its initial value of 0.
320 audio_client_->GetCurrentPadding(&num_queued_frames);
321 DCHECK_EQ(0u, num_queued_frames);
// Closes the stream and hands it back to the audio manager through
// manager_->ReleaseOutputStream(this). Must be called on the creating thread
// (DCHECKed).
// NOTE(review): embedded numbers skip 331-332, so a statement between the
// comments below and the release call is not part of this listing.
325 void WASAPIAudioOutputStream::Close() {
326 VLOG(1) << "WASAPIAudioOutputStream::Close()";
327 DCHECK_EQ(GetCurrentThreadId(), creating_thread_id_);
329 // It is valid to call Close() before calling open or Start().
330 // It is also valid to call Close() after Start() has been called.
333 // Inform the audio manager that we have been closed. This will cause our
// (remainder of this sentence is not part of this listing; presumably the
// manager destroys this object - confirm before touching |this| afterwards).
335 manager_->ReleaseOutputStream(this);
// Stores |volume|, converted to float, in |volume_|. RenderAudioFromSource()
// applies |volume_| as a scale factor to every rendered packet.
// NOTE(review): the body of the range check below is not part of this listing
// (embedded numbers skip 342-343); presumably values outside [0.0, 1.0] are
// rejected without updating |volume_| - confirm against the full file.
338 void WASAPIAudioOutputStream::SetVolume(double volume) {
339 VLOG(1) << "SetVolume(volume=" << volume << ")";
340 float volume_float = static_cast<float>(volume);
341 if (volume_float < 0.0f || volume_float > 1.0f) {
344 volume_ = volume_float;
// Writes the currently stored |volume_| (widened to double) to |*volume|.
// |volume| is dereferenced unconditionally, so it must not be NULL.
347 void WASAPIAudioOutputStream::GetVolume(double* volume) {
348 VLOG(1) << "GetVolume()";
349 *volume = static_cast<double>(volume_);
// Entry point of the render thread. Initializes COM (MTA), raises the thread
// priority, tries to enable MMCSS ("Pro Audio"), acquires the IAudioClock and
// its frequency and then loops, waiting on |stop_render_event_| and
// |audio_samples_render_event_|. Each render event triggers a call to
// RenderAudioFromSource(); the loop ends on the stop event or on an error.
// NOTE(review): several statements are not part of this listing (embedded
// numbers skip e.g. 362-363, 373-375, 400-403 and 412-418), among them the
// declarations of |playing| and |error| and the remaining switch cases.
352 void WASAPIAudioOutputStream::Run() {
353 ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);
355 // Increase the thread priority.
356 render_thread_->SetThreadPriority(base::kThreadPriority_RealtimeAudio);
358 // Enable MMCSS to ensure that this thread receives prioritized access to
// (remainder of this sentence is not part of this listing).
360 DWORD task_index = 0;
361 HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
364 (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
366 // Failed to enable MMCSS on this thread. It is not fatal but can lead
367 // to reduced QoS at high load.
368 DWORD err = GetLastError();
369 LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
372 HRESULT hr = S_FALSE;
// Index 0 is the stop event, index 1 the render event; the switch below
// relies on this order.
376 HANDLE wait_array[] = { stop_render_event_,
377 audio_samples_render_event_ };
378 UINT64 device_frequency = 0;
380 // The IAudioClock interface enables us to monitor a stream's data
381 // rate and the current position in the stream. Allocate it before we
// enter the render loop below.
383 ScopedComPtr<IAudioClock> audio_clock;
384 hr = audio_client_->GetService(__uuidof(IAudioClock),
385 audio_clock.ReceiveVoid());
387 // The device frequency is the frequency generated by the hardware clock in
388 // the audio device. The GetFrequency() method reports a constant frequency.
389 hr = audio_clock->GetFrequency(&device_frequency);
392 PLOG_IF(ERROR, error) << "Failed to acquire IAudioClock interface: "
395 // Keep rendering audio until the stop event is signaled. An error can also
396 // break the main thread loop.
397 while (playing && !error) {
398 // Wait for a close-down event or a new render event.
399 DWORD wait_result = WaitForMultipleObjects(arraysize(wait_array),
404 switch (wait_result) {
405 case WAIT_OBJECT_0 + 0:
406 // |stop_render_event_| has been set.
409 case WAIT_OBJECT_0 + 1:
410 // |audio_samples_render_event_| has been set.
// RenderAudioFromSource() returns false on failure.
411 error = !RenderAudioFromSource(audio_clock, device_frequency);
419 if (playing && error) {
420 // Stop audio rendering since something has gone wrong in our main thread
421 // loop. Note that, we are still in a "started" state, hence a Stop() call
422 // is required to join the thread properly.
423 audio_client_->Stop();
424 PLOG(ERROR) << "WASAPI rendering failed.";
// Undo the MMCSS registration if it was established above.
428 if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
429 PLOG(WARNING) << "Failed to disable MMCSS";
// Renders audio in response to one render event. Determines how many frames
// can be written to the endpoint buffer and, for every whole packet that fits:
// gets a buffer from |audio_render_client_|, derives the playout delay from
// |audio_clock| and |device_frequency|, pulls data from |source_| through
// OnMoreData(), scales it by |volume_|, interleaves it into the endpoint
// buffer and releases the buffer.
// Returns a bool that Run() negates into its |error| flag, i.e. false signals
// a failure.
// NOTE(review): the failure checks on |hr| and all return statements are not
// part of this listing (the embedded numbers skip them) - confirm the exact
// return values against the full file.
433 bool WASAPIAudioOutputStream::RenderAudioFromSource(
434 IAudioClock* audio_clock, UINT64 device_frequency) {
435 TRACE_EVENT0("audio", "RenderAudioFromSource");
437 HRESULT hr = S_FALSE;
438 UINT32 num_queued_frames = 0;
439 uint8* audio_data = NULL;
441 // Contains how much new data we can write to the buffer without
442 // the risk of overwriting previously written data that the audio
443 // engine has not yet read from the buffer.
444 size_t num_available_frames = 0;
446 if (share_mode_ == AUDCLNT_SHAREMODE_SHARED) {
447 // Get the padding value which represents the amount of rendering
448 // data that is queued up to play in the endpoint buffer.
449 hr = audio_client_->GetCurrentPadding(&num_queued_frames);
// Free space = total endpoint buffer size - frames still queued.
450 num_available_frames =
451 endpoint_buffer_size_frames_ - num_queued_frames;
453 DLOG(ERROR) << "Failed to retrieve amount of available space: "
458 // While the stream is running, the system alternately sends one
459 // buffer or the other to the client. This form of double buffering
460 // is referred to as "ping-ponging". Each time the client receives
461 // a buffer from the system (triggers this event) the client must
462 // process the entire buffer. Calls to the GetCurrentPadding method
463 // are unnecessary because the packet size must always equal the
464 // buffer size. In contrast to the shared mode buffering scheme,
465 // the latency for an event-driven, exclusive-mode stream depends
466 // directly on the buffer size.
467 num_available_frames = endpoint_buffer_size_frames_;
470 // Check if there is enough available space to fit the packet size
471 // specified by the client.
472 if (num_available_frames < packet_size_frames_)
475 DLOG_IF(ERROR, num_available_frames % packet_size_frames_ != 0)
476 << "Non-perfect timing detected (num_available_frames="
477 << num_available_frames << ", packet_size_frames="
478 << packet_size_frames_ << ")";
480 // Derive the number of packets we need to get from the client to
481 // fill up the available area in the endpoint buffer.
482 // |num_packets| will always be one for exclusive-mode streams and
483 // will be one in most cases for shared mode streams as well.
484 // However, we have found that two packets can sometimes be
// (remainder of this sentence is not part of this listing).
// Integer division: a trailing partial packet's worth of space is left unused.
486 size_t num_packets = (num_available_frames / packet_size_frames_);
488 for (size_t n = 0; n < num_packets; ++n) {
489 // Grab one packet's worth of space in the rendering endpoint buffer
490 // into which the client can write a data packet.
491 hr = audio_render_client_->GetBuffer(packet_size_frames_,
494 DLOG(ERROR) << "Failed to use rendering audio buffer: "
499 // Derive the audio delay which corresponds to the delay between
500 // a render event and the time when the first audio sample in a
501 // packet is played out through the speaker. This delay value
502 // can typically be utilized by an acoustic echo-control (AEC)
503 // unit at the render side.
505 int audio_delay_bytes = 0;
506 hr = audio_clock->GetPosition(&position, NULL);
508 // Stream position of the sample that is currently playing
509 // through the speaker.
// |position| / |device_frequency| is scaled by the sample rate, which yields
// a position in frames.
510 double pos_sample_playing_frames = format_.Format.nSamplesPerSec *
511 (static_cast<double>(position) / device_frequency);
513 // Stream position of the last sample written to the endpoint
514 // buffer. Note that, the packet we are about to receive in
515 // the upcoming callback is also included.
516 size_t pos_last_sample_written_frames =
517 num_written_frames_ + packet_size_frames_;
519 // Derive the actual delay value which will be fed to the
520 // render client using the OnMoreData() callback.
// (written position - playing position) in frames, converted to bytes.
521 audio_delay_bytes = (pos_last_sample_written_frames -
522 pos_sample_playing_frames) * format_.Format.nBlockAlign;
525 // Read a data packet from the registered client source and
526 // deliver a delay estimate in the same callback to the client.
527 // A time stamp is also stored in the AudioBuffersState. This
528 // time stamp can be used at the client side to compensate for
529 // the delay between the usage of the delay value and the time
// (remainder of this sentence is not part of this listing).
532 int frames_filled = source_->OnMoreData(
533 audio_bus_.get(), AudioBuffersState(0, audio_delay_bytes));
534 uint32 num_filled_bytes = frames_filled * format_.Format.nBlockAlign;
535 DCHECK_LE(num_filled_bytes, packet_size_bytes_);
537 // Note: If this ever changes to output raw float the data must be
538 // clipped and sanitized since it may come from an untrusted
539 // source such as NaCl.
// Bits per sample -> bytes per sample.
540 const int bytes_per_sample = format_.Format.wBitsPerSample >> 3;
541 audio_bus_->Scale(volume_);
542 audio_bus_->ToInterleaved(
543 frames_filled, bytes_per_sample, audio_data);
546 // Release the buffer space acquired in the GetBuffer() call.
547 // Render silence if we were not able to fill up the buffer totally.
// NOTE(review): per the IAudioRenderClient::ReleaseBuffer documentation the
// SILENT flag makes the engine treat the whole packet as silence - confirm
// that discarding a partially filled packet is intended.
548 DWORD flags = (num_filled_bytes < packet_size_bytes_) ?
549 AUDCLNT_BUFFERFLAGS_SILENT : 0;
550 audio_render_client_->ReleaseBuffer(packet_size_frames_, flags);
// A full packet is always accounted for, regardless of |frames_filled|.
552 num_written_frames_ += packet_size_frames_;
// Initializes |client| for exclusive-mode rendering with |format_| and a
// buffer duration derived from |packet_size_frames_|.
//   client               - the IAudioClient to initialize.
//   event_handle         - event registered via SetEventHandle(); when it is
//                          neither NULL nor INVALID_HANDLE_VALUE, |use_event|
//                          is true.
//   endpoint_buffer_size - receives the value reported by
//                          IAudioClient::GetBufferSize(), in frames.
// Returns an HRESULT.
// NOTE(review): the failure checks on |hr|, the conditions guarding the event
// set-up and the return statements are not part of this listing (the embedded
// numbers skip them) - confirm the exact control flow against the full file.
558 HRESULT WASAPIAudioOutputStream::ExclusiveModeInitialization(
559 IAudioClient* client, HANDLE event_handle, uint32* endpoint_buffer_size) {
560 DCHECK_EQ(share_mode_, AUDCLNT_SHAREMODE_EXCLUSIVE);
// Packet duration in milliseconds, then converted to 100-ns REFERENCE_TIME
// units (1 ms = 10000 units) with rounding to nearest.
// NOTE(review): |f| is a float although the expression is evaluated in
// double, so some precision is dropped before the conversion.
562 float f = (1000.0 * packet_size_frames_) / format_.Format.nSamplesPerSec;
563 REFERENCE_TIME requested_buffer_duration =
564 static_cast<REFERENCE_TIME>(f * 10000.0 + 0.5);
566 DWORD stream_flags = AUDCLNT_STREAMFLAGS_NOPERSIST;
567 bool use_event = (event_handle != NULL &&
568 event_handle != INVALID_HANDLE_VALUE);
570 stream_flags |= AUDCLNT_STREAMFLAGS_EVENTCALLBACK;
571 VLOG(2) << "stream_flags: 0x" << std::hex << stream_flags;
573 // Initialize the audio stream between the client and the device.
574 // For an exclusive-mode stream that uses event-driven buffering, the
575 // caller must specify nonzero values for hnsPeriodicity and
576 // hnsBufferDuration, and the values of these two parameters must be equal.
577 // The Initialize method allocates two buffers for the stream. Each buffer
578 // is equal in duration to the value of the hnsBufferDuration parameter.
579 // Following the Initialize call for a rendering stream, the caller should
580 // fill the first of the two buffers before starting the stream.
581 HRESULT hr = S_FALSE;
582 hr = client->Initialize(AUDCLNT_SHAREMODE_EXCLUSIVE,
584 requested_buffer_duration,
585 requested_buffer_duration,
586 reinterpret_cast<WAVEFORMATEX*>(&format_),
589 if (hr == AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED) {
590 LOG(ERROR) << "AUDCLNT_E_BUFFER_SIZE_NOT_ALIGNED";
// Query the aligned size for logging only; the HRESULT of this call is
// ignored.
592 UINT32 aligned_buffer_size = 0;
593 client->GetBufferSize(&aligned_buffer_size);
594 VLOG(1) << "Use aligned buffer size instead: " << aligned_buffer_size;
596 // Calculate new aligned periodicity. Each unit of reference time
597 // is 100 nanoseconds.
598 REFERENCE_TIME aligned_buffer_duration = static_cast<REFERENCE_TIME>(
599 (10000000.0 * aligned_buffer_size / format_.Format.nSamplesPerSec)
602 // It is possible to re-activate and re-initialize the audio client
603 // at this stage but we bail out with an error code instead and
604 // combine it with a log message which informs about the suggested
605 // aligned buffer size which should be used instead.
606 VLOG(1) << "aligned_buffer_duration: "
607 << static_cast<double>(aligned_buffer_duration / 10000.0)
609 } else if (hr == AUDCLNT_E_INVALID_DEVICE_PERIOD) {
610 // We will get this error if we try to use a smaller buffer size than
611 // the minimum supported size (usually ~3ms on Windows 7).
612 LOG(ERROR) << "AUDCLNT_E_INVALID_DEVICE_PERIOD";
// Register the event the audio engine signals for each render period.
618 hr = client->SetEventHandle(event_handle);
620 VLOG(1) << "IAudioClient::SetEventHandle: " << std::hex << hr;
// Report the actual endpoint buffer size (in frames) back to the caller.
625 UINT32 buffer_size_in_frames = 0;
626 hr = client->GetBufferSize(&buffer_size_in_frames);
628 VLOG(1) << "IAudioClient::GetBufferSize: " << std::hex << hr;
632 *endpoint_buffer_size = buffer_size_in_frames;
633 VLOG(2) << "endpoint buffer size: " << buffer_size_in_frames;
// Stops and tears down the render thread, if one exists: signals
// |stop_render_event_|, joins the thread when it has been started, releases
// the thread object and finally resets the stop event again.
// NOTE(review): embedded numbers skip 643-644 and 646, and the end of the
// function is not part of this listing, so closing braces and possibly
// further statements are missing here.
637 void WASAPIAudioOutputStream::StopThread() {
638 if (render_thread_ ) {
639 if (render_thread_->HasBeenStarted()) {
640 // Wait until the thread completes and perform cleanup.
// Run() leaves its loop when this event is signaled.
641 SetEvent(stop_render_event_.Get());
642 render_thread_->Join();
645 render_thread_.reset();
647 // Ensure that we don't quit the main thread loop immediately next
648 // time Start() is called.
649 ResetEvent(stop_render_event_.Get());