1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // The format of these tests is to enqueue a known amount of data and then
6 // request the exact amount we expect in order to dequeue the known amount of
7 // data. This ensures that for any rate we are consuming input data at the
8 // correct rate. We always pass in a very large destination buffer with the
9 // expectation that FillBuffer() will fill as much as it can but no more.
11 #include <algorithm> // For std::min().
15 #include "base/bind.h"
16 #include "base/callback.h"
17 #include "base/memory/scoped_ptr.h"
18 #include "media/base/audio_buffer.h"
19 #include "media/base/audio_bus.h"
20 #include "media/base/buffers.h"
21 #include "media/base/channel_layout.h"
22 #include "media/base/test_helpers.h"
23 #include "media/filters/audio_renderer_algorithm.h"
24 #include "media/filters/wsola_internals.h"
25 #include "testing/gtest/include/gtest/gtest.h"
// Number of frames added to the fixture's |frames_enqueued_| counter each
// time FillAlgorithmQueue() enqueues a buffer.
29 const int kFrameSize = 250;
// Sample rate (samples per second) used by the tests that call Initialize()
// with a non-default format, and to size the small-buffer test.
30 const int kSamplesPerSecond = 3000;
// Seconds of output requested per TestPlaybackRate() run; multiplied by a
// sample rate to obtain the total number of frames to pull.
31 const int kOutputDurationInSec = 10;
// Writes a square pulse train into |data|, which holds |num_samples| floats.
// |offset| is the index that separates the two fill passes below and must not
// exceed |num_samples| (asserted). Each pass counts samples in |k| and
// compares that count against |half_pulse_width|.
// NOTE(review): the loop bodies are not visible in this view; the values
// written and the sign flip are taken from the existing comments -- confirm.
33 static void FillWithSquarePulseTrain(
34 int half_pulse_width, int offset, int num_samples, float* data) {
36 ASSERT_LE(offset, num_samples);
38 // Fill backward from |offset| - 1 toward zero, starting with -1, alternating
39 // between -1 and 1 every |half_pulse_width| samples.
41 for (int n = offset - 1, k = 0; n >= 0; --n, ++k) {
42 if (k >= half_pulse_width) {
49 // Fill forward from |offset| towards the end, starting with 1, alternating
50 // between 1 and -1 every |half_pulse_width| samples.
52 for (int n = offset, k = 0; n < num_samples; ++n, ++k) {
53 if (k >= half_pulse_width) {
// Convenience overload: fills channel |channel| of |audio_bus| by forwarding
// to the float* version, using audio_bus->frames() as the sample count.
61 static void FillWithSquarePulseTrain(
62 int half_pulse_width, int offset, int channel, AudioBus* audio_bus) {
63 FillWithSquarePulseTrain(half_pulse_width, offset, audio_bus->frames(),
64 audio_bus->channel(channel));
// Test fixture holding an AudioRendererAlgorithm (|algorithm_|) together with
// helpers that enqueue data, pull output at a given playback rate, and check
// the results.
67 class AudioRendererAlgorithmTest : public testing::Test {
// Members start zeroed / unset; Initialize() assigns the real format values.
69 AudioRendererAlgorithmTest()
70 : frames_enqueued_(0),
72 channel_layout_(CHANNEL_LAYOUT_NONE),
73 sample_format_(kUnknownSampleFormat),
74 samples_per_second_(0),
75 bytes_per_sample_(0) {
78 ~AudioRendererAlgorithmTest() override {}
// Default configuration: stereo, signed 16-bit samples, 3000 samples/second.
81 Initialize(CHANNEL_LAYOUT_STEREO, kSampleFormatS16, 3000);
// Records the requested format in the fixture's members and initializes
// |algorithm_| with matching AudioParameters.
//   channel_layout     - channel layout; also determines |channels_|.
//   sample_format      - sample format; determines |bytes_per_sample_|.
//   samples_per_second - sample rate of the audio fed to the algorithm.
84 void Initialize(ChannelLayout channel_layout,
85 SampleFormat sample_format,
86 int samples_per_second) {
87 channels_ = ChannelLayoutToChannelCount(channel_layout);
88 samples_per_second_ = samples_per_second;
89 channel_layout_ = channel_layout;
90 sample_format_ = sample_format;
91 bytes_per_sample_ = SampleFormatToBytesPerChannel(sample_format);
92 AudioParameters params(media::AudioParameters::AUDIO_PCM_LINEAR,
// Bits per sample.
95 bytes_per_sample_ * 8,
// One hundredth of a second (10 ms) worth of frames.
96 samples_per_second / 100);
97 algorithm_.Initialize(params);
// Enqueues buffers into |algorithm_| until IsQueueFull() reports true. Each
// buffer is built with the MakeAudioBuffer<> instantiation that matches
// |sample_format_| (uint8, int16 or int32); an unrecognized format reaches
// NOTREACHED(). Every enqueued buffer adds kFrameSize to |frames_enqueued_|.
101 void FillAlgorithmQueue() {
102 // The value of the data is meaningless; we just want non-zero data to
103 // differentiate it from muted data.
104 scoped_refptr<AudioBuffer> buffer;
105 while (!algorithm_.IsQueueFull()) {
106 switch (sample_format_) {
107 case kSampleFormatU8:
108 buffer = MakeAudioBuffer<uint8>(
111 ChannelLayoutToChannelCount(channel_layout_),
118 case kSampleFormatS16:
119 buffer = MakeAudioBuffer<int16>(
122 ChannelLayoutToChannelCount(channel_layout_),
129 case kSampleFormatS32:
130 buffer = MakeAudioBuffer<int32>(
133 ChannelLayoutToChannelCount(channel_layout_),
141 NOTREACHED() << "Unrecognized format " << sample_format_;
143 algorithm_.EnqueueBuffer(buffer);
// Bookkeeping used later by ComputeConsumedFrames().
144 frames_enqueued_ += kFrameSize;
// Scans the first |frames_written| frames of each of the |channels_| channels
// in |audio_data| for a sample that is not exactly 0.0f.
// NOTE(review): the return statements are not visible in this view;
// presumably returns false on the first non-zero sample and true otherwise --
// confirm.
148 bool AudioDataIsMuted(AudioBus* audio_data, int frames_written) {
149 for (int ch = 0; ch < channels_; ++ch) {
150 for (int i = 0; i < frames_written; ++i) {
151 if (audio_data->channel(ch)[i] != 0.0f)
// Computes how many frames the algorithm has consumed since a snapshot was
// taken: (frames enqueued since the snapshot) minus (growth in the number of
// frames still buffered). CHECK-fails if the result is negative.
//   initial_frames_enqueued - value of |frames_enqueued_| at the snapshot.
//   initial_frames_buffered - value of algorithm_.frames_buffered() at the
//                             snapshot.
// NOTE(review): the return statement is not visible in this view; presumably
// returns |consumed| -- confirm.
158 int ComputeConsumedFrames(int initial_frames_enqueued,
159 int initial_frames_buffered) {
160 int frame_delta = frames_enqueued_ - initial_frames_enqueued;
161 int buffered_delta = algorithm_.frames_buffered() - initial_frames_buffered;
162 int consumed = frame_delta - buffered_delta;
163 CHECK_GE(consumed, 0);
// Runs the three-argument TestPlaybackRate() with defaults derived from the
// algorithm's sample rate: a buffer of one hundredth of a second (10 ms) of
// frames, and kOutputDurationInSec seconds of total output.
167 void TestPlaybackRate(double playback_rate) {
168 const int kDefaultBufferSize = algorithm_.samples_per_second() / 100;
169 const int kDefaultFramesRequested = kOutputDurationInSec *
170 algorithm_.samples_per_second();
173 playback_rate, kDefaultBufferSize, kDefaultFramesRequested);
// Pulls |total_frames_requested| frames of output from |algorithm_| at
// |playback_rate|, requesting at most |buffer_size_in_frames| frames per
// FillBuffer() call and refilling the input queue after every pull. It then
// checks that the ratio of input frames consumed to output frames produced
// matches |playback_rate|: exactly for a rate of 1.0, within 1% otherwise.
// A rate of 0.0 is handled separately: FillBuffer() must write no frames.
176 void TestPlaybackRate(double playback_rate,
177 int buffer_size_in_frames,
178 int total_frames_requested) {
// Snapshot taken so ComputeConsumedFrames() measures only this run.
179 int initial_frames_enqueued = frames_enqueued_;
180 int initial_frames_buffered = algorithm_.frames_buffered();
182 scoped_ptr<AudioBus> bus =
183 AudioBus::Create(channels_, buffer_size_in_frames);
184 if (playback_rate == 0.0) {
185 int frames_written = algorithm_.FillBuffer(
186 bus.get(), buffer_size_in_frames, playback_rate);
187 EXPECT_EQ(0, frames_written);
// Rates below 0.5 or above 4 are expected to produce all-zero output.
191 bool expect_muted = (playback_rate < 0.5 || playback_rate > 4);
193 int frames_remaining = total_frames_requested;
194 bool first_fill_buffer = true;
195 while (frames_remaining > 0) {
196 int frames_requested = std::min(buffer_size_in_frames, frames_remaining);
198 algorithm_.FillBuffer(bus.get(), frames_requested, playback_rate);
199 ASSERT_GT(frames_written, 0) << "Requested: " << frames_requested
200 << ", playing at " << playback_rate;
202 // Do not check the data if this is the first pull and only one frame was
203 // written. The very first frame out of WSOLA is always zero because of the
204 // overlap-and-add window, which is zero for the first sample. Therefore,
205 // if the very first buffer fill writes only one frame, that frame is zero,
206 // which could make the AudioDataIsMuted() check below fail.
207 if (!first_fill_buffer || frames_written > 1)
208 ASSERT_EQ(expect_muted, AudioDataIsMuted(bus.get(), frames_written));
209 first_fill_buffer = false;
210 frames_remaining -= frames_written;
// Top the input queue back up so the next pull has data available.
212 FillAlgorithmQueue();
215 int frames_consumed =
216 ComputeConsumedFrames(initial_frames_enqueued, initial_frames_buffered);
218 // If playing back at normal speed, we should always get back the same
219 // number of frames requested.
220 if (playback_rate == 1.0) {
221 EXPECT_EQ(total_frames_requested, frames_consumed);
// NOTE(review): |kMaxAcceptableDelta| is not defined in the visible code; the
// tolerance actually applied below is 1% of |playback_rate| -- confirm and
// update the comment that follows.
225 // Otherwise, allow |kMaxAcceptableDelta| difference between the target and
226 // actual playback rate.
227 // When |kSamplesPerSecond| and |total_frames_requested| are reasonably
228 // large, one can expect less than a 1% difference in most cases. In our
229 // current implementation, sped up playback is less accurate than slowed
230 // down playback, and for playback_rate > 1, playback rate generally gets
231 // less and less accurate the farther it drifts from 1 (though this is
// Measured rate: input frames consumed per output frame produced.
233 double actual_playback_rate =
234 1.0 * frames_consumed / total_frames_requested;
235 EXPECT_NEAR(playback_rate, actual_playback_rate, playback_rate / 100.0);
// Feeds a repeating square pulse train through |algorithm_| at
// |playback_rate| and compares each reconstructed pulse, sample by sample,
// against the input pulse within |kTolerance|. Uses its own fixed
// configuration (48 kHz, stereo, 2 bytes per sample) rather than the one set
// by Initialize(), and pulls output one frame per FillBuffer() call.
238 void WsolaTest(float playback_rate) {
239 const int kSampleRateHz = 48000;
240 const ChannelLayout kChannelLayout = CHANNEL_LAYOUT_STEREO;
241 const int kBytesPerSample = 2;
242 const int kNumFrames = kSampleRateHz / 100; // 10 milliseconds.
244 channels_ = ChannelLayoutToChannelCount(kChannelLayout);
245 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout,
246 kSampleRateHz, kBytesPerSample * 8, kNumFrames);
247 algorithm_.Initialize(params);
249 // A pulse is 6 milliseconds (even number of samples).
250 const int kPulseWidthSamples = 6 * kSampleRateHz / 1000;
251 const int kHalfPulseWidthSamples = kPulseWidthSamples / 2;
253 // For the ease of implementation get 1 frame every call to FillBuffer().
254 scoped_ptr<AudioBus> output = AudioBus::Create(channels_, 1);
256 // Input buffer to inject pulses.
257 scoped_refptr<AudioBuffer> input =
258 AudioBuffer::CreateBuffer(kSampleFormatPlanarF32,
264 const std::vector<uint8*>& channel_data = input->channel_data();
266 // Fill |input| channels. Channel 0 uses offset 0; channel 1 uses an offset
// of half a pulse.
267 FillWithSquarePulseTrain(kHalfPulseWidthSamples, 0, kPulseWidthSamples,
268 reinterpret_cast<float*>(channel_data[0]));
269 FillWithSquarePulseTrain(kHalfPulseWidthSamples, kHalfPulseWidthSamples,
271 reinterpret_cast<float*>(channel_data[1]));
273 // A buffer for the output until a complete pulse is created. Then
274 // reference pulse is compared with this buffer.
275 scoped_ptr<AudioBus> pulse_buffer = AudioBus::Create(
276 channels_, kPulseWidthSamples);
278 const float kTolerance = 0.000001f;
279 // Equivalent of 4 seconds.
280 const int kNumRequestedPulses = kSampleRateHz * 4 / kPulseWidthSamples;
281 for (int n = 0; n < kNumRequestedPulses; ++n) {
// Accumulate single output frames until one full pulse has been collected.
282 int num_buffered_frames = 0;
283 while (num_buffered_frames < kPulseWidthSamples) {
284 int num_samples = algorithm_.FillBuffer(output.get(), 1, playback_rate);
285 ASSERT_LE(num_samples, 1);
286 if (num_samples > 0) {
287 output->CopyPartialFramesTo(0, num_samples, num_buffered_frames,
289 num_buffered_frames++;
291 algorithm_.EnqueueBuffer(input);
295 // Pulses in the first half of WSOLA OLA frame are not constructed
296 // perfectly. Do not check them.
298 for (int m = 0; m < channels_; ++m) {
299 const float* pulse_ch = pulse_buffer->channel(m);
301 // Because of overlap-and-add we might have round-off error.
302 for (int k = 0; k < kPulseWidthSamples; ++k) {
303 ASSERT_NEAR(reinterpret_cast<float*>(channel_data[m])[k],
304 pulse_ch[k], kTolerance) << " loop " << n
305 << " channel/sample " << m << "/" << k;
310 // Zero out the buffer to be sure the next comparison is relevant.
311 pulse_buffer->Zero();
// Algorithm under test.
316 AudioRendererAlgorithm algorithm_;
// Incremented by kFrameSize for every buffer FillAlgorithmQueue() enqueues.
317 int frames_enqueued_;
// Format most recently passed to Initialize().
319 ChannelLayout channel_layout_;
320 SampleFormat sample_format_;
321 int samples_per_second_;
// Derived from the sample format via SampleFormatToBytesPerChannel().
322 int bytes_per_sample_;
// Rate 1.0: frames consumed must equal frames requested exactly.
325 TEST_F(AudioRendererAlgorithmTest, FillBuffer_NormalRate) {
327 TestPlaybackRate(1.0);
// A rate just above 1.0 is checked with the 1% tolerance rather than the
// exact-match comparison used for a rate of exactly 1.0.
330 TEST_F(AudioRendererAlgorithmTest, FillBuffer_NearlyNormalFasterRate) {
332 TestPlaybackRate(1.0001);
// A rate just below 1.0 is checked with the 1% tolerance rather than the
// exact-match comparison used for a rate of exactly 1.0.
335 TEST_F(AudioRendererAlgorithmTest, FillBuffer_NearlyNormalSlowerRate) {
337 TestPlaybackRate(0.9999);
// Speed-up to 1.25x; the measured rate must be within 1% of the target.
340 TEST_F(AudioRendererAlgorithmTest, FillBuffer_OneAndAQuarterRate) {
342 TestPlaybackRate(1.25);
// Speed-up to 1.5x; the measured rate must be within 1% of the target.
345 TEST_F(AudioRendererAlgorithmTest, FillBuffer_OneAndAHalfRate) {
347 TestPlaybackRate(1.5);
// Speed-up to 2x; the measured rate must be within 1% of the target.
350 TEST_F(AudioRendererAlgorithmTest, FillBuffer_DoubleRate) {
352 TestPlaybackRate(2.0);
// 8x is above the rate of 4 beyond which TestPlaybackRate() expects muted
// (all-zero) output; the consumption rate is still checked.
355 TEST_F(AudioRendererAlgorithmTest, FillBuffer_EightTimesRate) {
357 TestPlaybackRate(8.0);
// Slow-down to 0.75x; the measured rate must be within 1% of the target.
360 TEST_F(AudioRendererAlgorithmTest, FillBuffer_ThreeQuartersRate) {
362 TestPlaybackRate(0.75);
// 0.5x sits exactly on the boundary: TestPlaybackRate() expects muted output
// only for rates strictly below 0.5, so audible output is expected here.
365 TEST_F(AudioRendererAlgorithmTest, FillBuffer_HalfRate) {
367 TestPlaybackRate(0.5);
// 0.25x is below 0.5, so TestPlaybackRate() expects muted (all-zero) output;
// the consumption rate is still checked.
370 TEST_F(AudioRendererAlgorithmTest, FillBuffer_QuarterRate) {
372 TestPlaybackRate(0.25);
// Rate 0.0: FillBuffer() is expected to write zero frames.
375 TEST_F(AudioRendererAlgorithmTest, FillBuffer_Pause) {
377 TestPlaybackRate(0.0);
// Steps the rate down from 4.5x to 0.25x within one test, checking each rate
// in turn. The first (4.5) and last (0.25) rates fall in the ranges where
// TestPlaybackRate() expects muted output.
380 TEST_F(AudioRendererAlgorithmTest, FillBuffer_SlowDown) {
382 TestPlaybackRate(4.5);
383 TestPlaybackRate(3.0);
384 TestPlaybackRate(2.0);
385 TestPlaybackRate(1.0);
386 TestPlaybackRate(0.5);
387 TestPlaybackRate(0.25);
// Steps the rate up from 0.25x to 4.5x within one test, checking each rate in
// turn. The first (0.25) and last (4.5) rates fall in the ranges where
// TestPlaybackRate() expects muted output.
390 TEST_F(AudioRendererAlgorithmTest, FillBuffer_SpeedUp) {
392 TestPlaybackRate(0.25);
393 TestPlaybackRate(0.5);
394 TestPlaybackRate(1.0);
395 TestPlaybackRate(2.0);
396 TestPlaybackRate(3.0);
397 TestPlaybackRate(4.5);
// Switches between faster and slower rates in non-monotonic order. The final
// rate (0.3) is below 0.5, where TestPlaybackRate() expects muted output.
400 TEST_F(AudioRendererAlgorithmTest, FillBuffer_JumpAroundSpeeds) {
402 TestPlaybackRate(2.1);
403 TestPlaybackRate(0.9);
404 TestPlaybackRate(0.6);
405 TestPlaybackRate(1.4);
406 TestPlaybackRate(0.3);
// Pulls output one frame per FillBuffer() call, for kOutputDurationInSec
// seconds' worth of frames at kSamplesPerSecond, at three different rates.
409 TEST_F(AudioRendererAlgorithmTest, FillBuffer_SmallBufferSize) {
411 static const int kBufferSizeInFrames = 1;
412 static const int kFramesRequested = kOutputDurationInSec * kSamplesPerSecond;
413 TestPlaybackRate(1.0, kBufferSizeInFrames, kFramesRequested);
414 TestPlaybackRate(0.5, kBufferSizeInFrames, kFramesRequested);
415 TestPlaybackRate(1.5, kBufferSizeInFrames, kFramesRequested);
// Re-initializes at 44100 samples/second. The single-argument
// TestPlaybackRate() derives its buffer size from the algorithm's sample rate
// (rate / 100), so the higher rate yields a larger buffer per call.
418 TEST_F(AudioRendererAlgorithmTest, FillBuffer_LargeBufferSize) {
419 Initialize(CHANNEL_LAYOUT_STEREO, kSampleFormatS16, 44100);
420 TestPlaybackRate(1.0);
421 TestPlaybackRate(0.5);
422 TestPlaybackRate(1.5);
// Same three rates with mono, unsigned 8-bit input.
425 TEST_F(AudioRendererAlgorithmTest, FillBuffer_LowerQualityAudio) {
426 Initialize(CHANNEL_LAYOUT_MONO, kSampleFormatU8, kSamplesPerSecond);
427 TestPlaybackRate(1.0);
428 TestPlaybackRate(0.5);
429 TestPlaybackRate(1.5);
// Same three rates with stereo, signed 32-bit input.
432 TEST_F(AudioRendererAlgorithmTest, FillBuffer_HigherQualityAudio) {
433 Initialize(CHANNEL_LAYOUT_STEREO, kSampleFormatS32, kSamplesPerSecond);
434 TestPlaybackRate(1.0);
435 TestPlaybackRate(0.5);
436 TestPlaybackRate(1.5);
// Checks internal::MultiChannelDotProduct() on two 3-channel buses filled
// with square pulse trains. Every channel of |b| uses offset 0, while the
// channels of |a| use offsets 0, 1 and 2, so the expected per-channel dot
// products are +frames (identical), 0, and -frames (inverted) respectively.
439 TEST_F(AudioRendererAlgorithmTest, DotProduct) {
440 const int kChannels = 3;
441 const int kFrames = 20;
442 const int kHalfPulseWidth = 2;
444 scoped_ptr<AudioBus> a = AudioBus::Create(kChannels, kFrames);
445 scoped_ptr<AudioBus> b = AudioBus::Create(kChannels, kFrames);
447 scoped_ptr<float[]> dot_prod(new float[kChannels]);
// Arguments are (half pulse width, offset, channel index, bus).
449 FillWithSquarePulseTrain(kHalfPulseWidth, 0, 0, a.get());
450 FillWithSquarePulseTrain(kHalfPulseWidth, 1, 1, a.get());
451 FillWithSquarePulseTrain(kHalfPulseWidth, 2, 2, a.get());
453 FillWithSquarePulseTrain(kHalfPulseWidth, 0, 0, b.get());
454 FillWithSquarePulseTrain(kHalfPulseWidth, 0, 1, b.get());
455 FillWithSquarePulseTrain(kHalfPulseWidth, 0, 2, b.get());
// Full-length product, both buses read from frame 0.
457 internal::MultiChannelDotProduct(a.get(), 0, b.get(), 0, kFrames,
460 EXPECT_FLOAT_EQ(kFrames, dot_prod[0]);
461 EXPECT_FLOAT_EQ(0, dot_prod[1]);
462 EXPECT_FLOAT_EQ(-kFrames, dot_prod[2]);
// Half-length product with different start frames (4 in |a|, 8 in |b|).
464 internal::MultiChannelDotProduct(a.get(), 4, b.get(), 8, kFrames / 2,
467 EXPECT_FLOAT_EQ(kFrames / 2, dot_prod[0]);
468 EXPECT_FLOAT_EQ(0, dot_prod[1]);
469 EXPECT_FLOAT_EQ(-kFrames / 2, dot_prod[2]);
// Checks internal::MultiChannelMovingBlockEnergies() on a 2-channel bus by
// recomputing, for every block of |kFramesPerBlock| consecutive frames, the
// sum of squared samples per channel. The expectations read |energies| as
// interleaved by channel: index 2 * n is the left channel of block n and
// index 2 * n + 1 is the right channel.
472 TEST_F(AudioRendererAlgorithmTest, MovingBlockEnergy) {
473 const int kChannels = 2;
474 const int kFrames = 20;
475 const int kFramesPerBlock = 3;
// Number of distinct start positions for a block that fits in |kFrames|.
476 const int kNumBlocks = kFrames - (kFramesPerBlock - 1);
477 scoped_ptr<AudioBus> a = AudioBus::Create(kChannels, kFrames);
478 scoped_ptr<float[]> energies(new float[kChannels * kNumBlocks]);
479 float* ch_left = a->channel(0);
480 float* ch_right = a->channel(1);
482 // Fill up both channels.
483 for (int n = 0; n < kFrames; ++n) {
485 ch_right[n] = kFrames - 1 - n;
488 internal::MultiChannelMovingBlockEnergies(a.get(), kFramesPerBlock,
491 // Check if the energy of candidate blocks of each channel is computed
// correctly.
492 for (int n = 0; n < kNumBlocks; ++n) {
493 float expected_energy = 0;
494 for (int k = 0; k < kFramesPerBlock; ++k)
495 expected_energy += ch_left[n + k] * ch_left[n + k];
497 // Left (first) channel.
498 EXPECT_FLOAT_EQ(expected_energy, energies[2 * n]);
501 for (int k = 0; k < kFramesPerBlock; ++k)
502 expected_energy += ch_right[n + k] * ch_right[n + k];
504 // Second (right) channel.
505 EXPECT_FLOAT_EQ(expected_energy, energies[2 * n + 1]);
// Exercises the block-matching helpers in internal:: on a hand-built
// 2-channel, 12-frame search region and a 4-frame target block. It first
// verifies the target energy and the moving block energies of the search
// region, then checks the indices returned by FullSearch(), DecimatedSearch()
// and OptimalIndex(), with and without an effective exclusion interval.
509 TEST_F(AudioRendererAlgorithmTest, FullAndDecimatedSearch) {
510 const int kFramesInSearchRegion = 12;
511 const int kChannels = 2;
513 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f };
515 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.1f, 1.0f, 0.1f, 0.0f, 0.0f };
// Sanity-check that the literal arrays match the declared region size.
516 ASSERT_EQ(sizeof(ch_0), sizeof(ch_1));
517 ASSERT_EQ(static_cast<size_t>(kFramesInSearchRegion),
518 sizeof(ch_0) / sizeof(*ch_0));
519 scoped_ptr<AudioBus> search_region = AudioBus::Create(kChannels,
520 kFramesInSearchRegion);
521 float* ch = search_region->channel(0);
522 memcpy(ch, ch_0, sizeof(float) * kFramesInSearchRegion);
523 ch = search_region->channel(1);
524 memcpy(ch, ch_1, sizeof(float) * kFramesInSearchRegion);
526 const int kFramePerBlock = 4;
527 float target_0[] = { 1.0f, 1.0f, 1.0f, 0.0f };
528 float target_1[] = { 0.0f, 1.0f, 0.1f, 1.0f };
529 ASSERT_EQ(sizeof(target_0), sizeof(target_1));
530 ASSERT_EQ(static_cast<size_t>(kFramePerBlock),
531 sizeof(target_0) / sizeof(*target_0));
533 scoped_ptr<AudioBus> target = AudioBus::Create(kChannels,
535 ch = target->channel(0);
536 memcpy(ch, target_0, sizeof(float) * kFramePerBlock);
537 ch = target->channel(1);
538 memcpy(ch, target_1, sizeof(float) * kFramePerBlock);
540 scoped_ptr<float[]> energy_target(new float[kChannels]);
// The dot product of the target with itself is its per-channel energy.
542 internal::MultiChannelDotProduct(target.get(), 0, target.get(), 0,
543 kFramePerBlock, energy_target.get());
// NOTE(review): these are exact floating-point comparisons, unlike the
// ASSERT_FLOAT_EQ checks below; 2.01f in particular depends on summation
// round-off -- consider ASSERT_FLOAT_EQ.
545 ASSERT_EQ(3.f, energy_target[0]);
546 ASSERT_EQ(2.01f, energy_target[1]);
548 const int kNumCandidBlocks = kFramesInSearchRegion - (kFramePerBlock - 1);
549 scoped_ptr<float[]> energy_candid_blocks(new float[kNumCandidBlocks *
552 internal::MultiChannelMovingBlockEnergies(
553 search_region.get(), kFramePerBlock, energy_candid_blocks.get());
555 // Check the energy of the candidate blocks of the first channel.
// (Even indices hold channel 0, odd indices hold channel 1.)
556 ASSERT_FLOAT_EQ(0, energy_candid_blocks[0]);
557 ASSERT_FLOAT_EQ(0, energy_candid_blocks[2]);
558 ASSERT_FLOAT_EQ(1, energy_candid_blocks[4]);
559 ASSERT_FLOAT_EQ(2, energy_candid_blocks[6]);
560 ASSERT_FLOAT_EQ(3, energy_candid_blocks[8]);
561 ASSERT_FLOAT_EQ(3, energy_candid_blocks[10]);
562 ASSERT_FLOAT_EQ(2, energy_candid_blocks[12]);
563 ASSERT_FLOAT_EQ(1, energy_candid_blocks[14]);
564 ASSERT_FLOAT_EQ(0, energy_candid_blocks[16]);
566 // Check the energy of the candidate blocks of the second channel.
567 ASSERT_FLOAT_EQ(0, energy_candid_blocks[1]);
568 ASSERT_FLOAT_EQ(0, energy_candid_blocks[3]);
569 ASSERT_FLOAT_EQ(0, energy_candid_blocks[5]);
570 ASSERT_FLOAT_EQ(0, energy_candid_blocks[7]);
571 ASSERT_FLOAT_EQ(0.01f, energy_candid_blocks[9]);
572 ASSERT_FLOAT_EQ(1.01f, energy_candid_blocks[11]);
573 ASSERT_FLOAT_EQ(1.02f, energy_candid_blocks[13]);
574 ASSERT_FLOAT_EQ(1.02f, energy_candid_blocks[15]);
575 ASSERT_FLOAT_EQ(1.01f, energy_candid_blocks[17]);
577 // An interval which is of no effect.
578 internal::Interval exclude_interval = std::make_pair(-100, -10);
579 EXPECT_EQ(5, internal::FullSearch(
580 0, kNumCandidBlocks - 1, exclude_interval, target.get(),
581 search_region.get(), energy_target.get(), energy_candid_blocks.get()));
583 // Exclude the best match.
584 exclude_interval = std::make_pair(2, 5);
585 EXPECT_EQ(7, internal::FullSearch(
586 0, kNumCandidBlocks - 1, exclude_interval, target.get(),
587 search_region.get(), energy_target.get(), energy_candid_blocks.get()));
589 // An interval which is of no effect.
590 exclude_interval = std::make_pair(-100, -10);
591 EXPECT_EQ(4, internal::DecimatedSearch(
592 4, exclude_interval, target.get(), search_region.get(),
593 energy_target.get(), energy_candid_blocks.get()));
595 EXPECT_EQ(5, internal::OptimalIndex(search_region.get(), target.get(),
// Checks internal::QuadraticInterpolation() against the closed-form extremum
// of y = kA * x^2 + kB * x + kC. The visible samples are the polynomial's
// values at x = -1 (y_values[0]) and x = 1 (y_values[2]); the expected
// extremum is at x* = -kB / (2 * kA) with value y(x*).
// NOTE(review): the assignment to y_values[1] is not visible in this view;
// presumably the value at x = 0 -- confirm.
599 TEST_F(AudioRendererAlgorithmTest, QuadraticInterpolation) {
600 // Arbitrary coefficients.
601 const float kA = 0.7f;
602 const float kB = 1.2f;
603 const float kC = 0.8f;
606 y_values[0] = kA - kB + kC;
608 y_values[2] = kA + kB + kC;
611 float extremum_value;
613 internal::QuadraticInterpolation(y_values, &extremum, &extremum_value);
615 float x_star = -kB / (2.f * kA);
616 float y_star = kA * x_star * x_star + kB * x_star + kC;
618 EXPECT_FLOAT_EQ(x_star, extremum);
619 EXPECT_FLOAT_EQ(y_star, extremum_value);
// Expects internal::QuadraticInterpolation() to report an extremum position
// of 0.0 and an extremum value of 1.0 for this input.
// NOTE(review): the contents of |y_values| are not visible in this view; the
// test name suggests three colinear points -- confirm.
622 TEST_F(AudioRendererAlgorithmTest, QuadraticInterpolation_Colinear) {
629 float extremum_value;
631 internal::QuadraticInterpolation(y_values, &extremum, &extremum_value);
633 EXPECT_FLOAT_EQ(extremum, 0.0);
634 EXPECT_FLOAT_EQ(extremum_value, 1.0);
637 TEST_F(AudioRendererAlgorithmTest, WsolaSlowdown) {
641 TEST_F(AudioRendererAlgorithmTest, WsolaSpeedup) {