#include <wtf/RefPtr.h>
using namespace std;
-
+
namespace WebCore {
// The value of 2 milliseconds is larger than the largest delay which exists in any
// HRTFKernel from the default HRTFDatabase (0.0136 seconds).
// NOTE(review): 0.0136 s is 13.6 ms, which would exceed 2 ms — presumably 0.00136 s
// was meant; confirm against the HRTFDatabase measurements.
// We ASSERT the delay values used in process() with this value.
const double MaxDelayTimeSeconds = 0.002;

// Sentinel azimuth index meaning "not yet initialized"; replaced by the first
// desired azimuth index encountered during rendering.
const int UninitializedAzimuth = -1;

// Audio is rendered in fixed chunks of this many sample-frames.
const unsigned RenderingQuantum = 128;
// Constructs an HRTF panner operating at |sampleRate|.
// Two complete convolver/kernel-position sets (suffix 1 and 2) are maintained so
// that azimuth/elevation changes can be cross-faded between them; rendering
// starts on set 1 (CrossfadeSelection1) with both azimuth indices marked
// uninitialized so the first render snaps to the first position encountered.
HRTFPanner::HRTFPanner(float sampleRate)
    : Panner(PanningModelHRTF)
    , m_sampleRate(sampleRate)
    , m_crossfadeSelection(CrossfadeSelection1)
    , m_azimuthIndex1(UninitializedAzimuth)
    , m_elevation1(0)
    , m_azimuthIndex2(UninitializedAzimuth)
    , m_elevation2(0)
    , m_crossfadeX(0)
    , m_crossfadeIncr(0)
    , m_convolverL1(fftSizeForSampleRate(sampleRate))
    , m_convolverR1(fftSizeForSampleRate(sampleRate))
    , m_convolverL2(fftSizeForSampleRate(sampleRate))
    , m_convolverR2(fftSizeForSampleRate(sampleRate))
    , m_delayLineL(MaxDelayTimeSeconds, sampleRate)
    , m_delayLineR(MaxDelayTimeSeconds, sampleRate)
    , m_tempL1(RenderingQuantum)
    , m_tempR1(RenderingQuantum)
    , m_tempL2(RenderingQuantum)
    , m_tempR2(RenderingQuantum)
{
}
HRTFPanner::~HRTFPanner()
// Clears all DSP history: both convolver sets and the two inter-aural delay lines.
// NOTE(review): the crossfade state (m_crossfadeSelection, m_crossfadeX,
// m_crossfadeIncr) and the cached azimuth/elevation pairs are not reset here —
// confirm that carrying them across a reset is intended.
void HRTFPanner::reset()
{
    m_convolverL1.reset();
    m_convolverR1.reset();
    m_convolverL2.reset();
    m_convolverR2.reset();
    m_delayLineL.reset();
    m_delayLineR.reset();
}
int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend)
{
// Convert the azimuth angle from the range -180 -> +180 into the range 0 -> 360.
// The azimuth index may then be calculated from this positive value.
if (azimuth < 0)
azimuth += 360.0;
-
- HRTFDatabase* database = HRTFDatabaseLoader::defaultHRTFDatabase();
+
+ HRTFDatabase* database = HRTFDatabaseLoader::defaultHRTFDatabase();
ASSERT(database);
-
+
int numberOfAzimuths = database->numberOfAzimuths();
const double angleBetweenAzimuths = 360.0 / numberOfAzimuths;
double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths;
int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat);
azimuthBlend = desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex);
-
+
// We don't immediately start using this azimuth index, but instead approach this index from the last index we rendered at.
// This minimizes the clicks and graininess for moving sources which occur otherwise.
desiredAzimuthIndex = max(0, desiredAzimuthIndex);
}
// This code only runs as long as the context is alive and after database has been loaded.
- HRTFDatabase* database = HRTFDatabaseLoader::defaultHRTFDatabase();
+ HRTFDatabase* database = HRTFDatabaseLoader::defaultHRTFDatabase();
ASSERT(database);
if (!database) {
outputBus->zero();
double azimuthBlend;
int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend);
- // This algorithm currently requires that we process in power-of-two size chunks at least 128.
+ // Initially snap azimuth and elevation values to first values encountered.
+ if (m_azimuthIndex1 == UninitializedAzimuth) {
+ m_azimuthIndex1 = desiredAzimuthIndex;
+ m_elevation1 = elevation;
+ }
+ if (m_azimuthIndex2 == UninitializedAzimuth) {
+ m_azimuthIndex2 = desiredAzimuthIndex;
+ m_elevation2 = elevation;
+ }
+
+ // Cross-fade / transition over a period of around 45 milliseconds.
+ // This is an empirical value tuned to be a reasonable trade-off between
+ // smoothness and speed.
+ const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096;
+
+ // Check for azimuth and elevation changes, initiating a cross-fade if needed.
+ if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) {
+ if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) {
+ // Cross-fade from 1 -> 2
+ m_crossfadeIncr = 1 / fadeFrames;
+ m_azimuthIndex2 = desiredAzimuthIndex;
+ m_elevation2 = elevation;
+ }
+ }
+ if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) {
+ if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) {
+ // Cross-fade from 2 -> 1
+ m_crossfadeIncr = -1 / fadeFrames;
+ m_azimuthIndex1 = desiredAzimuthIndex;
+ m_elevation1 = elevation;
+ }
+ }
+
+ // This algorithm currently requires that we process in power-of-two size chunks at least RenderingQuantum.
ASSERT(1UL << static_cast<int>(log2(framesToProcess)) == framesToProcess);
- ASSERT(framesToProcess >= 128);
-
- const unsigned framesPerSegment = 128;
+ ASSERT(framesToProcess >= RenderingQuantum);
+
+ const unsigned framesPerSegment = RenderingQuantum;
const unsigned numberOfSegments = framesToProcess / framesPerSegment;
for (unsigned segment = 0; segment < numberOfSegments; ++segment) {
- if (m_isFirstRender) {
- // Snap exactly to desired position (first time and after reset()).
- m_azimuthIndex = desiredAzimuthIndex;
- m_isFirstRender = false;
- } else {
- // Each segment renders with an azimuth index closer by one to the desired azimuth index.
- // Because inter-aural time delay is mostly a factor of azimuth and the delay is where the clicks and graininess come from,
- // we don't bother smoothing the elevations.
- int numberOfAzimuths = database->numberOfAzimuths();
- bool wrap = wrapDistance(m_azimuthIndex, desiredAzimuthIndex, numberOfAzimuths);
- if (wrap) {
- if (m_azimuthIndex < desiredAzimuthIndex)
- m_azimuthIndex = (m_azimuthIndex - 1 + numberOfAzimuths) % numberOfAzimuths;
- else if (m_azimuthIndex > desiredAzimuthIndex)
- m_azimuthIndex = (m_azimuthIndex + 1) % numberOfAzimuths;
- } else {
- if (m_azimuthIndex < desiredAzimuthIndex)
- m_azimuthIndex = (m_azimuthIndex + 1) % numberOfAzimuths;
- else if (m_azimuthIndex > desiredAzimuthIndex)
- m_azimuthIndex = (m_azimuthIndex - 1 + numberOfAzimuths) % numberOfAzimuths;
- }
- }
-
- // Get the HRTFKernels and interpolated delays.
- HRTFKernel* kernelL;
- HRTFKernel* kernelR;
- double frameDelayL;
- double frameDelayR;
- database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex, elevation, kernelL, kernelR, frameDelayL, frameDelayR);
-
- ASSERT(kernelL && kernelR);
- if (!kernelL || !kernelR) {
+ // Get the HRTFKernels and interpolated delays.
+ HRTFKernel* kernelL1;
+ HRTFKernel* kernelR1;
+ HRTFKernel* kernelL2;
+ HRTFKernel* kernelR2;
+ double frameDelayL1;
+ double frameDelayR1;
+ double frameDelayL2;
+ double frameDelayR2;
+ database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, m_elevation1, kernelL1, kernelR1, frameDelayL1, frameDelayR1);
+ database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, m_elevation2, kernelL2, kernelR2, frameDelayL2, frameDelayR2);
+
+ bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2;
+ ASSERT(areKernelsGood);
+ if (!areKernelsGood) {
outputBus->zero();
return;
}
-
- ASSERT(frameDelayL / sampleRate() < MaxDelayTimeSeconds && frameDelayR / sampleRate() < MaxDelayTimeSeconds);
-
+
+ ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && frameDelayR1 / sampleRate() < MaxDelayTimeSeconds);
+ ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds);
+
+ // Crossfade inter-aural delays based on transitions.
+ double frameDelayL = (1 - m_crossfadeX) * frameDelayL1 + m_crossfadeX * frameDelayL2;
+ double frameDelayR = (1 - m_crossfadeX) * frameDelayR1 + m_crossfadeX * frameDelayR2;
+
// Calculate the source and destination pointers for the current segment.
unsigned offset = segment * framesPerSegment;
const float* segmentSourceL = sourceL + offset;
m_delayLineL.process(segmentSourceL, segmentDestinationL, framesPerSegment);
m_delayLineR.process(segmentSourceR, segmentDestinationR, framesPerSegment);
- // Now do the convolutions in-place.
- m_convolverL.process(kernelL->fftFrame(), segmentDestinationL, segmentDestinationL, framesPerSegment);
- m_convolverR.process(kernelR->fftFrame(), segmentDestinationR, segmentDestinationR, framesPerSegment);
+ bool needsCrossfading = m_crossfadeIncr;
+
+ // Have the convolvers render directly to the final destination if we're not cross-fading.
+ float* convolutionDestinationL1 = needsCrossfading ? m_tempL1.data() : segmentDestinationL;
+ float* convolutionDestinationR1 = needsCrossfading ? m_tempR1.data() : segmentDestinationR;
+ float* convolutionDestinationL2 = needsCrossfading ? m_tempL2.data() : segmentDestinationL;
+ float* convolutionDestinationR2 = needsCrossfading ? m_tempR2.data() : segmentDestinationR;
+
+ // Now do the convolutions.
+ // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading.
+
+ if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
+ m_convolverL1.process(kernelL1->fftFrame(), segmentDestinationL, convolutionDestinationL1, framesPerSegment);
+ m_convolverR1.process(kernelR1->fftFrame(), segmentDestinationR, convolutionDestinationR1, framesPerSegment);
+ }
+
+ if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
+ m_convolverL2.process(kernelL2->fftFrame(), segmentDestinationL, convolutionDestinationL2, framesPerSegment);
+ m_convolverR2.process(kernelR2->fftFrame(), segmentDestinationR, convolutionDestinationR2, framesPerSegment);
+ }
+
+ if (needsCrossfading) {
+ // Apply linear cross-fade.
+ float x = m_crossfadeX;
+ float incr = m_crossfadeIncr;
+ for (unsigned i = 0; i < framesPerSegment; ++i) {
+ segmentDestinationL[i] = (1 - x) * convolutionDestinationL1[i] + x * convolutionDestinationL2[i];
+ segmentDestinationR[i] = (1 - x) * convolutionDestinationR1[i] + x * convolutionDestinationR2[i];
+ x += incr;
+ }
+ // Update cross-fade value from local.
+ m_crossfadeX = x;
+
+ if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) {
+ // We've fully made the crossfade transition from 1 -> 2.
+ m_crossfadeSelection = CrossfadeSelection2;
+ m_crossfadeX = 1;
+ m_crossfadeIncr = 0;
+ } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) {
+ // We've fully made the crossfade transition from 2 -> 1.
+ m_crossfadeSelection = CrossfadeSelection1;
+ m_crossfadeX = 0;
+ m_crossfadeIncr = 0;
+ }
+ }
}
}
// Chooses the FFT size used to construct the FFTConvolvers for the given sample rate.
static size_t fftSizeForSampleRate(float sampleRate);
// Sample rate (Hz) this panner was created with.
float sampleRate() const { return m_sampleRate; }
-
+
private:
// Given an azimuth angle in the range -180 -> +180, returns the corresponding azimuth index for the database,
// and azimuthBlend which is an interpolation value from 0 -> 1.
int calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend);
float m_sampleRate;
-
- // m_isFirstRender and m_azimuthIndex are used to avoid harshly changing from rendering at one azimuth angle to another angle very far away.
- // Changing the azimuth gradually produces a smoother sound.
- bool m_isFirstRender;
- int m_azimuthIndex;
-
- FFTConvolver m_convolverL;
- FFTConvolver m_convolverR;
+
+ // We maintain two sets of convolvers for smooth cross-faded interpolations when
+ // the azimuth and elevation are dynamically changing.
+ // When the azimuth and elevation are not changing, we simply process with one of the two sets.
+ // Initially we use CrossfadeSelection1 corresponding to m_convolverL1 and m_convolverR1.
+ // Whenever the azimuth or elevation changes, a crossfade is initiated to transition
+ // to the new position. So if we're currently processing with CrossfadeSelection1, then
+ // we transition to CrossfadeSelection2 (and vice versa).
+ // If we're in the middle of a transition, then we wait until it is complete before
+ // initiating a new transition.
+
+ // Selects either the convolver set (m_convolverL1, m_convolverR1) or (m_convolverL2, m_convolverR2).
+ enum CrossfadeSelection {
+ CrossfadeSelection1,
+ CrossfadeSelection2
+ };
+
+ CrossfadeSelection m_crossfadeSelection;
+
+ // azimuth/elevation for CrossfadeSelection1.
+ int m_azimuthIndex1;
+ double m_elevation1;
+
+ // azimuth/elevation for CrossfadeSelection2.
+ int m_azimuthIndex2;
+ double m_elevation2;
+
+ // A crossfade value 0 <= m_crossfadeX <= 1.
+ float m_crossfadeX;
+
+ // Per-sample-frame crossfade value increment.
+ float m_crossfadeIncr;
+
+ FFTConvolver m_convolverL1;
+ FFTConvolver m_convolverR1;
+ FFTConvolver m_convolverL2;
+ FFTConvolver m_convolverR2;
+
DelayDSPKernel m_delayLineL;
DelayDSPKernel m_delayLineR;
+
+ AudioFloatArray m_tempL1;
+ AudioFloatArray m_tempR1;
+ AudioFloatArray m_tempL2;
+ AudioFloatArray m_tempR2;
};
} // namespace WebCore