#include <wtf/RefPtr.h>
using namespace std;
-
+
namespace WebCore {
// The value of 2 milliseconds is larger than the largest delay which exists in any
// HRTFKernel from the default HRTFDatabase (0.0136 seconds).
// NOTE(review): 0.0136 s is 13.6 ms, which would exceed 2 ms — presumably 0.00136 s
// was meant; confirm against the HRTFDatabase measurements.
// We ASSERT the delay values used in process() with this value.
const double MaxDelayTimeSeconds = 0.002;

// Sentinel azimuth index meaning "not yet initialized"; replaced by the first
// desired azimuth index encountered during rendering.
const int UninitializedAzimuth = -1;

// Audio is rendered in fixed chunks of this many sample-frames.
const unsigned RenderingQuantum = 128;
// Constructs an HRTF panner operating at |sampleRate|.
// Two complete convolver/kernel-position sets (suffix 1 and 2) are maintained so
// that azimuth/elevation changes can be cross-faded between them; rendering
// starts on set 1 (CrossfadeSelection1) with both azimuth indices marked
// uninitialized so the first render snaps to the first position encountered.
HRTFPanner::HRTFPanner(float sampleRate)
    : Panner(PanningModelHRTF)
    , m_sampleRate(sampleRate)
    , m_crossfadeSelection(CrossfadeSelection1)
    , m_azimuthIndex1(UninitializedAzimuth)
    , m_elevation1(0)
    , m_azimuthIndex2(UninitializedAzimuth)
    , m_elevation2(0)
    , m_crossfadeX(0)
    , m_crossfadeIncr(0)
    , m_convolverL1(fftSizeForSampleRate(sampleRate))
    , m_convolverR1(fftSizeForSampleRate(sampleRate))
    , m_convolverL2(fftSizeForSampleRate(sampleRate))
    , m_convolverR2(fftSizeForSampleRate(sampleRate))
    , m_delayLineL(MaxDelayTimeSeconds, sampleRate)
    , m_delayLineR(MaxDelayTimeSeconds, sampleRate)
    , m_tempL1(RenderingQuantum)
    , m_tempR1(RenderingQuantum)
    , m_tempL2(RenderingQuantum)
    , m_tempR2(RenderingQuantum)
{
}
HRTFPanner::~HRTFPanner()
// Clears all DSP history: both convolver sets and the two inter-aural delay lines.
// NOTE(review): the crossfade state (m_crossfadeSelection, m_crossfadeX,
// m_crossfadeIncr) and the cached azimuth/elevation pairs are not reset here —
// confirm that carrying them across a reset is intended.
void HRTFPanner::reset()
{
    m_convolverL1.reset();
    m_convolverR1.reset();
    m_convolverL2.reset();
    m_convolverR2.reset();
    m_delayLineL.reset();
    m_delayLineR.reset();
}
int HRTFPanner::calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend)
{
// Convert the azimuth angle from the range -180 -> +180 into the range 0 -> 360.
// The azimuth index may then be calculated from this positive value.
if (azimuth < 0)
azimuth += 360.0;
-
- HRTFDatabase* database = HRTFDatabaseLoader::defaultHRTFDatabase();
+
+ HRTFDatabase* database = HRTFDatabaseLoader::defaultHRTFDatabase();
ASSERT(database);
-
+
int numberOfAzimuths = database->numberOfAzimuths();
const double angleBetweenAzimuths = 360.0 / numberOfAzimuths;
double desiredAzimuthIndexFloat = azimuth / angleBetweenAzimuths;
int desiredAzimuthIndex = static_cast<int>(desiredAzimuthIndexFloat);
azimuthBlend = desiredAzimuthIndexFloat - static_cast<double>(desiredAzimuthIndex);
-
+
// We don't immediately start using this azimuth index, but instead approach this index from the last index we rendered at.
// This minimizes the clicks and graininess for moving sources which occur otherwise.
desiredAzimuthIndex = max(0, desiredAzimuthIndex);
}
// This code only runs as long as the context is alive and after database has been loaded.
- HRTFDatabase* database = HRTFDatabaseLoader::defaultHRTFDatabase();
+ HRTFDatabase* database = HRTFDatabaseLoader::defaultHRTFDatabase();
ASSERT(database);
if (!database) {
outputBus->zero();
double azimuthBlend;
int desiredAzimuthIndex = calculateDesiredAzimuthIndexAndBlend(azimuth, azimuthBlend);
- // This algorithm currently requires that we process in power-of-two size chunks at least 128.
+ // Initially snap azimuth and elevation values to first values encountered.
+ if (m_azimuthIndex1 == UninitializedAzimuth) {
+ m_azimuthIndex1 = desiredAzimuthIndex;
+ m_elevation1 = elevation;
+ }
+ if (m_azimuthIndex2 == UninitializedAzimuth) {
+ m_azimuthIndex2 = desiredAzimuthIndex;
+ m_elevation2 = elevation;
+ }
+
+ // Cross-fade / transition over a period of around 45 milliseconds.
+ // This is an empirical value tuned to be a reasonable trade-off between
+ // smoothness and speed.
+ const double fadeFrames = sampleRate() <= 48000 ? 2048 : 4096;
+
+ // Check for azimuth and elevation changes, initiating a cross-fade if needed.
+ if (!m_crossfadeX && m_crossfadeSelection == CrossfadeSelection1) {
+ if (desiredAzimuthIndex != m_azimuthIndex1 || elevation != m_elevation1) {
+ // Cross-fade from 1 -> 2
+ m_crossfadeIncr = 1 / fadeFrames;
+ m_azimuthIndex2 = desiredAzimuthIndex;
+ m_elevation2 = elevation;
+ }
+ }
+ if (m_crossfadeX == 1 && m_crossfadeSelection == CrossfadeSelection2) {
+ if (desiredAzimuthIndex != m_azimuthIndex2 || elevation != m_elevation2) {
+ // Cross-fade from 2 -> 1
+ m_crossfadeIncr = -1 / fadeFrames;
+ m_azimuthIndex1 = desiredAzimuthIndex;
+ m_elevation1 = elevation;
+ }
+ }
+
+ // This algorithm currently requires that we process in power-of-two size chunks at least RenderingQuantum.
ASSERT(1UL << static_cast<int>(log2(framesToProcess)) == framesToProcess);
- ASSERT(framesToProcess >= 128);
-
- const unsigned framesPerSegment = 128;
+ ASSERT(framesToProcess >= RenderingQuantum);
+
+ const unsigned framesPerSegment = RenderingQuantum;
const unsigned numberOfSegments = framesToProcess / framesPerSegment;
for (unsigned segment = 0; segment < numberOfSegments; ++segment) {
- if (m_isFirstRender) {
- // Snap exactly to desired position (first time and after reset()).
- m_azimuthIndex = desiredAzimuthIndex;
- m_isFirstRender = false;
- } else {
- // Each segment renders with an azimuth index closer by one to the desired azimuth index.
- // Because inter-aural time delay is mostly a factor of azimuth and the delay is where the clicks and graininess come from,
- // we don't bother smoothing the elevations.
- int numberOfAzimuths = database->numberOfAzimuths();
- bool wrap = wrapDistance(m_azimuthIndex, desiredAzimuthIndex, numberOfAzimuths);
- if (wrap) {
- if (m_azimuthIndex < desiredAzimuthIndex)
- m_azimuthIndex = (m_azimuthIndex - 1 + numberOfAzimuths) % numberOfAzimuths;
- else if (m_azimuthIndex > desiredAzimuthIndex)
- m_azimuthIndex = (m_azimuthIndex + 1) % numberOfAzimuths;
- } else {
- if (m_azimuthIndex < desiredAzimuthIndex)
- m_azimuthIndex = (m_azimuthIndex + 1) % numberOfAzimuths;
- else if (m_azimuthIndex > desiredAzimuthIndex)
- m_azimuthIndex = (m_azimuthIndex - 1 + numberOfAzimuths) % numberOfAzimuths;
- }
- }
-
- // Get the HRTFKernels and interpolated delays.
- HRTFKernel* kernelL;
- HRTFKernel* kernelR;
- double frameDelayL;
- double frameDelayR;
- database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex, elevation, kernelL, kernelR, frameDelayL, frameDelayR);
-
- ASSERT(kernelL && kernelR);
- if (!kernelL || !kernelR) {
+ // Get the HRTFKernels and interpolated delays.
+ HRTFKernel* kernelL1;
+ HRTFKernel* kernelR1;
+ HRTFKernel* kernelL2;
+ HRTFKernel* kernelR2;
+ double frameDelayL1;
+ double frameDelayR1;
+ double frameDelayL2;
+ double frameDelayR2;
+ database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex1, m_elevation1, kernelL1, kernelR1, frameDelayL1, frameDelayR1);
+ database->getKernelsFromAzimuthElevation(azimuthBlend, m_azimuthIndex2, m_elevation2, kernelL2, kernelR2, frameDelayL2, frameDelayR2);
+
+ bool areKernelsGood = kernelL1 && kernelR1 && kernelL2 && kernelR2;
+ ASSERT(areKernelsGood);
+ if (!areKernelsGood) {
outputBus->zero();
return;
}
-
- ASSERT(frameDelayL / sampleRate() < MaxDelayTimeSeconds && frameDelayR / sampleRate() < MaxDelayTimeSeconds);
-
+
+ ASSERT(frameDelayL1 / sampleRate() < MaxDelayTimeSeconds && frameDelayR1 / sampleRate() < MaxDelayTimeSeconds);
+ ASSERT(frameDelayL2 / sampleRate() < MaxDelayTimeSeconds && frameDelayR2 / sampleRate() < MaxDelayTimeSeconds);
+
+ // Crossfade inter-aural delays based on transitions.
+ double frameDelayL = (1 - m_crossfadeX) * frameDelayL1 + m_crossfadeX * frameDelayL2;
+ double frameDelayR = (1 - m_crossfadeX) * frameDelayR1 + m_crossfadeX * frameDelayR2;
+
// Calculate the source and destination pointers for the current segment.
unsigned offset = segment * framesPerSegment;
const float* segmentSourceL = sourceL + offset;
m_delayLineL.process(segmentSourceL, segmentDestinationL, framesPerSegment);
m_delayLineR.process(segmentSourceR, segmentDestinationR, framesPerSegment);
- // Now do the convolutions in-place.
- m_convolverL.process(kernelL->fftFrame(), segmentDestinationL, segmentDestinationL, framesPerSegment);
- m_convolverR.process(kernelR->fftFrame(), segmentDestinationR, segmentDestinationR, framesPerSegment);
+ bool needsCrossfading = m_crossfadeIncr;
+
+ // Have the convolvers render directly to the final destination if we're not cross-fading.
+ float* convolutionDestinationL1 = needsCrossfading ? m_tempL1.data() : segmentDestinationL;
+ float* convolutionDestinationR1 = needsCrossfading ? m_tempR1.data() : segmentDestinationR;
+ float* convolutionDestinationL2 = needsCrossfading ? m_tempL2.data() : segmentDestinationL;
+ float* convolutionDestinationR2 = needsCrossfading ? m_tempR2.data() : segmentDestinationR;
+
+ // Now do the convolutions.
+ // Note that we avoid doing convolutions on both sets of convolvers if we're not currently cross-fading.
+
+ if (m_crossfadeSelection == CrossfadeSelection1 || needsCrossfading) {
+ m_convolverL1.process(kernelL1->fftFrame(), segmentDestinationL, convolutionDestinationL1, framesPerSegment);
+ m_convolverR1.process(kernelR1->fftFrame(), segmentDestinationR, convolutionDestinationR1, framesPerSegment);
+ }
+
+ if (m_crossfadeSelection == CrossfadeSelection2 || needsCrossfading) {
+ m_convolverL2.process(kernelL2->fftFrame(), segmentDestinationL, convolutionDestinationL2, framesPerSegment);
+ m_convolverR2.process(kernelR2->fftFrame(), segmentDestinationR, convolutionDestinationR2, framesPerSegment);
+ }
+
+ if (needsCrossfading) {
+ // Apply linear cross-fade.
+ float x = m_crossfadeX;
+ float incr = m_crossfadeIncr;
+ for (unsigned i = 0; i < framesPerSegment; ++i) {
+ segmentDestinationL[i] = (1 - x) * convolutionDestinationL1[i] + x * convolutionDestinationL2[i];
+ segmentDestinationR[i] = (1 - x) * convolutionDestinationR1[i] + x * convolutionDestinationR2[i];
+ x += incr;
+ }
+ // Update cross-fade value from local.
+ m_crossfadeX = x;
+
+ if (m_crossfadeIncr > 0 && fabs(m_crossfadeX - 1) < m_crossfadeIncr) {
+ // We've fully made the crossfade transition from 1 -> 2.
+ m_crossfadeSelection = CrossfadeSelection2;
+ m_crossfadeX = 1;
+ m_crossfadeIncr = 0;
+ } else if (m_crossfadeIncr < 0 && fabs(m_crossfadeX) < -m_crossfadeIncr) {
+ // We've fully made the crossfade transition from 2 -> 1.
+ m_crossfadeSelection = CrossfadeSelection1;
+ m_crossfadeX = 0;
+ m_crossfadeIncr = 0;
+ }
+ }
}
}
// Chooses the FFT size used to construct the FFTConvolvers for the given sample rate.
static size_t fftSizeForSampleRate(float sampleRate);
// Sample rate (Hz) this panner was created with.
float sampleRate() const { return m_sampleRate; }
-
+
private:
// Given an azimuth angle in the range -180 -> +180, returns the corresponding azimuth index for the database,
// and azimuthBlend which is an interpolation value from 0 -> 1.
int calculateDesiredAzimuthIndexAndBlend(double azimuth, double& azimuthBlend);
float m_sampleRate;
-
- // m_isFirstRender and m_azimuthIndex are used to avoid harshly changing from rendering at one azimuth angle to another angle very far away.
- // Changing the azimuth gradually produces a smoother sound.
- bool m_isFirstRender;
- int m_azimuthIndex;
-
- FFTConvolver m_convolverL;
- FFTConvolver m_convolverR;
+
+ // We maintain two sets of convolvers for smooth cross-faded interpolations when
+ // the azimuth and elevation are dynamically changing.
+ // When the azimuth and elevation are not changing, we simply process with one of the two sets.
+ // Initially we use CrossfadeSelection1 corresponding to m_convolverL1 and m_convolverR1.
+ // Whenever the azimuth or elevation changes, a crossfade is initiated to transition
+ // to the new position. So if we're currently processing with CrossfadeSelection1, then
+ // we transition to CrossfadeSelection2 (and vice versa).
+ // If we're in the middle of a transition, then we wait until it is complete before
+ // initiating a new transition.
+
+ // Selects either the convolver set (m_convolverL1, m_convolverR1) or (m_convolverL2, m_convolverR2).
+ enum CrossfadeSelection {
+ CrossfadeSelection1,
+ CrossfadeSelection2
+ };
+
+ CrossfadeSelection m_crossfadeSelection;
+
+ // azimuth/elevation for CrossfadeSelection1.
+ int m_azimuthIndex1;
+ double m_elevation1;
+
+ // azimuth/elevation for CrossfadeSelection2.
+ int m_azimuthIndex2;
+ double m_elevation2;
+
+ // A crossfade value 0 <= m_crossfadeX <= 1.
+ float m_crossfadeX;
+
+ // Per-sample-frame crossfade value increment.
+ float m_crossfadeIncr;
+
+ FFTConvolver m_convolverL1;
+ FFTConvolver m_convolverR1;
+ FFTConvolver m_convolverL2;
+ FFTConvolver m_convolverR2;
+
DelayDSPKernel m_delayLineL;
DelayDSPKernel m_delayLineR;
+
+ AudioFloatArray m_tempL1;
+ AudioFloatArray m_tempR1;
+ AudioFloatArray m_tempL2;
+ AudioFloatArray m_tempR2;
};
} // namespace WebCore