optimize decode_subframe_lpc()
author: Loren Merritt <lorenm@u.washington.edu>
Tue, 25 Sep 2007 23:31:13 +0000 (23:31 +0000)
committer: Loren Merritt <lorenm@u.washington.edu>
Tue, 25 Sep 2007 23:31:13 +0000 (23:31 +0000)
50%/67%/43% faster on core2/k8/p4, making flac decoding overall 24%/25%/11% faster

Originally committed as revision 10586 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/flac.c

index 74b245d0e5af194c46bd9962d9ab5640fb24b434..d8e8813d9eb86c805e4005c97aec52d7918d5737 100644 (file)
@@ -359,10 +359,25 @@ static int decode_subframe_lpc(FLACContext *s, int channel, int pred_order)
             s->decoded[channel][i] += sum >> qlevel;
         }
     } else {
-        int sum;
-        for (i = pred_order; i < s->blocksize; i++)
+        for (i = pred_order; i < s->blocksize-1; i += 2)
         {
-            sum = 0;
+            int c = coeffs[pred_order-1];
+            int s0 = c * s->decoded[channel][i-pred_order];
+            int s1 = 0;
+            for (j = pred_order-1; j > 0; j--)
+            {
+                int d = s->decoded[channel][i-j];
+                s1 += c*d;
+                c = coeffs[j-1];
+                s0 += c*d;
+            }
+            s0 = s->decoded[channel][i] += s0 >> qlevel;
+            s1 += c * s0;
+            s->decoded[channel][i+1] += s1 >> qlevel;
+        }
+        if (i < s->blocksize)
+        {
+            int sum = 0;
             for (j = 0; j < pred_order; j++)
                 sum += coeffs[j] * s->decoded[channel][i-j-1];
             s->decoded[channel][i] += sum >> qlevel;