cosmetics in imdct_sse

author Loren Merritt <lorenm@u.washington.edu>
Sat, 28 Aug 2010 21:03:13 +0000 (21:03 +0000)
committer Loren Merritt <lorenm@u.washington.edu>
Sat, 28 Aug 2010 21:03:13 +0000 (21:03 +0000)
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm

index 31176d6..b75ec0c 100644 (file)
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -532,20 +532,15 @@ INIT_XMM
      unpckhps xmm0, xmm2
  %endmacro
  
-%macro PREROTATEW 3 ;addr1, addr2, xmm
-    movlps   %1,   %3
-    movhps   %2,   %3
-%endmacro
-
  %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
      movaps   xmm6, [%4+%1*2]
      movaps   %2,   [%4+%1*2+0x10]
      movaps   %3,   xmm6
      movaps   xmm7, %2
-    mulps    xmm6, [%5+%1*1]
-    mulps    %2,   [%6+%1*1]
-    mulps    %3,   [%6+%1*1]
-    mulps    xmm7, [%5+%1*1]
+    mulps    xmm6, [%5+%1]
+    mulps    %2,   [%6+%1]
+    mulps    %3,   [%6+%1]
+    mulps    xmm7, [%5+%1]
      subps    %2,   xmm6
      addps    %3,   xmm7
  %endmacro
@@ -576,8 +571,6 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
  %define rrevtab r10
  %define rtcos   r11
  %define rtsin   r12
-    push  r10
-    push  r11
      push  r12
      push  r13
      push  r14
@@ -620,21 +613,25 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
  
      PREROTATER r4, r3, r2, rtcos, rtsin
  %ifdef ARCH_X86_64
-    movzx  r5,  word [rrevtab+r4*1-4]
-    movzx  r6,  word [rrevtab+r4*1-2]
-    movzx  r13, word [rrevtab+r3*1]
-    movzx  r14, word [rrevtab+r3*1+2]
-    PREROTATEW [r1+r5 *8], [r1+r6 *8], xmm0
-    PREROTATEW [r1+r13*8], [r1+r14*8], xmm1
+    movzx  r5,  word [rrevtab+r4-4]
+    movzx  r6,  word [rrevtab+r4-2]
+    movzx  r13, word [rrevtab+r3]
+    movzx  r14, word [rrevtab+r3+2]
+    movlps [r1+r5 *8], xmm0
+    movhps [r1+r6 *8], xmm0
+    movlps [r1+r13*8], xmm1
+    movhps [r1+r14*8], xmm1
      add    r4, 4
  %else
      mov    r6, [esp]
-    movzx  r5, word [r6+r4*1-4]
-    movzx  r4, word [r6+r4*1-2]
-    PREROTATEW [r1+r5*8], [r1+r4*8], xmm0
-    movzx  r5, word [r6+r3*1]
-    movzx  r4, word [r6+r3*1+2]
-    PREROTATEW [r1+r5*8], [r1+r4*8], xmm1
+    movzx  r5, word [r6+r4-4]
+    movzx  r4, word [r6+r4-2]
+    movlps [r1+r5*8], xmm0
+    movhps [r1+r4*8], xmm0
+    movzx  r5, word [r6+r3]
+    movzx  r4, word [r6+r3+2]
+    movlps [r1+r5*8], xmm1
+    movhps [r1+r4*8], xmm1
  %endif
      sub    r3, 4
      jns    .pre
@@ -663,8 +660,6 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
      pop  r14
      pop  r13
      pop  r12
-    pop  r11
-    pop  r10
  %else
      add esp, 12
  %endif
author	Loren Merritt <lorenm@u.washington.edu>
	Sat, 28 Aug 2010 21:03:13 +0000 (21:03 +0000)
committer	Loren Merritt <lorenm@u.washington.edu>
	Sat, 28 Aug 2010 21:03:13 +0000 (21:03 +0000)