vp8_block_error_xmm: remove unnecessary instructions
author Jim Bankoski <jimbankoski@google.com>
Wed, 16 Jun 2010 16:36:53 +0000 (12:36 -0400)
committer John Koleszar <jkoleszar@google.com>
Fri, 18 Jun 2010 17:34:43 +0000 (13:34 -0400)
Remove a couple of instructions from this function that weren't
necessary for correct execution.

Change-Id: Ib649674f140689f7e5c1530c35686241688a3151

vp8/encoder/x86/encodeopt.asm

index 842fbdc..b4fe576 100644 (file)
@@ -11,7 +11,6 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-
 ;int vp8_block_error_xmm(short *coeff_ptr,  short *dcoef_ptr)
 global sym(vp8_block_error_xmm)
 sym(vp8_block_error_xmm):
@@ -20,11 +19,9 @@ sym(vp8_block_error_xmm):
     SHADOW_ARGS_TO_STACK 2
     push rsi
     push rdi
-    ; end prolog
-
+    ; end prologue
 
         mov         rsi,        arg(0) ;coeff_ptr
-        pxor        xmm7,       xmm7
 
         mov         rdi,        arg(1) ;dcoef_ptr
         movdqa      xmm3,       [rsi]
@@ -33,31 +30,25 @@ sym(vp8_block_error_xmm):
         movdqa      xmm5,       [rsi+16]
 
         movdqa      xmm6,       [rdi+16]
-        pxor        xmm1,       xmm1    ; from movd xmm1, dc; dc=0
+        psubw       xmm3,       xmm4
 
-        movdqa      xmm2,       xmm7
         psubw       xmm5,       xmm6
-
-        por         xmm1,       xmm2
+        pmaddwd     xmm3,       xmm3
         pmaddwd     xmm5,       xmm5
 
-        pcmpeqw     xmm1,       xmm7
-        psubw       xmm3,       xmm4
+        paddd       xmm3,       xmm5
 
-        pand        xmm1,       xmm3
-        pmaddwd     xmm1,       xmm1
-
-        paddd       xmm1,       xmm5
-        movdqa      xmm0,       xmm1
+        pxor        xmm7,       xmm7
+        movdqa      xmm0,       xmm3
 
         punpckldq   xmm0,       xmm7
-        punpckhdq   xmm1,       xmm7
+        punpckhdq   xmm3,       xmm7
 
-        paddd       xmm0,       xmm1
-        movdqa      xmm1,       xmm0
+        paddd       xmm0,       xmm3
+        movdqa      xmm3,       xmm0
 
         psrldq      xmm0,       8
-        paddd       xmm0,       xmm1
+        paddd       xmm0,       xmm3
 
         movd        rax,        xmm0
 
@@ -68,7 +59,6 @@ sym(vp8_block_error_xmm):
     pop         rbp
     ret
 
-
 ;int vp8_block_error_mmx(short *coeff_ptr,  short *dcoef_ptr)
 global sym(vp8_block_error_mmx)
 sym(vp8_block_error_mmx):