For rounding in the SSSE3 chroma MC code, load the full 16-byte pw_3/pw_4 constants directly instead of reading 8 bytes and duplicating them into the upper half with movlhps

author Ronald S. Bultje <rsbultje@gmail.com>

Fri, 24 Dec 2010 17:23:22 +0000 (17:23 +0000)

committer Ronald S. Bultje <rsbultje@gmail.com>

Fri, 24 Dec 2010 17:23:22 +0000 (17:23 +0000)
author Ronald S. Bultje <rsbultje@gmail.com>
Fri, 24 Dec 2010 17:23:22 +0000 (17:23 +0000)
committer Ronald S. Bultje <rsbultje@gmail.com>
Fri, 24 Dec 2010 17:23:22 +0000 (17:23 +0000)
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c

index db1e6e3..909ec41 100644 (file)
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -41,7 +41,7 @@ DECLARE_ALIGNED(8,  const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
  DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
  {0x8000000080000000ULL, 0x8000000080000000ULL};
  
-DECLARE_ALIGNED(8,  const uint64_t, ff_pw_3  ) = 0x0003000300030003ULL;
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_3  ) = {0x0003000300030003ULL, 0x0003000300030003ULL}; /* 16-byte aligned: read with movdqa */
  DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_4  ) = {0x0004000400040004ULL, 0x0004000400040004ULL};
  DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_5  ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
  DECLARE_ALIGNED(16, const xmm_reg,  ff_pw_8  ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h

index 2bd05ce..d9c2f44 100644 (file)
--- a/libavcodec/x86/dsputil_mmx.h
+++ b/libavcodec/x86/dsputil_mmx.h
@@ -32,7 +32,7 @@ extern const uint64_t ff_wtwo;
  
  extern const uint64_t ff_pdw_80000000[2];
  
-extern const uint64_t ff_pw_3;
+extern const xmm_reg  ff_pw_3;
  extern const xmm_reg  ff_pw_4;
  extern const xmm_reg  ff_pw_5;
  extern const xmm_reg  ff_pw_8;
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm

index 6df82cc..3bb5ed4 100644 (file)
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -530,9 +530,8 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
      add           r4, 8
      sub           r4, r5          ; 255*x+8 = x<<8 | (8-x)
      movd          m7, r4d
-    movq          m6, [rnd_1d_%2]
+    movdqa        m6, [rnd_1d_%2]
      pshuflw       m7, m7, 0
-    movlhps       m6, m6
      movlhps       m7, m7
  
  .next2xrows
@@ -568,9 +567,8 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
      add           r5, 8
      sub           r5, r4          ; 255*y+8 = y<<8 | (8-y)
      movd          m7, r5d
-    movq          m6, [rnd_1d_%2]
+    movdqa        m6, [rnd_1d_%2]
      pshuflw       m7, m7, 0
-    movlhps       m6, m6
      movlhps       m7, m7
  
  .next2yrows
author	Ronald S. Bultje <rsbultje@gmail.com>
	Fri, 24 Dec 2010 17:23:22 +0000 (17:23 +0000)
committer	Ronald S. Bultje <rsbultje@gmail.com>
	Fri, 24 Dec 2010 17:23:22 +0000 (17:23 +0000)
libavcodec/x86/dsputil_mmx.c		patch \| blob \| history
libavcodec/x86/dsputil_mmx.h		patch \| blob \| history
libavcodec/x86/h264_chromamc.asm		patch \| blob \| history