replace a few mov + psrlq pairs with pshufw; there are more cases which could benefit from...
author: Michael Niedermayer <michaelni@gmx.at>
Wed, 21 Sep 2005 21:17:09 +0000 (21:17 +0000)
committer: Michael Niedermayer <michaelni@gmx.at>
Wed, 21 Sep 2005 21:17:09 +0000 (21:17 +0000)
the trick is from various places (my own code in libpostproc, a patch on the x264 list, ...)

Originally committed as revision 4608 to svn://svn.ffmpeg.org/ffmpeg/trunk

libavcodec/i386/dsputil_mmx.c
libavcodec/i386/mpegvideo_mmx_template.c

index 5d9d4995c41228bdbf9fbb65fb8a573c3c9fab58..51ed07cd1a22f37cbf1a2e2340919d4b23925362 100644 (file)
@@ -1621,11 +1621,9 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
         "movq 64(%1), %%mm1            \n\t"
         MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
         
-        "movq %%mm0, %%mm1             \n\t"
-        "psrlq $32, %%mm0              \n\t"
+        "pshufw $0x0E, %%mm0, %%mm1     \n\t"
         "paddusw %%mm1, %%mm0          \n\t"
-        "movq %%mm0, %%mm1             \n\t"
-        "psrlq $16, %%mm0              \n\t"
+        "pshufw $0x01, %%mm0, %%mm1     \n\t"
         "paddusw %%mm1, %%mm0          \n\t"
         "movd %%mm0, %0                        \n\t"
                 
index c9354dc1bbf7dd06e0d5c1c90306089550306afc..93f156ee55a620ebe7fda2d207adb794be77ad5a 100644 (file)
 #ifdef HAVE_MMX2
 #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
 #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
-
+#define PMAX(a,b) \
+            "pshufw $0x0E," #a ", " #b "               \n\t"\
+           PMAXW(b, a)\
+            "pshufw $0x01," #a ", " #b "               \n\t"\
+           PMAXW(b, a)
 #else
 #define SPREADW(a) \
        "punpcklwd " #a ", " #a " \n\t"\
 #define PMAXW(a,b) \
        "psubusw " #a ", " #b " \n\t"\
        "paddw " #a ", " #b " \n\t"
+#define PMAX(a,b)  \
+            "movq " #a ", " #b "               \n\t"\
+            "psrlq $32, " #a "                 \n\t"\
+           PMAXW(b, a)\
+            "movq " #a ", " #b "               \n\t"\
+            "psrlq $16, " #a "                 \n\t"\
+           PMAXW(b, a)
+
 #endif
 
 static int RENAME(dct_quantize)(MpegEncContext *s,
@@ -119,12 +131,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
            PMAXW(%%mm0, %%mm3)
             "add $8, %%"REG_a"                 \n\t"
             " js 1b                            \n\t"
-            "movq %%mm3, %%mm0                 \n\t"
-            "psrlq $32, %%mm3                  \n\t"
-           PMAXW(%%mm0, %%mm3)
-            "movq %%mm3, %%mm0                 \n\t"
-            "psrlq $16, %%mm3                  \n\t"
-           PMAXW(%%mm0, %%mm3)
+           PMAX(%%mm3, %%mm0)
             "movd %%mm3, %%"REG_a"             \n\t"
             "movzb %%al, %%"REG_a"             \n\t" // last_non_zero_p1
            : "+a" (last_non_zero_p1)
@@ -170,12 +177,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
            PMAXW(%%mm0, %%mm3)
             "add $8, %%"REG_a"                 \n\t"
             " js 1b                            \n\t"
-            "movq %%mm3, %%mm0                 \n\t"
-            "psrlq $32, %%mm3                  \n\t"
-           PMAXW(%%mm0, %%mm3)
-            "movq %%mm3, %%mm0                 \n\t"
-            "psrlq $16, %%mm3                  \n\t"
-           PMAXW(%%mm0, %%mm3)
+           PMAX(%%mm3, %%mm0)
             "movd %%mm3, %%"REG_a"             \n\t"
             "movzb %%al, %%"REG_a"             \n\t" // last_non_zero_p1
            : "+a" (last_non_zero_p1)