x86: h264: Remove 3dnow QPEL code

author Daniel Kang <daniel.d.kang@gmail.com>
Sat, 13 Oct 2012 15:04:49 +0000 (10:04 -0500)
committer Diego Biurrun <diego@biurrun.de>
Sun, 25 Nov 2012 19:32:55 +0000 (20:32 +0100)
diff --git a/libavcodec/x86/dsputil_avg_template.c b/libavcodec/x86/dsputil_avg_template.c

index 89eaedf9ea2143e24f53f5b30bab16e599558d5b..cffcd4e2847cd6fde533767973dbc749eacd530a 100644 (file)
--- a/libavcodec/x86/dsputil_avg_template.c
+++ b/libavcodec/x86/dsputil_avg_template.c
@@ -55,6 +55,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_
          :"%"REG_a, "memory");
  }
  
+#ifndef SKIP_FOR_3DNOW
  static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
  {
      __asm__ volatile(
@@ -332,6 +333,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int
          :"r"(src1Stride), "r"(dstStride)
          :"memory");*/
  }
+#endif /* SKIP_FOR_3DNOW */
  
  static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  {
@@ -373,6 +375,7 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line
          :"%"REG_a, "memory");
  }
  
+#ifndef SKIP_FOR_3DNOW
  static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
  {
      __asm__ volatile(
@@ -547,6 +550,7 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr
          :"r"(src1Stride), "r"(dstStride)
          :"memory");*/
  }
+#endif /* SKIP_FOR_3DNOW */
  
  /* GL: this function does incorrect rounding if overflow */
  static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@@ -872,6 +876,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line
          :"%"REG_a,  "memory");
  }
  
+#ifndef SKIP_FOR_3DNOW
  static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  {
      do {
@@ -896,6 +901,7 @@ static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_siz
          h -= 4;
      } while(h > 0);
  }
+#endif /* SKIP_FOR_3DNOW */
  
  //FIXME the following could be optimized too ...
  static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
@@ -968,6 +974,7 @@ static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride,
      );\
  }
  
+#ifndef SKIP_FOR_3DNOW
  #define STORE_OP(a,b) PAVGB" "#a","#b" \n\t"
  QPEL_2TAP_L3(avg_)
  #undef STORE_OP
@@ -975,3 +982,4 @@ QPEL_2TAP_L3(avg_)
  QPEL_2TAP_L3(put_)
  #undef STORE_OP
  #undef QPEL_2TAP_L3
+#endif /* SKIP_FOR_3DNOW */
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c

index ec22ce7de5bedfa2d9bea4b611608c23a684fdec..ed6cff3e679df8032cd681233ab9a673452bca2b 100644 (file)
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -197,12 +197,14 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
  #define DEF(x) x ## _3dnow
  #define PAVGB "pavgusb"
  #define OP_AVG PAVGB
+#define SKIP_FOR_3DNOW
  
  #include "dsputil_avg_template.c"
  
  #undef DEF
  #undef PAVGB
  #undef OP_AVG
+#undef SKIP_FOR_3DNOW
  
  /***********************************/
  /* MMXEXT specific */
@@ -226,11 +228,6 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
  #define put_pixels4_mmxext put_pixels4_mmx
  #define put_no_rnd_pixels16_mmxext put_no_rnd_pixels16_mmx
  #define put_no_rnd_pixels8_mmxext put_no_rnd_pixels8_mmx
-#define put_pixels16_3dnow put_pixels16_mmx
-#define put_pixels8_3dnow put_pixels8_mmx
-#define put_pixels4_3dnow put_pixels4_mmx
-#define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx
-#define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx
  
  /***********************************/
  /* standard MMX */
@@ -923,7 +920,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
      "packuswb            %%mm5, %%mm5   \n\t"                             \
      OP(%%mm5, out, %%mm7, d)
  
-#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW)              \
+#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT)                        \
  static void OPNAME ## mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,         \
                                                      uint8_t *src,         \
                                                      int dstStride,        \
@@ -1051,73 +1048,6 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst,         \
          );                                                                \
  }                                                                         \
                                                                            \
-static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst,          \
-                                                   uint8_t *src,          \
-                                                   int dstStride,         \
-                                                   int srcStride,         \
-                                                   int h)                 \
-{                                                                         \
-    int i;                                                                \
-    int16_t temp[16];                                                     \
-    /* quick HACK, XXX FIXME MUST be optimized */                         \
-    for (i = 0; i < h; i++) {                                             \
-        temp[ 0] = (src[ 0] + src[ 1]) * 20 - (src[ 0] + src[ 2]) * 6 +   \
-                   (src[ 1] + src[ 3]) *  3 - (src[ 2] + src[ 4]);        \
-        temp[ 1] = (src[ 1] + src[ 2]) * 20 - (src[ 0] + src[ 3]) * 6 +   \
-                   (src[ 0] + src[ 4]) *  3 - (src[ 1] + src[ 5]);        \
-        temp[ 2] = (src[ 2] + src[ 3]) * 20 - (src[ 1] + src[ 4]) * 6 +   \
-                   (src[ 0] + src[ 5]) *  3 - (src[ 0] + src[ 6]);        \
-        temp[ 3] = (src[ 3] + src[ 4]) * 20 - (src[ 2] + src[ 5]) * 6 +   \
-                   (src[ 1] + src[ 6]) *  3 - (src[ 0] + src[ 7]);        \
-        temp[ 4] = (src[ 4] + src[ 5]) * 20 - (src[ 3] + src[ 6]) * 6 +   \
-                   (src[ 2] + src[ 7]) *  3 - (src[ 1] + src[ 8]);        \
-        temp[ 5] = (src[ 5] + src[ 6]) * 20 - (src[ 4] + src[ 7]) * 6 +   \
-                   (src[ 3] + src[ 8]) *  3 - (src[ 2] + src[ 9]);        \
-        temp[ 6] = (src[ 6] + src[ 7]) * 20 - (src[ 5] + src[ 8]) * 6 +   \
-                   (src[ 4] + src[ 9]) *  3 - (src[ 3] + src[10]);        \
-        temp[ 7] = (src[ 7] + src[ 8]) * 20 - (src[ 6] + src[ 9]) * 6 +   \
-                   (src[ 5] + src[10]) *  3 - (src[ 4] + src[11]);        \
-        temp[ 8] = (src[ 8] + src[ 9]) * 20 - (src[ 7] + src[10]) * 6 +   \
-                   (src[ 6] + src[11]) *  3 - (src[ 5] + src[12]);        \
-        temp[ 9] = (src[ 9] + src[10]) * 20 - (src[ 8] + src[11]) * 6 +   \
-                   (src[ 7] + src[12]) *  3 - (src[ 6] + src[13]);        \
-        temp[10] = (src[10] + src[11]) * 20 - (src[ 9] + src[12]) * 6 +   \
-                   (src[ 8] + src[13]) *  3 - (src[ 7] + src[14]);        \
-        temp[11] = (src[11] + src[12]) * 20 - (src[10] + src[13]) * 6 +   \
-                   (src[ 9] + src[14]) *  3 - (src[ 8] + src[15]);        \
-        temp[12] = (src[12] + src[13]) * 20 - (src[11] + src[14]) * 6 +   \
-                   (src[10] + src[15]) *  3 - (src[ 9] + src[16]);        \
-        temp[13] = (src[13] + src[14]) * 20 - (src[12] + src[15]) * 6 +   \
-                   (src[11] + src[16]) *  3 - (src[10] + src[16]);        \
-        temp[14] = (src[14] + src[15]) * 20 - (src[13] + src[16]) * 6 +   \
-                   (src[12] + src[16]) *  3 - (src[11] + src[15]);        \
-        temp[15] = (src[15] + src[16]) * 20 - (src[14] + src[16]) * 6 +   \
-                   (src[13] + src[15]) *  3 - (src[12] + src[14]);        \
-        __asm__ volatile (                                                \
-            "movq      (%0), %%mm0          \n\t"                         \
-            "movq     8(%0), %%mm1          \n\t"                         \
-            "paddw       %2, %%mm0          \n\t"                         \
-            "paddw       %2, %%mm1          \n\t"                         \
-            "psraw       $5, %%mm0          \n\t"                         \
-            "psraw       $5, %%mm1          \n\t"                         \
-            "packuswb %%mm1, %%mm0          \n\t"                         \
-            OP_3DNOW(%%mm0, (%1), %%mm1, q)                               \
-            "movq    16(%0), %%mm0          \n\t"                         \
-            "movq    24(%0), %%mm1          \n\t"                         \
-            "paddw       %2, %%mm0          \n\t"                         \
-            "paddw       %2, %%mm1          \n\t"                         \
-            "psraw       $5, %%mm0          \n\t"                         \
-            "psraw       $5, %%mm1          \n\t"                         \
-            "packuswb %%mm1, %%mm0          \n\t"                         \
-            OP_3DNOW(%%mm0, 8(%1), %%mm1, q)                              \
-            :: "r"(temp), "r"(dst), "m"(ROUNDER)                          \
-            : "memory"                                                    \
-            );                                                            \
-        dst += dstStride;                                                 \
-        src += srcStride;                                                 \
-    }                                                                     \
-}                                                                         \
-                                                                          \
  static void OPNAME ## mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,          \
                                                     uint8_t *src,          \
                                                     int dstStride,         \
@@ -1186,49 +1116,6 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst,          \
            /* "m"(ff_pw_20), "m"(ff_pw_3), */ "m"(ROUNDER)                 \
          : "memory"                                                        \
          );                                                                \
-}                                                                         \
-                                                                          \
-static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst,           \
-                                                  uint8_t *src,           \
-                                                  int dstStride,          \
-                                                  int srcStride,          \
-                                                  int h)                  \
-{                                                                         \
-    int i;                                                                \
-    int16_t temp[8];                                                      \
-    /* quick HACK, XXX FIXME MUST be optimized */                         \
-    for (i = 0; i < h; i++) {                                             \
-        temp[0] = (src[0] + src[1]) * 20 - (src[0] + src[2]) * 6 +        \
-                  (src[1] + src[3]) *  3 - (src[2] + src[4]);             \
-        temp[1] = (src[1] + src[2]) * 20 - (src[0] + src[3]) * 6 +        \
-                  (src[0] + src[4]) *  3 - (src[1] + src[5]);             \
-        temp[2] = (src[2] + src[3]) * 20 - (src[1] + src[4]) * 6 +        \
-                  (src[0] + src[5]) *  3 - (src[0] + src[6]);             \
-        temp[3] = (src[3] + src[4]) * 20 - (src[2] + src[5]) * 6 +        \
-                  (src[1] + src[6]) *  3 - (src[0] + src[7]);             \
-        temp[4] = (src[4] + src[5]) * 20 - (src[3] + src[6]) * 6 +        \
-                  (src[2] + src[7]) *  3 - (src[1] + src[8]);             \
-        temp[5] = (src[5] + src[6]) * 20 - (src[4] + src[7]) * 6 +        \
-                  (src[3] + src[8]) *  3 - (src[2] + src[8]);             \
-        temp[6] = (src[6] + src[7]) * 20 - (src[5] + src[8]) * 6 +        \
-                  (src[4] + src[8]) *  3 - (src[3] + src[7]);             \
-        temp[7] = (src[7] + src[8]) * 20 - (src[6] + src[8]) * 6 +        \
-                  (src[5] + src[7]) *  3 - (src[4] + src[6]);             \
-        __asm__ volatile (                                                \
-            "movq      (%0), %%mm0      \n\t"                             \
-            "movq     8(%0), %%mm1      \n\t"                             \
-            "paddw       %2, %%mm0      \n\t"                             \
-            "paddw       %2, %%mm1      \n\t"                             \
-            "psraw       $5, %%mm0      \n\t"                             \
-            "psraw       $5, %%mm1      \n\t"                             \
-            "packuswb %%mm1, %%mm0      \n\t"                             \
-            OP_3DNOW(%%mm0, (%1), %%mm1, q)                               \
-            :: "r"(temp), "r"(dst), "m"(ROUNDER)                          \
-            : "memory"                                                    \
-            );                                                            \
-        dst += dstStride;                                                 \
-        src += srcStride;                                                 \
-    }                                                                     \
  }
  
  #define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)                          \
@@ -1739,22 +1626,14 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src,   \
  #define PUT_OP(a, b, temp, size)                \
      "mov"#size"        "#a", "#b"       \n\t"
  
-#define AVG_3DNOW_OP(a, b, temp, size)          \
-    "mov"#size"        "#b", "#temp"    \n\t"   \
-    "pavgusb        "#temp", "#a"       \n\t"   \
-    "mov"#size"        "#a", "#b"       \n\t"
-
  #define AVG_MMXEXT_OP(a, b, temp, size)         \
      "mov"#size"        "#b", "#temp"    \n\t"   \
      "pavgb          "#temp", "#a"       \n\t"   \
      "mov"#size"        "#a", "#b"       \n\t"
  
-QPEL_BASE(put_,        ff_pw_16, _,        PUT_OP,       PUT_OP)
-QPEL_BASE(avg_,        ff_pw_16, _,        AVG_MMXEXT_OP, AVG_3DNOW_OP)
-QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP,       PUT_OP)
-QPEL_OP(put_,          ff_pw_16, _,        PUT_OP,       3dnow)
-QPEL_OP(avg_,          ff_pw_16, _,        AVG_3DNOW_OP, 3dnow)
-QPEL_OP(put_no_rnd_,   ff_pw_15, _no_rnd_, PUT_OP,       3dnow)
+QPEL_BASE(put_,        ff_pw_16, _,        PUT_OP)
+QPEL_BASE(avg_,        ff_pw_16, _,        AVG_MMXEXT_OP)
+QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP)
  QPEL_OP(put_,          ff_pw_16, _,        PUT_OP,        mmxext)
  QPEL_OP(avg_,          ff_pw_16, _,        AVG_MMXEXT_OP, mmxext)
  QPEL_OP(put_no_rnd_,   ff_pw_15, _no_rnd_, PUT_OP,        mmxext)
@@ -1815,10 +1694,6 @@ QPEL_2TAP(put_, 16, mmxext)
  QPEL_2TAP(avg_, 16, mmxext)
  QPEL_2TAP(put_,  8, mmxext)
  QPEL_2TAP(avg_,  8, mmxext)
-QPEL_2TAP(put_, 16, 3dnow)
-QPEL_2TAP(avg_, 16, 3dnow)
-QPEL_2TAP(put_,  8, 3dnow)
-QPEL_2TAP(avg_,  8, 3dnow)
  
  void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride)
  {
@@ -2615,29 +2490,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
          c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
      }
  
-    if (CONFIG_H264QPEL) {
-        SET_QPEL_FUNCS(put_qpel,        0, 16, 3dnow, );
-        SET_QPEL_FUNCS(put_qpel,        1,  8, 3dnow, );
-        SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
-        SET_QPEL_FUNCS(put_no_rnd_qpel, 1,  8, 3dnow, );
-        SET_QPEL_FUNCS(avg_qpel,        0, 16, 3dnow, );
-        SET_QPEL_FUNCS(avg_qpel,        1,  8, 3dnow, );
-
-        if (!high_bit_depth) {
-            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow, );
-            SET_QPEL_FUNCS(put_h264_qpel, 1,  8, 3dnow, );
-            SET_QPEL_FUNCS(put_h264_qpel, 2,  4, 3dnow, );
-            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow, );
-            SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, 3dnow, );
-            SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, 3dnow, );
-        }
-
-        SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow, );
-        SET_QPEL_FUNCS(put_2tap_qpel, 1,  8, 3dnow, );
-        SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
-        SET_QPEL_FUNCS(avg_2tap_qpel, 1,  8, 3dnow, );
-    }
-
      c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
  #endif /* HAVE_INLINE_ASM */
  
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c

index cf944c139640f06a174c5e036591592953cbe8fb..075506f25bd25af72a3e72a5aa5c4bfec4241054 100644 (file)
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -1163,9 +1163,6 @@ QPEL(put_, 16,XMM, 16)\
  QPEL(avg_, 8, XMM, 16)\
  QPEL(avg_, 16,XMM, 16)\
  
-#define PAVGB "pavgusb"
-QPEL_H264(put_,       PUT_OP, 3dnow)
-QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
  #undef PAVGB
  #define PAVGB "pavgb"
  QPEL_H264(put_,        PUT_OP, mmxext)
@@ -1184,7 +1181,6 @@ QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3)
  #endif
  #undef PAVGB
  
-H264_MC_4816(3dnow)
  H264_MC_4816(mmxext)
  H264_MC_816(H264_MC_V, sse2)
  H264_MC_816(H264_MC_HV, sse2)
author	Daniel Kang <daniel.d.kang@gmail.com>
	Sat, 13 Oct 2012 15:04:49 +0000 (10:04 -0500)
committer	Diego Biurrun <diego@biurrun.de>
	Sun, 25 Nov 2012 19:32:55 +0000 (20:32 +0100)
libavcodec/x86/dsputil_avg_template.c		patch | blob | history
libavcodec/x86/dsputil_mmx.c		patch | blob | history
libavcodec/x86/h264_qpel.c		patch | blob | history