av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
- int i, j;
-
ff_check_alignment();
#if CONFIG_ENCODERS
c->shrink[2]= ff_shrink44;
c->shrink[3]= ff_shrink88;
- memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
- memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
-
#undef FUNC
#undef FUNCC
#define FUNC(f, depth) f ## _ ## depth
if (ARCH_SH4) ff_dsputil_init_sh4 (c, avctx);
if (ARCH_BFIN) ff_dsputil_init_bfin (c, avctx);
- for (i = 0; i < 4; i++) {
- for (j = 0; j < 16; j++) {
- if(!c->put_2tap_qpel_pixels_tab[i][j])
- c->put_2tap_qpel_pixels_tab[i][j] =
- c->put_h264_qpel_pixels_tab[i][j];
- if(!c->avg_2tap_qpel_pixels_tab[i][j])
- c->avg_2tap_qpel_pixels_tab[i][j] =
- c->avg_h264_qpel_pixels_tab[i][j];
- }
- }
-
ff_init_scantable_permutation(c->idct_permutation,
c->idct_permutation_type);
}
DEF(avg_pixels8_xy2)(block , pixels , line_size, h);
DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
}
-
-#define QPEL_2TAP_L3(OPNAME) \
-static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
- __asm__ volatile(\
- "1: \n\t"\
- "movq (%1,%2), %%mm0 \n\t"\
- "movq 8(%1,%2), %%mm1 \n\t"\
- PAVGB" (%1,%3), %%mm0 \n\t"\
- PAVGB" 8(%1,%3), %%mm1 \n\t"\
- PAVGB" (%1), %%mm0 \n\t"\
- PAVGB" 8(%1), %%mm1 \n\t"\
- STORE_OP( (%1,%4),%%mm0)\
- STORE_OP(8(%1,%4),%%mm1)\
- "movq %%mm0, (%1,%4) \n\t"\
- "movq %%mm1, 8(%1,%4) \n\t"\
- "add %5, %1 \n\t"\
- "decl %0 \n\t"\
- "jnz 1b \n\t"\
- :"+g"(h), "+r"(src)\
- :"r"((x86_reg)off1), "r"((x86_reg)off2),\
- "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
- :"memory"\
- );\
-}\
-static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
- __asm__ volatile(\
- "1: \n\t"\
- "movq (%1,%2), %%mm0 \n\t"\
- PAVGB" (%1,%3), %%mm0 \n\t"\
- PAVGB" (%1), %%mm0 \n\t"\
- STORE_OP((%1,%4),%%mm0)\
- "movq %%mm0, (%1,%4) \n\t"\
- "add %5, %1 \n\t"\
- "decl %0 \n\t"\
- "jnz 1b \n\t"\
- :"+g"(h), "+r"(src)\
- :"r"((x86_reg)off1), "r"((x86_reg)off2),\
- "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
- :"memory"\
- );\
-}
-
-#ifndef SKIP_FOR_3DNOW
-#define STORE_OP(a,b) PAVGB" "#a","#b" \n\t"
-QPEL_2TAP_L3(avg_)
-#undef STORE_OP
-#define STORE_OP(a,b)
-QPEL_2TAP_L3(put_)
-#undef STORE_OP
-#undef QPEL_2TAP_L3
-#endif /* SKIP_FOR_3DNOW */
QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmxext)
QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmxext)
-/***********************************/
-/* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
-
-#define QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL) \
-static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, \
- uint8_t *src, \
- int stride) \
-{ \
- OPNAME ## pixels ## SIZE ## HPEL(dst, src, stride, SIZE); \
-}
-
-#define QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2) \
-static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, \
- uint8_t *src, \
- int stride) \
-{ \
- OPNAME ## 2tap_qpel ## SIZE ## _l3_ ## MMX(dst, src + S0, stride, SIZE, \
- S1, S2); \
-}
-
-#define QPEL_2TAP(OPNAME, SIZE, MMX) \
-QPEL_2TAP_XY(OPNAME, SIZE, MMX, 20, _x2_ ## MMX) \
-QPEL_2TAP_XY(OPNAME, SIZE, MMX, 02, _y2_ ## MMX) \
-QPEL_2TAP_XY(OPNAME, SIZE, MMX, 22, _xy2_mmx) \
-static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc00_ ## MMX = \
- OPNAME ## qpel ## SIZE ## _mc00_ ## MMX; \
-static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc21_ ## MMX = \
- OPNAME ## 2tap_qpel ## SIZE ## _mc20_ ## MMX; \
-static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc12_ ## MMX = \
- OPNAME ## 2tap_qpel ## SIZE ## _mc02_ ## MMX; \
-static void OPNAME ## 2tap_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, \
- uint8_t *src, \
- int stride) \
-{ \
- OPNAME ## pixels ## SIZE ## _y2_ ## MMX(dst, src + 1, stride, SIZE); \
-} \
-static void OPNAME ## 2tap_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, \
- uint8_t *src, \
- int stride) \
-{ \
- OPNAME ## pixels ## SIZE ## _x2_ ## MMX(dst, src + stride, \
- stride, SIZE); \
-} \
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 10, 0, 1, 0) \
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 30, 1, -1, 0) \
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 01, 0, stride, 0) \
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 03, stride, -stride, 0) \
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 11, 0, stride, 1) \
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1) \
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1) \
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride + 1, -stride, -1) \
-
-QPEL_2TAP(put_, 16, mmxext)
-QPEL_2TAP(avg_, 16, mmxext)
-QPEL_2TAP(put_, 8, mmxext)
-QPEL_2TAP(avg_, 8, mmxext)
-
void ff_put_rv40_qpel8_mc33_mmx(uint8_t *dst, uint8_t *src, int stride)
{
put_pixels8_xy2_mmx(dst, src, stride, 8);
#if HAVE_INLINE_ASM
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
- SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmxext, );
- SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmxext, );
SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, );
- SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmxext, );
- SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmxext, );
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, );