From 32ff6432284f713e9f837ee5b36fc8e9f1902836 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 21 Jan 2013 18:02:30 -0800 Subject: [PATCH] dsputil: remove avg_no_rnd_pixels8. This is never used. --- libavcodec/alpha/dsputil_alpha.c | 5 - libavcodec/arm/dsputil_init_neon.c | 7 - libavcodec/arm/dsputil_neon.S | 6 +- libavcodec/dsputil.c | 1 - libavcodec/dsputil.h | 8 +- libavcodec/sh4/dsputil_align.c | 9 - libavcodec/sparc/dsputil_vis.c | 461 ---------------------------------- libavcodec/x86/dsputil_mmx.c | 3 +- libavcodec/x86/dsputil_rnd_template.c | 4 + 9 files changed, 13 insertions(+), 491 deletions(-) diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c index 4770fa7..5e9196d 100644 --- a/libavcodec/alpha/dsputil_alpha.c +++ b/libavcodec/alpha/dsputil_alpha.c @@ -308,11 +308,6 @@ void ff_dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) c->avg_pixels_tab[1][2] = avg_pixels_y2_axp; c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp; - c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels_axp; - c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels_x2_axp; - c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels_y2_axp; - c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels_xy2_axp; - c->clear_blocks = clear_blocks_axp; } diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 2b2dd0b..dee55e6 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -58,9 +58,6 @@ void ff_avg_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_avg_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); -void ff_avg_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); -void ff_avg_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); -void ff_avg_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int); void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); @@ -203,10 +200,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) c->avg_no_rnd_pixels_tab[0][1] = ff_avg_pixels16_x2_no_rnd_neon; c->avg_no_rnd_pixels_tab[0][2] = ff_avg_pixels16_y2_no_rnd_neon; c->avg_no_rnd_pixels_tab[0][3] = ff_avg_pixels16_xy2_no_rnd_neon; - c->avg_no_rnd_pixels_tab[1][0] = ff_avg_pixels8_neon; - c->avg_no_rnd_pixels_tab[1][1] = ff_avg_pixels8_x2_no_rnd_neon; - c->avg_no_rnd_pixels_tab[1][2] = ff_avg_pixels8_y2_no_rnd_neon; - c->avg_no_rnd_pixels_tab[1][3] = ff_avg_pixels8_xy2_no_rnd_neon; } c->add_pixels_clamped = ff_add_pixels_clamped_neon; diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index cf92817..f33fa33 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -421,9 +421,9 @@ function ff_avg_h264_qpel8_mc00_neon, export=1 endfunc pixfunc avg_, pixels8, avg=1 - pixfunc2 avg_, pixels8_x2, avg=1 - pixfunc2 avg_, pixels8_y2, avg=1 - pixfunc2 avg_, pixels8_xy2, avg=1 + pixfunc avg_, pixels8_x2, avg=1 + pixfunc avg_, pixels8_y2, avg=1 + pixfunc avg_, pixels8_xy2, avg=1 function ff_put_pixels_clamped_neon, export=1 vld1.16 {d16-d19}, [r0,:128]! diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 82da8dd..b78e172 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2745,7 +2745,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) dspfunc1(avg , 2, 4, depth);\ dspfunc1(avg , 3, 2, depth);\ dspfunc1(avg_no_rnd, 0, 16, depth);\ - dspfunc1(avg_no_rnd, 1, 8, depth);\ \ dspfunc2(put_h264_qpel, 0, 16, depth);\ dspfunc2(put_h264_qpel, 1, 8, depth);\ diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 1e0366b..e069ea1 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -280,15 +280,15 @@ typedef struct DSPContext { /** * Halfpel motion compensation with no rounding (a+b)>>1. - * this is an array[2][4] of motion compensation functions for 2 - * horizontal blocksizes (8,16) and the 4 halfpel positions
- * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ] + * this is an array[4] of motion compensation functions for 1 + * horizontal blocksize (16) and the 4 halfpel positions
+ * *pixels_tab[0][ xhalfpel + 2*yhalfpel ] * @param block destination into which the result is averaged (a+b)>>1 * @param pixels source * @param line_size number of bytes in a horizontal line of block * @param h height */ - op_pixels_func avg_no_rnd_pixels_tab[4][4]; + op_pixels_func avg_no_rnd_pixels_tab[1][4]; /** * Thirdpel motion compensation with rounding (a+b+1)>>1. diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c index 333e563..a37c9ba 100644 --- a/libavcodec/sh4/dsputil_align.c +++ b/libavcodec/sh4/dsputil_align.c @@ -294,11 +294,8 @@ DEFFUNC(put,no_rnd,xy,16,OP_XY,PACK) DEFFUNC(avg, rnd,o,8,OP_C,avg32) DEFFUNC(avg, rnd,x,8,OP_X,avg32) -DEFFUNC(avg,no_rnd,x,8,OP_X,avg32) DEFFUNC(avg, rnd,y,8,OP_Y,avg32) -DEFFUNC(avg,no_rnd,y,8,OP_Y,avg32) DEFFUNC(avg, rnd,xy,8,OP_XY,PACK) -DEFFUNC(avg,no_rnd,xy,8,OP_XY,PACK) DEFFUNC(avg, rnd,o,16,OP_C,avg32) DEFFUNC(avg, rnd,x,16,OP_X,avg32) DEFFUNC(avg,no_rnd,x,16,OP_X,avg32) @@ -311,7 +308,6 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK) #define put_no_rnd_pixels8_o put_rnd_pixels8_o #define put_no_rnd_pixels16_o put_rnd_pixels16_o -#define avg_no_rnd_pixels8_o avg_rnd_pixels8_o #define avg_no_rnd_pixels16_o avg_rnd_pixels16_o #define put_pixels8_c put_rnd_pixels8_o @@ -320,7 +316,6 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK) #define avg_pixels16_c avg_rnd_pixels16_o #define put_no_rnd_pixels8_c put_rnd_pixels8_o #define put_no_rnd_pixels16_c put_rnd_pixels16_o -#define avg_no_rnd_pixels8_c avg_rnd_pixels8_o #define avg_no_rnd_pixels16_c avg_rnd_pixels16_o #define QPEL @@ -367,10 +362,6 @@ void ff_dsputil_init_align(DSPContext* c, AVCodecContext *avctx) c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x; c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y; c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy; - c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_o; - c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x; - c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y; - c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy; } #ifdef QPEL diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c index 5bac83e..db2a6ae 100644 --- a/libavcodec/sparc/dsputil_vis.c +++ b/libavcodec/sparc/dsputil_vis.c @@ -2239,98 +2239,6 @@ static void MC_avg_no_round_o_16_vis (uint8_t * dest, const uint8_t * ref, vis_st64_2(TMP22, dest, 8); } -static void MC_avg_no_round_o_8_vis (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - ref = vis_alignaddr(ref); - - vis_ld64(ref[0], TMP0); - - vis_ld64(ref[8], TMP2); - - vis_ld64(dest[0], DST_0); - - vis_ld64(constants_fe[0], MASK_fe); - - vis_ld64(constants_7f[0], MASK_7f); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(constants128[0], CONST_128); - - ref += stride; - height = (height >> 1) - 1; - - do { /* 12 cycles */ - vis_ld64(ref[0], TMP0); - vis_xor(DST_0, REF_0, TMP4); - - vis_ld64(ref[8], TMP2); - vis_and(TMP4, MASK_fe, TMP4); - - vis_and(DST_0, REF_0, TMP6); - vis_ld64_2(dest, stride, DST_0); - ref += stride; - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_ld64(ref[0], TMP12); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64(ref[8], TMP2); - vis_xor(DST_0, REF_0, TMP0); - ref += stride; - - vis_and(TMP0, MASK_fe, TMP0); - - vis_and(TMP4, MASK_7f, TMP4); - - vis_padd16(TMP6, TMP4, TMP4); - vis_st64(TMP4, dest[0]); - dest += stride; - vis_mul8x16(CONST_128, TMP0, TMP0); - - vis_and(DST_0, REF_0, TMP6); - vis_ld64_2(dest, stride, DST_0); - - vis_faligndata(TMP12, TMP2, REF_0); - - vis_and(TMP0, MASK_7f, TMP0); - - vis_padd16(TMP6, TMP0, TMP4); - vis_st64(TMP4, dest[0]); - dest += stride; - } while (--height); - - vis_ld64(ref[0], TMP0); - vis_xor(DST_0, REF_0, TMP4); - - vis_ld64(ref[8], TMP2); - vis_and(TMP4, MASK_fe, TMP4); - - vis_and(DST_0, REF_0, TMP6); - vis_ld64_2(dest, stride, DST_0); - vis_mul8x16(CONST_128, TMP4, TMP4); - - vis_faligndata(TMP0, TMP2, REF_0); - - vis_xor(DST_0, REF_0, TMP0); - - vis_and(TMP0, MASK_fe, TMP0); - - vis_and(TMP4, MASK_7f, TMP4); - - vis_padd16(TMP6, TMP4, TMP4); - vis_st64(TMP4, dest[0]); - dest += stride; - vis_mul8x16(CONST_128, TMP0, TMP0); - - vis_and(DST_0, REF_0, TMP6); - - vis_and(TMP0, MASK_7f, TMP0); - - vis_padd16(TMP6, TMP0, TMP4); - vis_st64(TMP4, dest[0]); -} - static void MC_put_no_round_x_16_vis (uint8_t * dest, const uint8_t * ref, const int stride, int height) { @@ -2736,171 +2644,6 @@ static void MC_avg_no_round_x_16_vis (uint8_t * dest, const uint8_t * ref, } while (--height); } -static void MC_avg_no_round_x_8_vis (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_times_2 = stride << 1; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - vis_ld64(constants3[0], CONST_3); - vis_fzero(ZERO); - vis_ld64(constants256_512[0], CONST_256); - - ref = vis_alignaddr(ref); - height >>= 2; - do { /* 47 cycles */ - vis_ld64(ref[0], TMP0); - - vis_ld64_2(ref, 8, TMP2); - ref += stride; - - vis_alignaddr_g0((void *)off); - - vis_ld64(ref[0], TMP4); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, 8, TMP6); - ref += stride; - - vis_ld64(ref[0], TMP8); - - vis_ld64_2(ref, 8, TMP10); - ref += stride; - vis_faligndata(TMP4, TMP6, REF_4); - - vis_ld64(ref[0], TMP12); - - vis_ld64_2(ref, 8, TMP14); - ref += stride; - vis_faligndata(TMP8, TMP10, REF_S0); - - vis_faligndata(TMP12, TMP14, REF_S4); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP0, TMP2, REF_2); - - vis_ld64_2(dest, stride, DST_2); - vis_faligndata(TMP4, TMP6, REF_6); - - vis_faligndata(TMP8, TMP10, REF_S2); - - vis_faligndata(TMP12, TMP14, REF_S6); - } else { - vis_ld64(dest[0], DST_0); - vis_src1(TMP2, REF_2); - - vis_ld64_2(dest, stride, DST_2); - vis_src1(TMP6, REF_6); - - vis_src1(TMP10, REF_S2); - - vis_src1(TMP14, REF_S6); - } - - vis_pmerge(ZERO, REF_0, TMP0); - vis_mul8x16au(REF_0_1, CONST_256, TMP2); - - vis_pmerge(ZERO, REF_2, TMP4); - vis_mul8x16au(REF_2_1, CONST_256, TMP6); - - vis_padd16(TMP0, CONST_3, TMP0); - vis_mul8x16al(DST_0, CONST_512, TMP16); - - vis_padd16(TMP2, CONST_3, TMP2); - vis_mul8x16al(DST_1, CONST_512, TMP18); - - vis_padd16(TMP0, TMP4, TMP0); - vis_mul8x16au(REF_4, CONST_256, TMP8); - - vis_padd16(TMP2, TMP6, TMP2); - vis_mul8x16au(REF_4_1, CONST_256, TMP10); - - vis_padd16(TMP0, TMP16, TMP0); - vis_mul8x16au(REF_6, CONST_256, TMP12); - - vis_padd16(TMP2, TMP18, TMP2); - vis_mul8x16au(REF_6_1, CONST_256, TMP14); - - vis_padd16(TMP8, CONST_3, TMP8); - vis_mul8x16al(DST_2, CONST_512, TMP16); - - vis_padd16(TMP8, TMP12, TMP8); - vis_mul8x16al(DST_3, CONST_512, TMP18); - - vis_padd16(TMP10, TMP14, TMP10); - vis_pack16(TMP0, DST_0); - - vis_pack16(TMP2, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - vis_padd16(TMP10, CONST_3, TMP10); - - vis_ld64_2(dest, stride, DST_0); - vis_padd16(TMP8, TMP16, TMP8); - - vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/); - vis_padd16(TMP10, TMP18, TMP10); - vis_pack16(TMP8, DST_2); - - vis_pack16(TMP10, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - - vis_mul8x16au(REF_S0_1, CONST_256, TMP2); - vis_pmerge(ZERO, REF_S0, TMP0); - - vis_pmerge(ZERO, REF_S2, TMP24); - vis_mul8x16au(REF_S2_1, CONST_256, TMP6); - - vis_padd16(TMP0, CONST_3, TMP0); - vis_mul8x16au(REF_S4, CONST_256, TMP8); - - vis_padd16(TMP2, CONST_3, TMP2); - vis_mul8x16au(REF_S4_1, CONST_256, TMP10); - - vis_padd16(TMP0, TMP24, TMP0); - vis_mul8x16au(REF_S6, CONST_256, TMP12); - - vis_padd16(TMP2, TMP6, TMP2); - vis_mul8x16au(REF_S6_1, CONST_256, TMP14); - - vis_padd16(TMP8, CONST_3, TMP8); - vis_mul8x16al(DST_0, CONST_512, TMP16); - - vis_padd16(TMP10, CONST_3, TMP10); - vis_mul8x16al(DST_1, CONST_512, TMP18); - - vis_padd16(TMP8, TMP12, TMP8); - vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20); - - vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22); - vis_padd16(TMP0, TMP16, TMP0); - - vis_padd16(TMP2, TMP18, TMP2); - vis_pack16(TMP0, DST_0); - - vis_padd16(TMP10, TMP14, TMP10); - vis_pack16(TMP2, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_padd16(TMP8, TMP20, TMP8); - - vis_padd16(TMP10, TMP22, TMP10); - vis_pack16(TMP8, DST_2); - - vis_pack16(TMP10, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - static void MC_put_no_round_y_16_vis (uint8_t * dest, const uint8_t * ref, const int stride, int height) { @@ -3273,85 +3016,6 @@ static void MC_avg_no_round_y_16_vis (uint8_t * dest, const uint8_t * ref, } while (--height); } -static void MC_avg_no_round_y_8_vis (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - int stride_8 = stride + 8; - - vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - - vis_ld64(ref[ 0], TMP0); - vis_fzero(ZERO); - - vis_ld64(ref[ 8], TMP2); - - vis_ld64(constants3[0], CONST_3); - vis_faligndata(TMP0, TMP2, REF_2); - - vis_ld64(constants256_512[0], CONST_256); - - height >>= 1; - do { /* 20 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_pmerge(ZERO, REF_2, TMP8); - vis_mul8x16au(REF_2_1, CONST_256, TMP10); - - vis_ld64_2(ref, stride_8, TMP2); - ref += stride; - - vis_ld64(dest[0], DST_0); - - vis_ld64_2(dest, stride, DST_2); - vis_faligndata(TMP0, TMP2, REF_0); - - vis_ld64_2(ref, stride, TMP4); - vis_mul8x16al(DST_0, CONST_512, TMP16); - vis_pmerge(ZERO, REF_0, TMP12); - - vis_ld64_2(ref, stride_8, TMP6); - ref += stride; - vis_mul8x16al(DST_1, CONST_512, TMP18); - vis_pmerge(ZERO, REF_0_1, TMP14); - - vis_padd16(TMP12, CONST_3, TMP12); - vis_mul8x16al(DST_2, CONST_512, TMP24); - - vis_padd16(TMP14, CONST_3, TMP14); - vis_mul8x16al(DST_3, CONST_512, TMP26); - - vis_faligndata(TMP4, TMP6, REF_2); - - vis_padd16(TMP8, TMP12, TMP8); - - vis_padd16(TMP10, TMP14, TMP10); - vis_mul8x16au(REF_2, CONST_256, TMP20); - - vis_padd16(TMP8, TMP16, TMP0); - vis_mul8x16au(REF_2_1, CONST_256, TMP22); - - vis_padd16(TMP10, TMP18, TMP2); - vis_pack16(TMP0, DST_0); - - vis_pack16(TMP2, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - vis_padd16(TMP12, TMP20, TMP12); - - vis_padd16(TMP14, TMP22, TMP14); - - vis_padd16(TMP12, TMP24, TMP0); - - vis_padd16(TMP14, TMP26, TMP2); - vis_pack16(TMP0, DST_2); - - vis_pack16(TMP2, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - static void MC_put_no_round_xy_16_vis (uint8_t * dest, const uint8_t * ref, const int stride, int height) { @@ -3816,126 +3480,6 @@ static void MC_avg_no_round_xy_16_vis (uint8_t * dest, const uint8_t * ref, } while (--height); } -static void MC_avg_no_round_xy_8_vis (uint8_t * dest, const uint8_t * ref, - const int stride, int height) -{ - unsigned long off = (unsigned long) ref & 0x7; - unsigned long off_plus_1 = off + 1; - int stride_8 = stride + 8; - - vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT); - - ref = vis_alignaddr(ref); - - vis_ld64(ref[0], TMP0); - vis_fzero(ZERO); - - vis_ld64_2(ref, 8, TMP2); - - vis_ld64(constants6[0], CONST_6); - - vis_ld64(constants256_1024[0], CONST_256); - vis_faligndata(TMP0, TMP2, REF_S0); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S2); - } else { - vis_src1(TMP2, REF_S2); - } - - height >>= 1; - do { /* 31 cycles */ - vis_ld64_2(ref, stride, TMP0); - vis_mul8x16au(REF_S0, CONST_256, TMP8); - vis_pmerge(ZERO, REF_S0_1, TMP10); - - vis_ld64_2(ref, stride_8, TMP2); - ref += stride; - vis_mul8x16au(REF_S2, CONST_256, TMP12); - vis_pmerge(ZERO, REF_S2_1, TMP14); - - vis_alignaddr_g0((void *)off); - - vis_ld64_2(ref, stride, TMP4); - vis_faligndata(TMP0, TMP2, REF_S4); - - vis_ld64_2(ref, stride_8, TMP6); - ref += stride; - - vis_ld64(dest[0], DST_0); - vis_faligndata(TMP4, TMP6, REF_S0); - - vis_ld64_2(dest, stride, DST_2); - - if (off != 0x7) { - vis_alignaddr_g0((void *)off_plus_1); - vis_faligndata(TMP0, TMP2, REF_S6); - vis_faligndata(TMP4, TMP6, REF_S2); - } else { - vis_src1(TMP2, REF_S6); - vis_src1(TMP6, REF_S2); - } - - vis_mul8x16al(DST_0, CONST_1024, TMP30); - vis_pmerge(ZERO, REF_S4, TMP22); - - vis_mul8x16al(DST_1, CONST_1024, TMP32); - vis_pmerge(ZERO, REF_S4_1, TMP24); - - vis_mul8x16au(REF_S6, CONST_256, TMP26); - vis_pmerge(ZERO, REF_S6_1, TMP28); - - vis_mul8x16au(REF_S0, CONST_256, REF_S4); - vis_padd16(TMP22, CONST_6, TMP22); - - vis_mul8x16au(REF_S0_1, CONST_256, REF_S6); - vis_padd16(TMP24, CONST_6, TMP24); - - vis_mul8x16al(DST_2, CONST_1024, REF_0); - vis_padd16(TMP22, TMP26, TMP22); - - vis_mul8x16al(DST_3, CONST_1024, REF_2); - vis_padd16(TMP24, TMP28, TMP24); - - vis_mul8x16au(REF_S2, CONST_256, TMP26); - vis_padd16(TMP8, TMP22, TMP8); - - vis_mul8x16au(REF_S2_1, CONST_256, TMP28); - vis_padd16(TMP10, TMP24, TMP10); - - vis_padd16(TMP8, TMP12, TMP8); - - vis_padd16(TMP10, TMP14, TMP10); - - vis_padd16(TMP8, TMP30, TMP8); - - vis_padd16(TMP10, TMP32, TMP10); - vis_pack16(TMP8, DST_0); - - vis_pack16(TMP10, DST_1); - vis_st64(DST_0, dest[0]); - dest += stride; - - vis_padd16(REF_S4, TMP22, TMP12); - - vis_padd16(REF_S6, TMP24, TMP14); - - vis_padd16(TMP12, TMP26, TMP12); - - vis_padd16(TMP14, TMP28, TMP14); - - vis_padd16(TMP12, REF_0, TMP12); - - vis_padd16(TMP14, REF_2, TMP14); - vis_pack16(TMP12, DST_2); - - vis_pack16(TMP14, DST_3); - vis_st64(DST_2, dest[0]); - dest += stride; - } while (--height); -} - /* End of no rounding code */ #define ACCEL_SPARC_VIS 1 @@ -4000,11 +3544,6 @@ void ff_dsputil_init_vis(DSPContext* c, AVCodecContext *avctx) c->avg_no_rnd_pixels_tab[0][1] = MC_avg_no_round_x_16_vis; c->avg_no_rnd_pixels_tab[0][2] = MC_avg_no_round_y_16_vis; c->avg_no_rnd_pixels_tab[0][3] = MC_avg_no_round_xy_16_vis; - - c->avg_no_rnd_pixels_tab[1][0] = MC_avg_no_round_o_8_vis; - c->avg_no_rnd_pixels_tab[1][1] = MC_avg_no_round_x_8_vis; - c->avg_no_rnd_pixels_tab[1][2] = MC_avg_no_round_y_8_vis; - c->avg_no_rnd_pixels_tab[1][3] = MC_avg_no_round_xy_8_vis; } } } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 8866813..e468621 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -160,6 +160,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; /***********************************/ /* MMX no rounding */ +#define NO_RND 1 #define DEF(x, y) x ## _no_rnd_ ## y ## _mmx #define SET_RND MOVQ_WONE #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f) @@ -172,6 +173,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; #undef SET_RND #undef PAVGBP #undef PAVGB +#undef NO_RND /***********************************/ /* MMX rounding */ @@ -1917,7 +1919,6 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) SET_HPEL_FUNCS(put, 1, 8, mmx); SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx); SET_HPEL_FUNCS(avg, 1, 8, mmx); - SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx); switch (avctx->idct_algo) { case FF_IDCT_AUTO: diff --git a/libavcodec/x86/dsputil_rnd_template.c b/libavcodec/x86/dsputil_rnd_template.c index 34a2c0b..7fcc6b7 100644 --- a/libavcodec/x86/dsputil_rnd_template.c +++ b/libavcodec/x86/dsputil_rnd_template.c @@ -317,6 +317,7 @@ static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, i while (--h); } +#ifndef NO_RND // in case more speed is needed - unroling would certainly help static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { @@ -336,6 +337,7 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si } while (--h); } +#endif // NO_RND static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { @@ -360,6 +362,7 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s while (--h); } +#ifndef NO_RND static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { MOVQ_BFE(mm6); @@ -379,6 +382,7 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line block += line_size; } while (--h); } +#endif // NO_RND static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { -- 2.7.4