From 783df5f347f79643c07c5f6244b496a33e1354a0 Mon Sep 17 00:00:00 2001
From: Michael Niedermayer
Date: Fri, 2 Jan 2004 22:29:08 +0000
Subject: [PATCH] denoise_dct_mmx()

Originally committed as revision 2657 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
Reviewer notes (commentary only; this text sits between the "---" line and
the first "diff --git" line and is not part of the commit):

 * Adds denoise_dct_mmx(), an MMX inline-asm routine, and installs it in
   MPV_common_init_mmx() through the new MpegEncContext.denoise_dct
   function pointer.
 * ff_denoise_dct() is renamed to the file-local denoise_dct_c() and is
   installed as the default in DCT_common_init() under CONFIG_ENCODERS.
   RENAME(dct_quantize), dct_quantize_trellis_c() and dct_quantize_c() now
   call s->denoise_dct() instead of ff_denoise_dct().
 * denoise_dct_mmx() increments s->dct_count[intra], then loops until the
   block pointer reaches block+64. Each iteration handles 16 bytes of
   block as eight 16-bit words:
     - builds a sign mask (pcmpgtw against a zeroed register) and takes
       the absolute value of each word (pxor + psubw);
     - subtracts the matching 16-bit entries of s->dct_offset[intra] with
       unsigned saturation (psubusw), re-applies the sign and stores the
       result back into block;
     - widens the saved absolute values to 32 bits by interleaving with
       the zeroed mm7 and adds them into s->dct_error_sum[intra].
 * NOTE(review): the asm statement stores through %0 and %1 but lists no
   "memory" clobber -- confirm the compiler cannot cache block[] or the
   dct_error_sum values across it.
 * NOTE(review): addl/cmpl are applied to pointer operands, which only
   matches 32-bit registers; presumably fine for an i386-only file, but
   confirm before reusing on a 64-bit target.

 libavcodec/i386/mpegvideo_mmx.c          | 56 ++++++++++++++++++++++++++++++++
 libavcodec/i386/mpegvideo_mmx_template.c |  2 +-
 libavcodec/mpegvideo.c                   |  8 +++--
 libavcodec/mpegvideo.h                   |  1 +
 4 files changed, 63 insertions(+), 4 deletions(-)

diff --git a/libavcodec/i386/mpegvideo_mmx.c b/libavcodec/i386/mpegvideo_mmx.c
index 2299d2b..b6f249e 100644
--- a/libavcodec/i386/mpegvideo_mmx.c
+++ b/libavcodec/i386/mpegvideo_mmx.c
@@ -563,6 +563,60 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
     }
 }
 
+static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
+    const int intra= s->mb_intra;
+    int *sum= s->dct_error_sum[intra];
+    uint16_t *offset= s->dct_offset[intra];
+
+    s->dct_count[intra]++;
+
+    asm volatile(
+        "pxor %%mm7, %%mm7 \n\t"
+        "1: \n\t"
+        "pxor %%mm0, %%mm0 \n\t"
+        "pxor %%mm1, %%mm1 \n\t"
+        "movq (%0), %%mm2 \n\t"
+        "movq 8(%0), %%mm3 \n\t"
+        "pcmpgtw %%mm2, %%mm0 \n\t"
+        "pcmpgtw %%mm3, %%mm1 \n\t"
+        "pxor %%mm0, %%mm2 \n\t"
+        "pxor %%mm1, %%mm3 \n\t"
+        "psubw %%mm0, %%mm2 \n\t"
+        "psubw %%mm1, %%mm3 \n\t"
+        "movq %%mm2, %%mm4 \n\t"
+        "movq %%mm3, %%mm5 \n\t"
+        "psubusw (%2), %%mm2 \n\t"
+        "psubusw 8(%2), %%mm3 \n\t"
+        "pxor %%mm0, %%mm2 \n\t"
+        "pxor %%mm1, %%mm3 \n\t"
+        "psubw %%mm0, %%mm2 \n\t"
+        "psubw %%mm1, %%mm3 \n\t"
+        "movq %%mm2, (%0) \n\t"
+        "movq %%mm3, 8(%0) \n\t"
+        "movq %%mm4, %%mm2 \n\t"
+        "movq %%mm5, %%mm3 \n\t"
+        "punpcklwd %%mm7, %%mm4 \n\t"
+        "punpckhwd %%mm7, %%mm2 \n\t"
+        "punpcklwd %%mm7, %%mm5 \n\t"
+        "punpckhwd %%mm7, %%mm3 \n\t"
+        "paddd (%1), %%mm4 \n\t"
+        "paddd 8(%1), %%mm2 \n\t"
+        "paddd 16(%1), %%mm5 \n\t"
+        "paddd 24(%1), %%mm3 \n\t"
+        "movq %%mm4, (%1) \n\t"
+        "movq %%mm2, 8(%1) \n\t"
+        "movq %%mm5, 16(%1) \n\t"
+        "movq %%mm3, 24(%1) \n\t"
+        "addl $16, %0 \n\t"
+        "addl $32, %1 \n\t"
+        "addl $16, %2 \n\t"
+        "cmpl %3, %0 \n\t"
+            " jb 1b \n\t"
+        : "+r" (block), "+r" (sum), "+r" (offset)
+        : "r"(block+64)
+    );
+}
+
 #undef HAVE_MMX2
 #define RENAME(a) a ## _MMX
 #define RENAMEl(a) a ## _mmx
@@ -588,6 +642,8 @@ void MPV_common_init_mmx(MpegEncContext *s)
         s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
 
         draw_edges = draw_edges_mmx;
+
+        s->denoise_dct= denoise_dct_mmx;
 
         if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
             if(mm_flags & MM_MMXEXT){
diff --git a/libavcodec/i386/mpegvideo_mmx_template.c b/libavcodec/i386/mpegvideo_mmx_template.c
index f357e14..d4ed61e 100644
--- a/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/libavcodec/i386/mpegvideo_mmx_template.c
@@ -46,7 +46,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
     RENAMEl(ff_fdct) (block); //cant be anything else ...
 
     if(s->dct_error_sum)
-        ff_denoise_dct(s, block);
+        s->denoise_dct(s, block);
 
     if (s->mb_intra) {
         int dummy;
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 8c4938c..01e0d99 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -57,6 +57,7 @@ static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
 static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
 static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
 static int sse_mb(MpegEncContext *s);
+static void denoise_dct_c(MpegEncContext *s, DCTELEM *block);
 #endif //CONFIG_ENCODERS
 
 #ifdef HAVE_XVMC
@@ -219,6 +220,7 @@ int DCT_common_init(MpegEncContext *s)
 
 #ifdef CONFIG_ENCODERS
     s->dct_quantize= dct_quantize_c;
+    s->denoise_dct= denoise_dct_c;
 #endif
 
 #ifdef HAVE_MMX
@@ -4611,7 +4613,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
 
 #endif //CONFIG_ENCODERS
 
-void ff_denoise_dct(MpegEncContext *s, DCTELEM *block){
+static void denoise_dct_c(MpegEncContext *s, DCTELEM *block){
     const int intra= s->mb_intra;
     int i;
 
@@ -4666,7 +4668,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
     s->dsp.fdct (block);
 
     if(s->dct_error_sum)
-        ff_denoise_dct(s, block);
+        s->denoise_dct(s, block);
 
     qmul= qscale*16;
     qadd= ((qscale-1)|1)*8;
@@ -4939,7 +4941,7 @@ static int dct_quantize_c(MpegEncContext *s,
     s->dsp.fdct (block);
 
     if(s->dct_error_sum)
-        ff_denoise_dct(s, block);
+        s->denoise_dct(s, block);
 
     if (s->mb_intra) {
         if (!s->h263_aic) {
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 4bd99e8..1b94ae5 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -681,6 +681,7 @@ typedef struct MpegEncContext {
                            DCTELEM *block/*align 16*/, int n, int qscale);
     int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
     int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
+    void (*denoise_dct)(struct MpegEncContext *s, DCTELEM *block);
 } MpegEncContext;
 
 
-- 
2.7.4