From 270a85d259e44a560d7da70679906fa977f96e97 Mon Sep 17 00:00:00 2001
From: Jason Garrett-Glaser
Date: Mon, 28 Jun 2010 23:35:17 +0000
Subject: [PATCH] Fix some intra pred MMX functions that used MMXEXT instructions

Also add predict_4x4_dc MMXEXT function for vp8/h264.

Originally committed as revision 23873 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/x86/h264_intrapred.asm | 48 ++++++++++++++++++++++-----------------
 libavcodec/x86/h264dsp_mmx.c      |  6 ++---
 2 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 4f0a43f..0210aa0 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -138,12 +138,7 @@ cglobal pred16x16_dc_%1, 2,7
     add r5d, r6d
     lea r2d, [r2+r5+16]
     shr r2d, 5
-%ifidn %1, mmx
-    movd m0, r2d
-    punpcklbw m0, m0
-    punpcklwd m0, m0
-    punpckldq m0, m0
-%elifidn %1, mmxext
+%ifidn %1, mmxext
     movd m0, r2d
     punpcklbw m0, m0
     pshufw m0, m0, 0
@@ -185,7 +180,6 @@ cglobal pred16x16_dc_%1, 2,7
 %endmacro
 
 INIT_MMX
-PRED16x16_DC mmx, movq
 PRED16x16_DC mmxext, movq
 INIT_XMM
 PRED16x16_DC sse, movaps
@@ -337,8 +331,7 @@ PRED8x8_H ssse3
 ; void pred8x8_dc_rv40(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 
-%macro PRED8x8_DC 1
-cglobal pred8x8_dc_rv40_%1, 2,7
+cglobal pred8x8_dc_rv40_mmxext, 2,7
     mov r4, r0
     sub r0, r1
     pxor mm0, mm0
@@ -358,16 +351,9 @@ cglobal pred8x8_dc_rv40_%1, 2,7
     add r5d, r6d
     lea r2d, [r2+r5+8]
     shr r2d, 4
-%ifidn %1, mmx
-    movd mm0, r2d
-    punpcklbw mm0, mm0
-    punpcklwd mm0, mm0
-    punpckldq mm0, mm0
-%else
     movd mm0, r2d
     punpcklbw mm0, mm0
     pshufw mm0, mm0, 0
-%endif
     mov r3d, 4
 .loop:
     movq [r4+r1*0], mm0
@@ -376,11 +362,6 @@ cglobal pred8x8_dc_rv40_%1, 2,7
     dec r3d
     jg .loop
     REP_RET
-%endmacro
-
-
-PRED8x8_DC mmx
-PRED8x8_DC mmxext
 
 ;-----------------------------------------------------------------------------
 ; void pred8x8_tm_vp8(uint8_t *src, int stride)
@@ -484,3 +465,28 @@ cglobal pred8x8_tm_vp8_ssse3, 2,3,6
     dec r2d
     jg .loop
     REP_RET
+
+cglobal pred4x4_dc_mmxext, 3,5        ; void pred4x4_dc(uint8_t *src, const uint8_t *topright, int stride)
+    pxor   mm7, mm7                   ; mm7 = 0, the reference operand for psadbw
+    mov    r4, r0                     ; r4 = src, kept to address row 0 for the final store
+    sub    r0, r2                     ; r0 = src - stride: the row directly above the block
+    movd   mm0, [r0]                  ; load the 4 top neighbours (upper half of mm0 is zeroed)
+    psadbw mm0, mm7                   ; sum of |byte - 0| = sum of the 4 top neighbours
+    movzx  r1d, byte [r0+r2*1-1]      ; src[0*stride - 1]; r1 is only ever written, so the 2nd argument is unused
+    movd   r3d, mm0                   ; r3d accumulates the 8-pixel sum, starting with the top sum
+    add    r3d, r1d
+    movzx  r1d, byte [r0+r2*2-1]      ; src[1*stride - 1]
+    lea    r0, [r0+r2*2]              ; r0 = src + stride (row 1)
+    add    r3d, r1d
+    movzx  r1d, byte [r0+r2*1-1]      ; src[2*stride - 1]
+    add    r3d, r1d
+    movzx  r1d, byte [r0+r2*2-1]      ; src[3*stride - 1]
+    add    r3d, r1d
+    add    r3d, 4                     ; rounding term for the divide by 8
+    shr    r3d, 3                     ; dc = (top sum + left sum + 4) >> 3
+    imul   r3d, 0x01010101            ; replicate the dc byte into all 4 bytes of the dword
+    mov    [r4+r2*0], r3d             ; store row 0
+    mov    [r0+r2*0], r3d             ; store row 1
+    mov    [r0+r2*1], r3d             ; store row 2
+    mov    [r0+r2*2], r3d             ; store row 3
+    RET
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 3ff100b..118dd7b 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -2328,7 +2328,6 @@ void ff_pred16x16_vertical_sse (uint8_t *src, int stride);
 void ff_pred16x16_horizontal_mmx (uint8_t *src, int stride);
 void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
 void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
-void ff_pred16x16_dc_mmx (uint8_t *src, int stride);
 void ff_pred16x16_dc_mmxext (uint8_t *src, int stride);
 void ff_pred16x16_dc_sse (uint8_t *src, int stride);
 void ff_pred16x16_dc_sse2 (uint8_t *src, int stride);
@@ -2336,7 +2335,6 @@ void ff_pred16x16_dc_ssse3 (uint8_t *src, int stride);
 void ff_pred16x16_tm_vp8_mmx (uint8_t *src, int stride);
 void ff_pred16x16_tm_vp8_mmxext (uint8_t *src, int stride);
 void ff_pred16x16_tm_vp8_sse2 (uint8_t *src, int stride);
-void ff_pred8x8_dc_rv40_mmx (uint8_t *src, int stride);
 void ff_pred8x8_dc_rv40_mmxext (uint8_t *src, int stride);
 void ff_pred8x8_vertical_mmx (uint8_t *src, int stride);
 void ff_pred8x8_horizontal_mmx (uint8_t *src, int stride);
@@ -2346,6 +2344,7 @@ void ff_pred8x8_tm_vp8_mmx (uint8_t *src, int stride);
 void ff_pred8x8_tm_vp8_mmxext (uint8_t *src, int stride);
 void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride);
 void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride);
+void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride);
 
 #if CONFIG_H264DSP
 void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
@@ -2354,12 +2353,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
     if (mm_flags & FF_MM_MMX) {
         h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_mmx;
         h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
-        h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx;
         h->pred8x8 [VERT_PRED8x8] = ff_pred8x8_vertical_mmx;
         h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx;
         if (codec_id == CODEC_ID_VP8) {
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
-            h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmx;
             h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
         }
     }
@@ -2368,6 +2365,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
         h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
         h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
         h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
+        h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext; /* set outside the codec_id == CODEC_ID_VP8 check below, so not VP8-only */
         if (codec_id == CODEC_ID_VP8) {
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
             h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
-- 
2.7.4