packuswb m0, m1
%endmacro
; h264_weight_16, MMX-register build. This change drops the hand-written
; "_mmxext" suffix from the cglobal name and passes the instruction-set name
; to INIT_MMX instead.
; NOTE(review): presumably cglobal derives the symbol suffix from the INIT_MMX
; argument so the exported name stays the same -- confirm in the x86inc macros.
-INIT_MMX
-cglobal h264_weight_16_mmxext, 6, 6, 0
+INIT_MMX mmxext
+cglobal h264_weight_16, 6, 6, 0
WEIGHT_SETUP
.nextrow:
WEIGHT_OP 0, 4
; Repeat while ZF is clear; the instruction that sets the flags is not shown
; in this view.
jnz .nextrow
REP_RET
; WEIGHT_FUNC_MM: emits one h264_weight_%1 function.
;   %1 - numeric part of the function name
;   %2 - forwarded unchanged as the last cglobal argument
; The former third parameter (an explicit name suffix) is removed by this
; change, so the macro now takes two parameters.
-%macro WEIGHT_FUNC_MM 3
-cglobal h264_weight_%1_%3, 6, 6, %2
+%macro WEIGHT_FUNC_MM 2
+cglobal h264_weight_%1, 6, 6, %2
WEIGHT_SETUP
.nextrow:
; The second WEIGHT_OP argument is half the current vector register size.
WEIGHT_OP 0, mmsize/2
REP_RET
%endmacro
; Instantiate WEIGHT_FUNC_MM twice: once after INIT_MMX (name 8) and once
; after INIT_XMM (name 16). The instruction-set name moves from the macro's
; third argument onto the INIT_MMX / INIT_XMM line.
-INIT_MMX
-WEIGHT_FUNC_MM 8, 0, mmxext
-INIT_XMM
-WEIGHT_FUNC_MM 16, 8, sse2
+INIT_MMX mmxext
+WEIGHT_FUNC_MM 8, 0
+INIT_XMM sse2
+WEIGHT_FUNC_MM 16, 8
; WEIGHT_FUNC_HALF_MM: emits one h264_weight_%1 function.
;   %1 - numeric part of the function name
;   %2 - forwarded unchanged as the last cglobal argument
; The former third parameter (an explicit name suffix) is removed by this
; change, so the macro now takes two parameters.
-%macro WEIGHT_FUNC_HALF_MM 3
-cglobal h264_weight_%1_%3, 6, 6, %2
+%macro WEIGHT_FUNC_HALF_MM 2
+cglobal h264_weight_%1, 6, 6, %2
WEIGHT_SETUP
; r2d is halved (arithmetic shift) and r3 is set to twice r1.
; NOTE(review): presumably r2 is a row count and r1 a stride, so each
; iteration covers two rows -- the loop body is not visible here; confirm.
sar r2d, 1
lea r3, [r1*2]
REP_RET
%endmacro
; Instantiate WEIGHT_FUNC_HALF_MM twice: once after INIT_MMX (name 4) and once
; after INIT_XMM (name 8). The instruction-set name moves from the macro's
; third argument onto the INIT_MMX / INIT_XMM line.
-INIT_MMX
-WEIGHT_FUNC_HALF_MM 4, 0, mmxext
-INIT_XMM
-WEIGHT_FUNC_HALF_MM 8, 8, sse2
+INIT_MMX mmxext
+WEIGHT_FUNC_HALF_MM 4, 0
+INIT_XMM sse2
+WEIGHT_FUNC_HALF_MM 8, 8
; BIWEIGHT_SETUP: moves the scalar inputs held in r4, r5, r6 and off_regd into
; vector registers. This change adds cpuflag(ssse3) branches that reproduce
; the register layout of the removed BIWEIGHT_SSSE3_SETUP macro, so a single
; setup macro serves every instruction set.
; NOTE(review): the %if ARCH_X86_64 below has no matching %else/%endif in this
; view, so some lines appear to be elided -- check against the complete file.
%macro BIWEIGHT_SETUP 0
%if ARCH_X86_64
; off_regd = (off_regd + 1) | 1, and r4 is incremented by one.
add off_regd, 1
or off_regd, 1
add r4, 1
+%if cpuflag(ssse3)
; SSSE3 layout: r5d goes to m4 and r6d to m0 so they can be interleaved below.
+ movd m4, r5d
+ movd m0, r6d
+%else
; Non-SSSE3 layout: r5d and r6d stay in separate registers (m3 and m4).
movd m3, r5d
movd m4, r6d
+%endif
; m5 = (off_regd << r4d) >> 1, with r4d copied into m6 as the shift count.
movd m5, off_regd
movd m6, r4d
pslld m5, m6
psrld m5, 1
+%if cpuflag(ssse3)
; Interleave the low bytes of m4 and m0, then replicate word 0 of m4 and of
; m5 across the register: pshuflw fills the low four words and punpcklqdq
; copies the low quadword into the high quadword.
+ punpcklbw m4, m0
+ pshuflw m4, m4, 0
+ pshuflw m5, m5, 0
+ punpcklqdq m4, m4
+ punpcklqdq m5, m5
+
+%else
%if mmsize == 16
; NOTE(review): pshufw appears here under mmsize == 16 alongside pshuflw;
; lines are probably elided in this view -- confirm against the full file.
pshuflw m3, m3, 0
pshuflw m4, m4, 0
pshufw m5, m5, 0
%endif
; m7 is cleared to zero on the non-SSSE3 path only.
pxor m7, m7
+%endif
%endmacro
; BIWEIGHT_STEPA: three-parameter macro. Only its last instruction is visible
; in this view; the parameters are not used by the line shown.
%macro BIWEIGHT_STEPA 3
; Pack the signed words of m0 and m1 into unsigned-saturated bytes in m0.
packuswb m0, m1
%endmacro
; h264_biweight_16, MMX-register build. This change drops the hand-written
; "_mmxext" suffix from the cglobal name and passes the instruction-set name
; to INIT_MMX instead.
; NOTE(review): presumably cglobal derives the symbol suffix from the INIT_MMX
; argument so the exported name stays the same -- confirm in the x86inc macros.
-INIT_MMX
-cglobal h264_biweight_16_mmxext, 7, 8, 0
+INIT_MMX mmxext
+cglobal h264_biweight_16, 7, 8, 0
BIWEIGHT_SETUP
; r3d is loaded from r3m after the setup macro; "ifnidn" suggests the move is
; skipped when both names are the same operand -- TODO confirm in x86inc.
movifnidn r3d, r3m
.nextrow:
; Repeat while ZF is clear; the loop body and the instruction that sets the
; flags are not shown in this view.
jnz .nextrow
REP_RET
; BIWEIGHT_FUNC_MM: emits one h264_biweight_%1 function.
;   %1 - numeric part of the function name
;   %2 - forwarded unchanged as the last cglobal argument
; The former third parameter (an explicit name suffix) is removed by this
; change, so the macro now takes two parameters.
-%macro BIWEIGHT_FUNC_MM 3
-cglobal h264_biweight_%1_%3, 7, 8, %2
+%macro BIWEIGHT_FUNC_MM 2
+cglobal h264_biweight_%1, 7, 8, %2
BIWEIGHT_SETUP
; r3d is loaded from r3m after the setup macro has run.
movifnidn r3d, r3m
.nextrow:
; The loop body is not shown in this view.
REP_RET
%endmacro
; Instantiate BIWEIGHT_FUNC_MM twice: once after INIT_MMX (name 8) and once
; after INIT_XMM (name 16). The instruction-set name moves from the macro's
; third argument onto the INIT_MMX / INIT_XMM line.
-INIT_MMX
-BIWEIGHT_FUNC_MM 8, 0, mmxext
-INIT_XMM
-BIWEIGHT_FUNC_MM 16, 8, sse2
+INIT_MMX mmxext
+BIWEIGHT_FUNC_MM 8, 0
+INIT_XMM sse2
+BIWEIGHT_FUNC_MM 16, 8
; BIWEIGHT_FUNC_HALF_MM: emits one h264_biweight_%1 function.
;   %1 - numeric part of the function name
;   %2 - forwarded unchanged as the last cglobal argument
; The former third parameter (an explicit name suffix) is removed by this
; change, so the macro now takes two parameters.
-%macro BIWEIGHT_FUNC_HALF_MM 3
-cglobal h264_biweight_%1_%3, 7, 8, %2
+%macro BIWEIGHT_FUNC_HALF_MM 2
+cglobal h264_biweight_%1, 7, 8, %2
BIWEIGHT_SETUP
movifnidn r3d, r3m
; r3 is halved with an arithmetic shift.
; NOTE(review): presumably r3 is a row count and each iteration handles two
; rows -- the loop body is not visible in this view; confirm.
sar r3, 1
REP_RET
%endmacro
; Old-style BIWEIGHT_FUNC_HALF_MM instantiations (removed). They carried the
; instruction-set name as a third macro argument.
-INIT_MMX
-BIWEIGHT_FUNC_HALF_MM 4, 0, mmxext
-INIT_XMM
-BIWEIGHT_FUNC_HALF_MM 8, 8, sse2
-
; Removed macro BIWEIGHT_SSSE3_SETUP. It picked off_regd per architecture
; (r7d when ARCH_X86_64, r3d otherwise), loaded it from r7m, and then ran the
; same movd / pslld / psrld / punpcklbw / pshuflw / punpcklqdq sequence that
; the cpuflag(ssse3) branches of BIWEIGHT_SETUP now contain.
-%macro BIWEIGHT_SSSE3_SETUP 0
-%if ARCH_X86_64
-%define off_regd r7d
-%else
-%define off_regd r3d
-%endif
- mov off_regd, r7m
- add off_regd, 1
- or off_regd, 1
- add r4, 1
- movd m4, r5d
- movd m0, r6d
- movd m5, off_regd
- movd m6, r4d
- pslld m5, m6
- psrld m5, 1
- punpcklbw m4, m0
- pshuflw m4, m4, 0
- pshuflw m5, m5, 0
- punpcklqdq m4, m4
- punpcklqdq m5, m5
-%endmacro
; New-style instantiations: the instruction-set name is given on the
; INIT_MMX / INIT_XMM line and the macro takes two arguments.
+INIT_MMX mmxext
+BIWEIGHT_FUNC_HALF_MM 4, 0
+INIT_XMM sse2
+BIWEIGHT_FUNC_HALF_MM 8, 8
; BIWEIGHT_SSSE3_OP: parameterless macro. Only two of its instructions are
; visible in this view; whatever sits between them is elided.
%macro BIWEIGHT_SSSE3_OP 0
; Multiply the unsigned bytes of m0 by the signed bytes of m4 and add each
; adjacent pair of products into a saturated signed word.
pmaddubsw m0, m4
; Pack the signed words of m0 and m2 into unsigned-saturated bytes in m0.
packuswb m0, m2
%endmacro
; h264_biweight_16, SSSE3 build. This change drops the hand-written "_ssse3"
; suffix from the cglobal name, passes "ssse3" to INIT_XMM, and calls the
; shared BIWEIGHT_SETUP instead of BIWEIGHT_SSSE3_SETUP.
; NOTE(review): presumably cglobal derives the symbol suffix from the INIT_XMM
; argument so the exported name stays the same -- confirm in the x86inc macros.
-INIT_XMM
-cglobal h264_biweight_16_ssse3, 7, 8, 8
- BIWEIGHT_SSSE3_SETUP
+INIT_XMM ssse3
+cglobal h264_biweight_16, 7, 8, 8
+ BIWEIGHT_SETUP
; r3d is loaded from r3m after the setup macro has run.
movifnidn r3d, r3m
.nextrow:
; Repeat while ZF is clear; the loop body and the instruction that sets the
; flags are not shown in this view.
jnz .nextrow
REP_RET
-INIT_XMM
-cglobal h264_biweight_8_ssse3, 7, 8, 8
- BIWEIGHT_SSSE3_SETUP
+INIT_XMM ssse3
+cglobal h264_biweight_8, 7, 8, 8
+ BIWEIGHT_SETUP
movifnidn r3d, r3m
sar r3, 1
lea r4, [r2*2]