movd [%7+%9*2], m%4
%endmacro
-%macro SPLATB_REG 3-4
+%macro SPLATB_REG_MMX 2-3 ; %1 = dst vector reg, %2 = movd source holding the byte, %3 accepted but unused; copies the low byte of %2 into each of the low 8 bytes of %1 using only punpckl* steps
movd %1, %2
-%ifidn %3, ssse3
- pshufb %1, %4
-%else
punpcklbw %1, %1
-%if mmsize == 16 ; sse2
- pshuflw %1, %1, 0x0
- punpcklqdq %1, %1
-%elifidn %3, mmx
punpcklwd %1, %1
punpckldq %1, %1
-%else ; mmxext
+%endmacro
+
+%macro SPLATB_REG_MMXEXT 2-3 ; %1 = dst vector reg, %2 = movd source holding the byte, %3 accepted but unused
+ movd %1, %2 ; low dword of %1 = %2
+ punpcklbw %1, %1 ; interleave %1 with itself: word 0 now holds the byte twice
pshufw %1, %1, 0x0
-%endif
-%endif
+%endmacro
+
+%macro SPLATB_REG_SSE2 2-3 ; %1 = dst XMM reg, %2 = movd source holding the byte, %3 accepted but unused
+ movd %1, %2 ; low dword of %1 = %2
+ punpcklbw %1, %1 ; interleave %1 with itself: word 0 now holds the byte twice
+ pshuflw %1, %1, 0x0 ; broadcast word 0 across the four low words
+ punpcklqdq %1, %1 ; copy the low qword into the high qword
+%endmacro
+
+%macro SPLATB_REG_SSSE3 3 ; %1 = dst XMM reg, %2 = movd source holding the byte, %3 = pshufb control register (mandatory here, unlike the other variants)
+ movd %1, %2 ; low dword of %1 = %2
+ pshufb %1, %3 ; with an all-zero %3 every byte of %1 becomes byte 0; the visible call sites pass a register cleared by pxor
%endmacro
%macro SIMPLE_LOOPFILTER 3
%if mmsize == 8 ; mmx/mmxext
mov r3, 2
%endif
-%ifidn %1, ssse3
+%ifnidn %1, sse2 ; %ifnidn is a text-identity test and cannot evaluate "&& mmsize == 16"; the size check needs its own %if
+%if mmsize == 16 ; XMM and not sse2 (ssse3 among the variants instantiated here): pshufb splat needs an all-zero mask
pxor m0, m0
%endif
+%endif
- SPLATB_REG m7, r2, %1, m0 ; splat "flim" into register
+ SPLATB_REG m7, r2, m0 ; splat "flim" into register
; set up indexes to address 4 rows
mov r2, r1
%endmacro
INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX ; SIMPLE_LOOPFILTER invokes SPLATB_REG; bind it to the plain-MMX splat for the two expansions below
SIMPLE_LOOPFILTER mmx, v, 4
SIMPLE_LOOPFILTER mmx, h, 6
+%define SPLATB_REG SPLATB_REG_MMXEXT ; rebind to the pshufw-based splat for the mmxext expansions
SIMPLE_LOOPFILTER mmxext, v, 4
SIMPLE_LOOPFILTER mmxext, h, 6
INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2 ; rebind to the pshuflw/punpcklqdq splat for the sse2 expansions
SIMPLE_LOOPFILTER sse2, v, 3
SIMPLE_LOOPFILTER sse2, h, 6
+%define SPLATB_REG SPLATB_REG_SSSE3 ; rebind to the pshufb splat, which needs the zeroed third argument
SIMPLE_LOOPFILTER ssse3, v, 3
SIMPLE_LOOPFILTER ssse3, h, 6
%define stack_reg hev_thr_reg
%endif
-%ifidn %1, ssse3
+%ifnidn %1, sse2 ; %ifnidn is a text-identity test and cannot evaluate "&& mmsize == 16"; the size check needs its own %if
+%if mmsize == 16 ; XMM and not sse2 (ssse3 among the variants instantiated here): pshufb splat needs an all-zero mask
pxor m7, m7
%endif
+%endif
%ifndef m8 ; mmx/mmxext or sse2 on x86-32
; splat function arguments
- SPLATB_REG m0, E_reg, %1, m7 ; E
- SPLATB_REG m1, I_reg, %1, m7 ; I
- SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG m0, E_reg, m7 ; E
+ SPLATB_REG m1, I_reg, m7 ; I
+ SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh
; align stack
mov stack_reg, rsp ; backup stack pointer
%define q0backup m8
; splat function arguments
- SPLATB_REG flim_E, E_reg, %1, m7 ; E
- SPLATB_REG flim_I, I_reg, %1, m7 ; I
- SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG flim_E, E_reg, m7 ; E
+ SPLATB_REG flim_I, I_reg, m7 ; I
+ SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh
%endif
%if mmsize == 8 && %4 == 16 ; mmx/mmxext
%endmacro
INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX ; all plain-MMX expansions are grouped under this binding of SPLATB_REG
INNER_LOOPFILTER mmx, v, 6, 16, 0
INNER_LOOPFILTER mmx, h, 6, 16, 0
-INNER_LOOPFILTER mmxext, v, 6, 16, 0
-INNER_LOOPFILTER mmxext, h, 6, 16, 0
-
INNER_LOOPFILTER mmx, v, 6, 8, 0
INNER_LOOPFILTER mmx, h, 6, 8, 0
+
+%define SPLATB_REG SPLATB_REG_MMXEXT ; mmxext expansions moved below the mmx ones so one rebinding covers all four
+INNER_LOOPFILTER mmxext, v, 6, 16, 0
+INNER_LOOPFILTER mmxext, h, 6, 16, 0
INNER_LOOPFILTER mmxext, v, 6, 8, 0
INNER_LOOPFILTER mmxext, h, 6, 8, 0
INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2
INNER_LOOPFILTER sse2, v, 5, 16, 13
%ifdef m8
INNER_LOOPFILTER sse2, h, 5, 16, 13
INNER_LOOPFILTER sse2, v, 6, 8, 13
INNER_LOOPFILTER sse2, h, 6, 8, 13
+%define SPLATB_REG SPLATB_REG_SSSE3
INNER_LOOPFILTER ssse3, v, 5, 16, 13
%ifdef m8
INNER_LOOPFILTER ssse3, h, 5, 16, 13
%define stack_reg hev_thr_reg
%endif
-%ifidn %1, ssse3
+%ifnidn %1, sse2 ; %ifnidn is a text-identity test and cannot evaluate "&& mmsize == 16"; the size check needs its own %if
+%if mmsize == 16 ; XMM and not sse2 (ssse3 among the variants instantiated here): pshufb splat needs an all-zero mask
pxor m7, m7
%endif
+%endif
%ifndef m8 ; mmx/mmxext or sse2 on x86-32
; splat function arguments
- SPLATB_REG m0, E_reg, %1, m7 ; E
- SPLATB_REG m1, I_reg, %1, m7 ; I
- SPLATB_REG m2, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG m0, E_reg, m7 ; E
+ SPLATB_REG m1, I_reg, m7 ; I
+ SPLATB_REG m2, hev_thr_reg, m7 ; hev_thresh
; align stack
mov stack_reg, rsp ; backup stack pointer
%define lim_sign m15
; splat function arguments
- SPLATB_REG flim_E, E_reg, %1, m7 ; E
- SPLATB_REG flim_I, I_reg, %1, m7 ; I
- SPLATB_REG hev_thr, hev_thr_reg, %1, m7 ; hev_thresh
+ SPLATB_REG flim_E, E_reg, m7 ; E
+ SPLATB_REG flim_I, I_reg, m7 ; I
+ SPLATB_REG hev_thr, hev_thr_reg, m7 ; hev_thresh
%endif
%if mmsize == 8 && %4 == 16 ; mmx/mmxext
%endmacro
INIT_MMX
+%define SPLATB_REG SPLATB_REG_MMX ; all plain-MMX expansions are grouped under this binding of SPLATB_REG
MBEDGE_LOOPFILTER mmx, v, 6, 16, 0
MBEDGE_LOOPFILTER mmx, h, 6, 16, 0
-MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0
-MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0
-
MBEDGE_LOOPFILTER mmx, v, 6, 8, 0
MBEDGE_LOOPFILTER mmx, h, 6, 8, 0
+
+%define SPLATB_REG SPLATB_REG_MMXEXT ; mmxext expansions moved below the mmx ones so one rebinding covers all four
+MBEDGE_LOOPFILTER mmxext, v, 6, 16, 0
+MBEDGE_LOOPFILTER mmxext, h, 6, 16, 0
MBEDGE_LOOPFILTER mmxext, v, 6, 8, 0
MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0
INIT_XMM
+%define SPLATB_REG SPLATB_REG_SSE2
MBEDGE_LOOPFILTER sse2, v, 5, 16, 16
%ifdef m8
MBEDGE_LOOPFILTER sse2, h, 5, 16, 16
MBEDGE_LOOPFILTER sse2, v, 6, 8, 16
MBEDGE_LOOPFILTER sse2, h, 6, 8, 16
+%define SPLATB_REG SPLATB_REG_SSSE3
MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16
%ifdef m8
MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16