From 3bb345c9313ad8f6a6c24abd7d5eaa11413bbe22 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Fri, 12 Mar 2021 14:34:32 +0100 Subject: [PATCH] i386: Hopefully last set of -mavx512vl -mno-avx512bw fixes [PR99321] This is the final patch of the series started with https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566139.html and continued with https://gcc.gnu.org/pipermail/gcc-patches/2021-March/566356.html This time, I went through all the remaining instructions marked by gas as requiring both AVX512BW and AVX512VL and for each checked tmp-mddump.md, figured out if it ever could be a problem (e.g. instructions that require AVX512BW+AVX512VL, but didn't exist before AVX512F are usually fine, the patterns have the right conditions, the bugs are typically on pre-AVX512F patterns where we have just blindly added v while they actually can't access those unless AVX512BW+AVX512VL), added test where possible (the test doesn't cover MMX though) and fixed md bugs. For mmx pextr[bw]/pinsr[bw] patterns it introduces per discussions a new YW constraint that only requires AVX512BW and not AVX512VL, because those instructions only require the former and not the latter when using EVEX encoding. There are some other interesting details, e.g. most of the 8 interleave patterns (vpunpck[hl]{bw,wd}) had correctly && && in the conditions because for masking it needs to be always EVEX encoded and then it needs both VL+BW, but 2 of those 8 had just && and so again would run into the -mavx512vl -mno-avx512bw problems. Another problem different from others was mmx eq/gt comparisons, that was using Yv constraints, so would happily accept %xmm16+ registers for -mavx512vl, but there actually are no such EVEX encoded instructions, as AVX512 comparisons work with %k* registers instead. 
The newly added testcase without the patch fails with: /tmp/ccVROLo2.s: Assembler messages: /tmp/ccVROLo2.s:9: Error: unsupported instruction `vpabsb' /tmp/ccVROLo2.s:20: Error: unsupported instruction `vpabsb' /tmp/ccVROLo2.s:31: Error: unsupported instruction `vpabsw' /tmp/ccVROLo2.s:42: Error: unsupported instruction `vpabsw' /tmp/ccVROLo2.s:53: Error: unsupported instruction `vpaddsb' /tmp/ccVROLo2.s:64: Error: unsupported instruction `vpaddsb' /tmp/ccVROLo2.s:75: Error: unsupported instruction `vpaddsw' /tmp/ccVROLo2.s:86: Error: unsupported instruction `vpaddsw' /tmp/ccVROLo2.s:97: Error: unsupported instruction `vpsubsb' /tmp/ccVROLo2.s:108: Error: unsupported instruction `vpsubsb' /tmp/ccVROLo2.s:119: Error: unsupported instruction `vpsubsw' /tmp/ccVROLo2.s:130: Error: unsupported instruction `vpsubsw' /tmp/ccVROLo2.s:141: Error: unsupported instruction `vpaddusb' /tmp/ccVROLo2.s:152: Error: unsupported instruction `vpaddusb' /tmp/ccVROLo2.s:163: Error: unsupported instruction `vpaddusw' /tmp/ccVROLo2.s:174: Error: unsupported instruction `vpaddusw' /tmp/ccVROLo2.s:185: Error: unsupported instruction `vpsubusb' /tmp/ccVROLo2.s:196: Error: unsupported instruction `vpsubusb' /tmp/ccVROLo2.s:207: Error: unsupported instruction `vpsubusw' /tmp/ccVROLo2.s:218: Error: unsupported instruction `vpsubusw' /tmp/ccVROLo2.s:258: Error: unsupported instruction `vpaddusw' /tmp/ccVROLo2.s:269: Error: unsupported instruction `vpavgb' /tmp/ccVROLo2.s:280: Error: unsupported instruction `vpavgb' /tmp/ccVROLo2.s:291: Error: unsupported instruction `vpavgw' /tmp/ccVROLo2.s:302: Error: unsupported instruction `vpavgw' /tmp/ccVROLo2.s:475: Error: unsupported instruction `vpmovsxbw' /tmp/ccVROLo2.s:486: Error: unsupported instruction `vpmovsxbw' /tmp/ccVROLo2.s:497: Error: unsupported instruction `vpmovzxbw' /tmp/ccVROLo2.s:508: Error: unsupported instruction `vpmovzxbw' /tmp/ccVROLo2.s:548: Error: unsupported instruction `vpmulhuw' /tmp/ccVROLo2.s:559: Error: unsupported 
instruction `vpmulhuw' /tmp/ccVROLo2.s:570: Error: unsupported instruction `vpmulhw' /tmp/ccVROLo2.s:581: Error: unsupported instruction `vpmulhw' /tmp/ccVROLo2.s:592: Error: unsupported instruction `vpsadbw' /tmp/ccVROLo2.s:603: Error: unsupported instruction `vpsadbw' /tmp/ccVROLo2.s:643: Error: unsupported instruction `vpshufhw' /tmp/ccVROLo2.s:654: Error: unsupported instruction `vpshufhw' /tmp/ccVROLo2.s:665: Error: unsupported instruction `vpshuflw' /tmp/ccVROLo2.s:676: Error: unsupported instruction `vpshuflw' /tmp/ccVROLo2.s:687: Error: unsupported instruction `vpslldq' /tmp/ccVROLo2.s:698: Error: unsupported instruction `vpslldq' /tmp/ccVROLo2.s:709: Error: unsupported instruction `vpsrldq' /tmp/ccVROLo2.s:720: Error: unsupported instruction `vpsrldq' /tmp/ccVROLo2.s:899: Error: unsupported instruction `vpunpckhbw' /tmp/ccVROLo2.s:910: Error: unsupported instruction `vpunpckhbw' /tmp/ccVROLo2.s:921: Error: unsupported instruction `vpunpckhwd' /tmp/ccVROLo2.s:932: Error: unsupported instruction `vpunpckhwd' /tmp/ccVROLo2.s:943: Error: unsupported instruction `vpunpcklbw' /tmp/ccVROLo2.s:954: Error: unsupported instruction `vpunpcklbw' /tmp/ccVROLo2.s:965: Error: unsupported instruction `vpunpcklwd' /tmp/ccVROLo2.s:976: Error: unsupported instruction `vpunpcklwd' 2021-03-12 Jakub Jelinek PR target/99321 * config/i386/constraints.md (YW): New internal constraint. * config/i386/sse.md (v_Yw): Add V4TI, V2TI, V1TI and TI cases. (*_3, *_uavg3, *abs2, *mul3_highpart): Use instead of v in constraints. (_psadbw): Use YW instead of v in constraints. (*avx2_pmaddwd, *sse2_pmaddwd, *v8hi3, *v16qi3, avx2_pmaddubsw256, ssse3_pmaddubsw128): Merge last two alternatives into one, use Yw instead of former x,v. (ashr3, 3): Use instead of x in constraints of the last alternative. (_packsswb, _packssdw, _packuswb, _packusdw, *_pmulhrsw3, _palignr, _pshufb3): Merge last two alternatives into one, use instead of former x,v. 
(avx2_interleave_highv32qi, vec_interleave_highv16qi): Use Yw instead of v in constraints. Add && to condition. (avx2_interleave_lowv32qi, vec_interleave_lowv16qi, avx2_interleave_highv16hi, vec_interleave_highv8hi, avx2_interleave_lowv16hi, vec_interleave_lowv8hi, avx2_pshuflw_1, sse2_pshuflw_1, avx2_pshufhw_1, sse2_pshufhw_1, avx2_v16qiv16hi2, sse4_1_v8qiv8hi2, *sse4_1_v8qiv8hi2_1, _3): Use Yw instead of v in constraints. * config/i386/mmx.md (Yv_Yw): New define_mode_attr. (*mmx_3, mmx_ashr3, mmx_3): Use instead of Yv in constraints. (*mmx_3, *mmx_mulv4hi3, *mmx_smulv4hi3_highpart, *mmx_umulv4hi3_highpart, *mmx_pmaddwd, *mmx_v4hi3, *mmx_v8qi3, mmx_packswb, mmx_packssdw, mmx_punpckhbw, mmx_punpcklbw, mmx_punpckhwd, mmx_punpcklwd, *mmx_uavgv8qi3, *mmx_uavgv4hi3, mmx_psadbw): Use Yw instead of Yv in constraints. (*mmx_pinsrw, *mmx_pinsrb, *mmx_pextrw, *mmx_pextrw_zext, *mmx_pextrb, *mmx_pextrb_zext): Use YW instead of Yv in constraints. (*mmx_eq3, mmx_gt3): Use x instead of Yv in constraints. (mmx_andnot3, *mmx_3): Split last alternative into two, one with just x, another isa avx512vl with v. * gcc.target/i386/avx512vl-pr99321-2.c: New test. --- gcc/config/i386/constraints.md | 6 + gcc/config/i386/mmx.md | 179 +++++++------ gcc/config/i386/sse.md | 292 ++++++++++----------- gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c | 94 +++++++ 4 files changed, 332 insertions(+), 239 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index a8db33e..eaa582d 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -111,6 +111,8 @@ ;; otherwise any SSE register ;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL ;; target, otherwise any SSE register. +;; W any EVEX encodable SSE register for AVX512BW target, +;; otherwise any SSE register. (define_register_constraint "Yz" "TARGET_SSE ? 
SSE_FIRST_REG : NO_REGS" "First SSE register (@code{%xmm0}).") @@ -151,6 +153,10 @@ "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS" "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.") +(define_register_constraint "YW" + "TARGET_AVX512BW ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS" + "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW target, otherwise any SSE register.") + ;; We use the B prefix to denote any number of internal operands: ;; f FLAGS_REG ;; g GOT memory operand. diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index c6a2882..4c2b724 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -61,6 +61,9 @@ (define_mode_attr mmxdoublemode [(V8QI "V8HI") (V4HI "V4SI")]) +(define_mode_attr Yv_Yw + [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")]) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; Move patterns @@ -1152,10 +1155,10 @@ "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*mmx_3" - [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,") (plusminus:MMXMODEI8 - (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,Yv") - (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:MMXMODEI8 1 "register_mmxmem_operand" "0,0,") + (match_operand:MMXMODEI8 2 "register_mmxmem_operand" "ym,x,")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (, mode, operands)" "@ @@ -1176,10 +1179,10 @@ "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*mmx_3" - [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yw") (sat_plusminus:MMXMODE12 - (match_operand:MMXMODE12 1 "register_mmxmem_operand" "0,0,Yv") - 
(match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:MMXMODE12 1 "register_mmxmem_operand" "0,0,Yw") + (match_operand:MMXMODE12 2 "register_mmxmem_operand" "ym,x,Yw")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (, mode, operands)" "@ @@ -1206,9 +1209,9 @@ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_mulv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") - (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv") - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))] + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") + (mult:V4HI (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (MULT, V4HImode, operands)" "@ @@ -1234,14 +1237,14 @@ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_smulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (truncate:V4HI (lshiftrt:V4SI (mult:V4SI (sign_extend:V4SI - (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")) + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")) (sign_extend:V4SI - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))) + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))) (const_int 16))))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (MULT, V4HImode, operands)" @@ -1269,14 +1272,14 @@ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_umulv4hi3_highpart" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (truncate:V4HI (lshiftrt:V4SI (mult:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")) + (match_operand:V4HI 1 "register_mmxmem_operand" 
"%0,0,Yw")) (zero_extend:V4SI - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))) + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))) (const_int 16))))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A) @@ -1313,16 +1316,16 @@ "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);") (define_insn "*mmx_pmaddwd" - [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yw") (plus:V2SI (mult:V2SI (sign_extend:V2SI (vec_select:V2HI - (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv") + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2SI (vec_select:V2HI - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw") (parallel [(const_int 0) (const_int 2)])))) (mult:V2SI (sign_extend:V2SI @@ -1432,10 +1435,10 @@ "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);") (define_insn "*mmx_v4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (smaxmin:V4HI - (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv") - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (, V4HImode, operands)" @@ -1466,10 +1469,10 @@ "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);") (define_insn "*mmx_v8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") (umaxmin:V8QI - (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv") - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw") + 
(match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A) && ix86_binary_operator_ok (, V8QImode, operands)" @@ -1483,10 +1486,10 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_ashr3" - [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODE24 0 "register_operand" "=y,x,") (ashiftrt:MMXMODE24 - (match_operand:MMXMODE24 1 "register_operand" "0,0,Yv") - (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))] + (match_operand:MMXMODE24 1 "register_operand" "0,0,") + (match_operand:DI 2 "nonmemory_operand" "yN,xN,N")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ psra\t{%2, %0|%0, %2} @@ -1509,10 +1512,10 @@ "TARGET_MMX_WITH_SSE") (define_insn "mmx_3" - [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODE248 0 "register_operand" "=y,x,") (any_lshift:MMXMODE248 - (match_operand:MMXMODE248 1 "register_operand" "0,0,Yv") - (match_operand:DI 2 "nonmemory_operand" "yN,xN,YvN")))] + (match_operand:MMXMODE248 1 "register_operand" "0,0,") + (match_operand:DI 2 "nonmemory_operand" "yN,xN,N")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ p\t{%2, %0|%0, %2} @@ -1549,10 +1552,10 @@ "ix86_fixup_binary_operands_no_copy (EQ, mode, operands);") (define_insn "*mmx_eq3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x") (eq:MMXMODEI - (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv") - (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x") + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (EQ, mode, operands)" "@ @@ -1565,10 +1568,10 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "mmx_gt3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODEI 0 
"register_operand" "=y,x,x") (gt:MMXMODEI - (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv") - (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:MMXMODEI 1 "register_operand" "0,0,x") + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ pcmpgt\t{%2, %0|%0, %2} @@ -1594,19 +1597,20 @@ "operands[2] = force_reg (mode, CONSTM1_RTX (mode));") (define_insn "mmx_andnot3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v") (and:MMXMODEI - (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv")) - (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,x,v")) + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ pandn\t{%2, %0|%0, %2} pandn\t{%2, %0|%0, %2} - vpandn\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "*,sse2_noavx,avx") - (set_attr "mmx_isa" "native,*,*") - (set_attr "type" "mmxadd,sselog,sselog") - (set_attr "mode" "DI,TI,TI")]) + vpandn\t{%2, %1, %0|%0, %1, %2} + vpandnd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,sse2_noavx,avx,avx512vl") + (set_attr "mmx_isa" "native,*,*,*") + (set_attr "type" "mmxadd,sselog,sselog,sselog") + (set_attr "mode" "DI,TI,TI,TI")]) (define_expand "mmx_3" [(set (match_operand:MMXMODEI 0 "register_operand") @@ -1625,20 +1629,21 @@ "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*mmx_3" - [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v") (any_logic:MMXMODEI - (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,Yv") - (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,Yv")))] + (match_operand:MMXMODEI 1 "register_mmxmem_operand" "%0,0,x,v") + (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x,v")))] 
"(TARGET_MMX || TARGET_MMX_WITH_SSE) && ix86_binary_operator_ok (, mode, operands)" "@ p\t{%2, %0|%0, %2} p\t{%2, %0|%0, %2} - vp\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "*,sse2_noavx,avx") - (set_attr "mmx_isa" "native,*,*") - (set_attr "type" "mmxadd,sselog,sselog") - (set_attr "mode" "DI,TI,TI")]) + vp\t{%2, %1, %0|%0, %1, %2} + vpd\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "*,sse2_noavx,avx,avx512vl") + (set_attr "mmx_isa" "native,*,*,*") + (set_attr "type" "mmxadd,sselog,sselog,sselog") + (set_attr "mode" "DI,TI,TI,TI")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -1652,12 +1657,12 @@ (define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")]) (define_insn_and_split "mmx_packswb" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") (vec_concat:V8QI (any_s_truncate:V4QI - (match_operand:V4HI 1 "register_operand" "0,0,Yv")) + (match_operand:V4HI 1 "register_operand" "0,0,Yw")) (any_s_truncate:V4QI - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))))] + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ packswb\t{%2, %0|%0, %2} @@ -1672,12 +1677,12 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_packssdw" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (vec_concat:V4HI (ss_truncate:V2HI - (match_operand:V2SI 1 "register_operand" "0,0,Yv")) + (match_operand:V2SI 1 "register_operand" "0,0,Yw")) (ss_truncate:V2HI - (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))))] + (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yw"))))] "TARGET_MMX || TARGET_MMX_WITH_SSE" "@ packssdw\t{%2, %0|%0, %2} @@ -1692,11 +1697,11 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_punpckhbw" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" 
"=y,x,Yw") (vec_select:V8QI (vec_concat:V16QI - (match_operand:V8QI 1 "register_operand" "0,0,Yv") - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")) + (match_operand:V8QI 1 "register_operand" "0,0,Yw") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")) (parallel [(const_int 4) (const_int 12) (const_int 5) (const_int 13) (const_int 6) (const_int 14) @@ -1715,11 +1720,11 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_punpcklbw" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") (vec_select:V8QI (vec_concat:V16QI - (match_operand:V8QI 1 "register_operand" "0,0,Yv") - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")) + (match_operand:V8QI 1 "register_operand" "0,0,Yw") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")) (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9) (const_int 2) (const_int 10) @@ -1738,11 +1743,11 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_punpckhwd" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (vec_select:V4HI (vec_concat:V8HI - (match_operand:V4HI 1 "register_operand" "0,0,Yv") - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")) + (match_operand:V4HI 1 "register_operand" "0,0,Yw") + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")) (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] "TARGET_MMX || TARGET_MMX_WITH_SSE" @@ -1759,11 +1764,11 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn_and_split "mmx_punpcklwd" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (vec_select:V4HI (vec_concat:V8HI - (match_operand:V4HI 1 "register_operand" "0,0,Yv") - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")) + (match_operand:V4HI 1 "register_operand" "0,0,Yw") + (match_operand:V4HI 2 
"register_mmxmem_operand" "ym,x,Yw")) (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] "TARGET_MMX || TARGET_MMX_WITH_SSE" @@ -1866,11 +1871,11 @@ }) (define_insn "*mmx_pinsrw" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,YW") (vec_merge:V4HI (vec_duplicate:V4HI (match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm")) - (match_operand:V4HI 1 "register_operand" "0,0,Yv") + (match_operand:V4HI 1 "register_operand" "0,0,YW") (match_operand:SI 3 "const_int_operand")))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A) @@ -1902,11 +1907,11 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "*mmx_pinsrb" - [(set (match_operand:V8QI 0 "register_operand" "=x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=x,YW") (vec_merge:V8QI (vec_duplicate:V8QI (match_operand:QI 2 "nonimmediate_operand" "rm,rm")) - (match_operand:V8QI 1 "register_operand" "0,Yv") + (match_operand:V8QI 1 "register_operand" "0,YW") (match_operand:SI 3 "const_int_operand")))] "TARGET_MMX_WITH_SSE && TARGET_SSE4_1 && ((unsigned) exact_log2 (INTVAL (operands[3])) @@ -1940,7 +1945,7 @@ (define_insn "*mmx_pextrw" [(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,r,m") (vec_select:HI - (match_operand:V4HI 1 "register_operand" "y,Yv,Yv") + (match_operand:V4HI 1 "register_operand" "y,YW,YW") (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n")])))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)" @@ -1959,7 +1964,7 @@ [(set (match_operand:SWI48 0 "register_operand" "=r,r") (zero_extend:SWI48 (vec_select:HI - (match_operand:V4HI 1 "register_operand" "y,Yv") + (match_operand:V4HI 1 "register_operand" "y,YW") (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]))))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)" @@ -1976,7 +1981,7 @@ (define_insn "*mmx_pextrb" [(set (match_operand:QI 0 "nonimmediate_operand" "=r,m") 
(vec_select:QI - (match_operand:V8QI 1 "register_operand" "Yv,Yv") + (match_operand:V8QI 1 "register_operand" "YW,YW") (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n,n")])))] "TARGET_MMX_WITH_SSE && TARGET_SSE4_1" "@ @@ -1993,7 +1998,7 @@ [(set (match_operand:SWI248 0 "register_operand" "=r") (zero_extend:SWI248 (vec_select:QI - (match_operand:V8QI 1 "register_operand" "Yv") + (match_operand:V8QI 1 "register_operand" "YW") (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))] "TARGET_MMX_WITH_SSE && TARGET_SSE4_1" "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}" @@ -2394,15 +2399,15 @@ }) (define_insn "*mmx_uavgv8qi3" - [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw") (truncate:V8QI (lshiftrt:V8HI (plus:V8HI (plus:V8HI (zero_extend:V8HI - (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yv")) + (match_operand:V8QI 1 "register_mmxmem_operand" "%0,0,Yw")) (zero_extend:V8HI - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))) + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw"))) (const_vector:V8HI [(const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) (const_int 1) @@ -2440,15 +2445,15 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "*mmx_uavgv4hi3" - [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv") + [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yw") (truncate:V4HI (lshiftrt:V4SI (plus:V4SI (plus:V4SI (zero_extend:V4SI - (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv")) + (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yw")) (zero_extend:V4SI - (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))) + (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))) (const_vector:V4SI [(const_int 1) (const_int 1) (const_int 1) (const_int 1)])) (const_int 1))))] @@ -2483,9 +2488,9 @@ }) (define_insn "mmx_psadbw" - [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yv") - (unspec:V1DI [(match_operand:V8QI 1 
"register_operand" "0,0,Yv") - (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")] + [(set (match_operand:V1DI 0 "register_operand" "=y,x,Yw") + (unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yw") + (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yw")] UNSPEC_PSADBW))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ca4372d..2cd8e04 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -566,7 +566,8 @@ (V4SI "v") (V8SI "v") (V16SI "v") (V2DI "v") (V4DI "v") (V8DI "v") (V4SF "v") (V8SF "v") (V16SF "v") - (V2DF "v") (V4DF "v") (V8DF "v")]) + (V2DF "v") (V4DF "v") (V8DF "v") + (TI "Yw") (V1TI "Yw") (V2TI "Yw") (V4TI "v")]) (define_mode_attr sse2_avx_avx512f [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f") @@ -11736,10 +11737,10 @@ "ix86_fixup_binary_operands_no_copy (, mode, operands);") (define_insn "*_3" - [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v") + [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,") (sat_plusminus:VI12_AVX2_AVX512BW - (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "0,v") - (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))] + (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "0,") + (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,m")))] "TARGET_SSE2 && && && ix86_binary_operator_ok (, mode, operands)" "@ @@ -11827,14 +11828,14 @@ "ix86_fixup_binary_operands_no_copy (MULT, mode, operands);") (define_insn "*mul3_highpart" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,") (truncate:VI2_AVX2 (lshiftrt: (mult: (any_extend: - (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")) + (match_operand:VI2_AVX2 1 "vector_operand" "%0,")) (any_extend: - (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm"))) + (match_operand:VI2_AVX2 2 "vector_operand" "xBm,m"))) (const_int 16))))] 
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2])) && && " @@ -12128,19 +12129,19 @@ "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);") (define_insn "*avx2_pmaddwd" - [(set (match_operand:V8SI 0 "register_operand" "=x,v") + [(set (match_operand:V8SI 0 "register_operand" "=Yw") (plus:V8SI (mult:V8SI (sign_extend:V8SI (vec_select:V8HI - (match_operand:V16HI 1 "nonimmediate_operand" "%x,v") + (match_operand:V16HI 1 "nonimmediate_operand" "%Yw") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))) (sign_extend:V8SI (vec_select:V8HI - (match_operand:V16HI 2 "nonimmediate_operand" "xm,vm") + (match_operand:V16HI 2 "nonimmediate_operand" "Ywm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -12161,8 +12162,7 @@ "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vpmaddwd\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseiadd") - (set_attr "isa" "*,avx512bw") - (set_attr "prefix" "vex,evex") + (set_attr "prefix" "vex") (set_attr "mode" "OI")]) (define_expand "sse2_pmaddwd" @@ -12192,17 +12192,17 @@ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);") (define_insn "*sse2_pmaddwd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,v") + [(set (match_operand:V4SI 0 "register_operand" "=x,Yw") (plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "vector_operand" "%0,x,v") + (match_operand:V8HI 1 "vector_operand" "%0,Yw") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm") + (match_operand:V8HI 2 "vector_operand" "xBm,Ywm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)])))) (mult:V4SI @@ -12217,13 +12217,12 @@ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ pmaddwd\t{%2, %0|%0, %2} - vpmaddwd\t{%2, %1, %0|%0, %1, %2} 
vpmaddwd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "atom_unit" "simul") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "avx512dq_mul3" @@ -12449,10 +12448,10 @@ (set_attr "mode" "")]) (define_insn "ashr3" - [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,") (ashiftrt:VI24_AVX2 - (match_operand:VI24_AVX2 1 "register_operand" "0,x") - (match_operand:DI 2 "nonmemory_operand" "xN,xN")))] + (match_operand:VI24_AVX2 1 "register_operand" "0,") + (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))] "TARGET_SSE2" "@ psra\t{%2, %0|%0, %2} @@ -12496,10 +12495,10 @@ (set_attr "mode" "")]) (define_insn "3" - [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x") + [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,") (any_lshift:VI248_AVX2 - (match_operand:VI248_AVX2 1 "register_operand" "0,x") - (match_operand:DI 2 "nonmemory_operand" "xN,xN")))] + (match_operand:VI248_AVX2 1 "register_operand" "0,") + (match_operand:DI 2 "nonmemory_operand" "xN,YwN")))] "TARGET_SSE2" "@ p\t{%2, %0|%0, %2} @@ -12571,9 +12570,9 @@ (set_attr "mode" "")]) (define_insn "_3" - [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v") + [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,Yw") (any_lshift:VIMAX_AVX2 - (match_operand:VIMAX_AVX2 1 "register_operand" "0,v") + (match_operand:VIMAX_AVX2 1 "register_operand" "0,Yw") (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))] "TARGET_SSE2" { @@ -12771,20 +12770,19 @@ (set_attr "mode" "TI")]) (define_insn "*v8hi3" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,v") + [(set (match_operand:V8HI 0 "register_operand" "=x,Yw") (smaxmin:V8HI - (match_operand:V8HI 1 "vector_operand" "%0,x,v") - (match_operand:V8HI 2 
"vector_operand" "xBm,xm,vm")))] + (match_operand:V8HI 1 "vector_operand" "%0,Yw") + (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ pw\t{%2, %0|%0, %2} - vpw\t{%2, %1, %0|%0, %1, %2} vpw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix_extra" "*,1,1") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "*,1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_expand "3" @@ -12856,20 +12854,19 @@ (set_attr "mode" "TI")]) (define_insn "*v16qi3" - [(set (match_operand:V16QI 0 "register_operand" "=x,x,v") + [(set (match_operand:V16QI 0 "register_operand" "=x,Yw") (umaxmin:V16QI - (match_operand:V16QI 1 "vector_operand" "%0,x,v") - (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))] + (match_operand:V16QI 1 "vector_operand" "%0,Yw") + (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")))] "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ pb\t{%2, %0|%0, %2} - vpb\t{%2, %1, %0|%0, %1, %2} vpb\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix_extra" "*,1,1") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix_extra" "*,1") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -13888,57 +13885,54 @@ }) (define_insn "_packsswb" - [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,") (vec_concat:VI1_AVX512 (ss_truncate: - (match_operand: 1 "register_operand" "0,x,v")) + (match_operand: 1 "register_operand" "0,")) (ss_truncate: - (match_operand: 2 "vector_operand" 
"xBm,xm,vm"))))] + (match_operand: 2 "vector_operand" "xBm,m"))))] "TARGET_SSE2 && && " "@ packsswb\t{%2, %0|%0, %2} - vpacksswb\t{%2, %1, %0|%0, %1, %2} vpacksswb\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix" "orig,,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,") (set_attr "mode" "")]) (define_insn "_packssdw" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,") (vec_concat:VI2_AVX2 (ss_truncate: - (match_operand: 1 "register_operand" "0,x,v")) + (match_operand: 1 "register_operand" "0,")) (ss_truncate: - (match_operand: 2 "vector_operand" "xBm,xm,vm"))))] + (match_operand: 2 "vector_operand" "xBm,m"))))] "TARGET_SSE2 && && " "@ packssdw\t{%2, %0|%0, %2} - vpackssdw\t{%2, %1, %0|%0, %1, %2} vpackssdw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix" "orig,,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,") (set_attr "mode" "")]) (define_insn "_packuswb" - [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,") (vec_concat:VI1_AVX512 (us_truncate: - (match_operand: 1 "register_operand" "0,x,v")) + (match_operand: 1 "register_operand" "0,")) (us_truncate: - (match_operand: 2 "vector_operand" "xBm,xm,vm"))))] + (match_operand: 2 "vector_operand" "xBm,m"))))] "TARGET_SSE2 && && " "@ packuswb\t{%2, %0|%0, %2} - vpackuswb\t{%2, %1, %0|%0, %1, %2} vpackuswb\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog") - (set_attr "prefix_data16" "1,*,*") - (set_attr "prefix" "orig,,evex") + (set_attr "prefix_data16" "1,*") + (set_attr "prefix" "orig,") 
(set_attr "mode" "")]) (define_insn "avx512bw_interleave_highv64qi" @@ -13986,11 +13980,11 @@ (set_attr "mode" "XI")]) (define_insn "avx2_interleave_highv32qi" - [(set (match_operand:V32QI 0 "register_operand" "=v") + [(set (match_operand:V32QI 0 "register_operand" "=Yw") (vec_select:V32QI (vec_concat:V64QI - (match_operand:V32QI 1 "register_operand" "v") - (match_operand:V32QI 2 "nonimmediate_operand" "vm")) + (match_operand:V32QI 1 "register_operand" "Yw") + (match_operand:V32QI 2 "nonimmediate_operand" "Ywm")) (parallel [(const_int 8) (const_int 40) (const_int 9) (const_int 41) (const_int 10) (const_int 42) @@ -14007,18 +14001,18 @@ (const_int 29) (const_int 61) (const_int 30) (const_int 62) (const_int 31) (const_int 63)])))] - "TARGET_AVX2 && " + "TARGET_AVX2 && && " "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog") (set_attr "prefix" "") (set_attr "mode" "OI")]) (define_insn "vec_interleave_highv16qi" - [(set (match_operand:V16QI 0 "register_operand" "=x,v") + [(set (match_operand:V16QI 0 "register_operand" "=x,Yw") (vec_select:V16QI (vec_concat:V32QI - (match_operand:V16QI 1 "register_operand" "0,v") - (match_operand:V16QI 2 "vector_operand" "xBm,vm")) + (match_operand:V16QI 1 "register_operand" "0,Yw") + (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")) (parallel [(const_int 8) (const_int 24) (const_int 9) (const_int 25) (const_int 10) (const_int 26) @@ -14027,7 +14021,7 @@ (const_int 13) (const_int 29) (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))] - "TARGET_SSE2 && " + "TARGET_SSE2 && && " "@ punpckhbw\t{%2, %0|%0, %2} vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" @@ -14082,11 +14076,11 @@ (set_attr "mode" "XI")]) (define_insn "avx2_interleave_lowv32qi" - [(set (match_operand:V32QI 0 "register_operand" "=v") + [(set (match_operand:V32QI 0 "register_operand" "=Yw") (vec_select:V32QI (vec_concat:V64QI - (match_operand:V32QI 1 "register_operand" "v") - (match_operand:V32QI 2 "nonimmediate_operand" "vm")) + (match_operand:V32QI 1 
"register_operand" "Yw") + (match_operand:V32QI 2 "nonimmediate_operand" "Ywm")) (parallel [(const_int 0) (const_int 32) (const_int 1) (const_int 33) (const_int 2) (const_int 34) @@ -14110,11 +14104,11 @@ (set_attr "mode" "OI")]) (define_insn "vec_interleave_lowv16qi" - [(set (match_operand:V16QI 0 "register_operand" "=x,v") + [(set (match_operand:V16QI 0 "register_operand" "=x,Yw") (vec_select:V16QI (vec_concat:V32QI - (match_operand:V16QI 1 "register_operand" "0,v") - (match_operand:V16QI 2 "vector_operand" "xBm,vm")) + (match_operand:V16QI 1 "register_operand" "0,Yw") + (match_operand:V16QI 2 "vector_operand" "xBm,Ywm")) (parallel [(const_int 0) (const_int 16) (const_int 1) (const_int 17) (const_int 2) (const_int 18) @@ -14162,11 +14156,11 @@ (set_attr "mode" "XI")]) (define_insn "avx2_interleave_highv16hi" - [(set (match_operand:V16HI 0 "register_operand" "=v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (vec_select:V16HI (vec_concat:V32HI - (match_operand:V16HI 1 "register_operand" "v") - (match_operand:V16HI 2 "nonimmediate_operand" "vm")) + (match_operand:V16HI 1 "register_operand" "Yw") + (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")) (parallel [(const_int 4) (const_int 20) (const_int 5) (const_int 21) (const_int 6) (const_int 22) @@ -14182,11 +14176,11 @@ (set_attr "mode" "OI")]) (define_insn "vec_interleave_highv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=x,v") + [(set (match_operand:V8HI 0 "register_operand" "=x,Yw") (vec_select:V8HI (vec_concat:V16HI - (match_operand:V8HI 1 "register_operand" "0,v") - (match_operand:V8HI 2 "vector_operand" "xBm,vm")) + (match_operand:V8HI 1 "register_operand" "0,Yw") + (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")) (parallel [(const_int 4) (const_int 12) (const_int 5) (const_int 13) (const_int 6) (const_int 14) @@ -14230,11 +14224,11 @@ (set_attr "mode" "XI")]) (define_insn "avx2_interleave_lowv16hi" - [(set (match_operand:V16HI 0 "register_operand" "=v") + [(set (match_operand:V16HI 
0 "register_operand" "=Yw") (vec_select:V16HI (vec_concat:V32HI - (match_operand:V16HI 1 "register_operand" "v") - (match_operand:V16HI 2 "nonimmediate_operand" "vm")) + (match_operand:V16HI 1 "register_operand" "Yw") + (match_operand:V16HI 2 "nonimmediate_operand" "Ywm")) (parallel [(const_int 0) (const_int 16) (const_int 1) (const_int 17) (const_int 2) (const_int 18) @@ -14250,11 +14244,11 @@ (set_attr "mode" "OI")]) (define_insn "vec_interleave_lowv8hi" - [(set (match_operand:V8HI 0 "register_operand" "=x,v") + [(set (match_operand:V8HI 0 "register_operand" "=x,Yw") (vec_select:V8HI (vec_concat:V16HI - (match_operand:V8HI 1 "register_operand" "0,v") - (match_operand:V8HI 2 "vector_operand" "xBm,vm")) + (match_operand:V8HI 1 "register_operand" "0,Yw") + (match_operand:V8HI 2 "vector_operand" "xBm,Ywm")) (parallel [(const_int 0) (const_int 8) (const_int 1) (const_int 9) (const_int 2) (const_int 10) @@ -15190,9 +15184,9 @@ }) (define_insn "avx2_pshuflw_1" - [(set (match_operand:V16HI 0 "register_operand" "=v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (vec_select:V16HI - (match_operand:V16HI 1 "nonimmediate_operand" "vm") + (match_operand:V16HI 1 "nonimmediate_operand" "Ywm") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -15264,9 +15258,9 @@ }) (define_insn "sse2_pshuflw_1" - [(set (match_operand:V8HI 0 "register_operand" "=v") + [(set (match_operand:V8HI 0 "register_operand" "=Yw") (vec_select:V8HI - (match_operand:V8HI 1 "vector_operand" "vBm") + (match_operand:V8HI 1 "vector_operand" "YwBm") (parallel [(match_operand 2 "const_0_to_3_operand") (match_operand 3 "const_0_to_3_operand") (match_operand 4 "const_0_to_3_operand") @@ -15347,9 +15341,9 @@ }) (define_insn "avx2_pshufhw_1" - [(set (match_operand:V16HI 0 "register_operand" "=v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (vec_select:V16HI - (match_operand:V16HI 1 "nonimmediate_operand" 
"vm") + (match_operand:V16HI 1 "nonimmediate_operand" "Ywm") (parallel [(const_int 0) (const_int 1) (const_int 2) @@ -15421,9 +15415,9 @@ }) (define_insn "sse2_pshufhw_1" - [(set (match_operand:V8HI 0 "register_operand" "=v") + [(set (match_operand:V8HI 0 "register_operand" "=Yw") (vec_select:V8HI - (match_operand:V8HI 1 "vector_operand" "vBm") + (match_operand:V8HI 1 "vector_operand" "YwBm") (parallel [(const_int 0) (const_int 1) (const_int 2) @@ -16213,15 +16207,15 @@ }) (define_insn "*_uavg3" - [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v") + [(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,") (truncate:VI12_AVX2_AVX512BW (lshiftrt: (plus: (plus: (zero_extend: - (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v")) + (match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,")) (zero_extend: - (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm"))) + (match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,m"))) (match_operand: "const1_operand")) (const_int 1))))] "TARGET_SSE2 && && @@ -16238,10 +16232,10 @@ ;; The correct representation for this is absolutely enormous, and ;; surely not generally useful. 
(define_insn "_psadbw" - [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v") + [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,YW") (unspec:VI8_AVX2_AVX512BW - [(match_operand: 1 "register_operand" "0,v") - (match_operand: 2 "vector_operand" "xBm,vm")] + [(match_operand: 1 "register_operand" "0,YW") + (match_operand: 2 "vector_operand" "xBm,YWm")] UNSPEC_PSADBW))] "TARGET_SSE2" "@ @@ -16815,12 +16809,12 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "avx2_pmaddubsw256" - [(set (match_operand:V16HI 0 "register_operand" "=x,v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (ss_plus:V16HI (mult:V16HI (zero_extend:V16HI (vec_select:V16QI - (match_operand:V32QI 1 "register_operand" "x,v") + (match_operand:V32QI 1 "register_operand" "Yw") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -16831,7 +16825,7 @@ (const_int 28) (const_int 30)]))) (sign_extend:V16HI (vec_select:V16QI - (match_operand:V32QI 2 "nonimmediate_operand" "xm,vm") + (match_operand:V32QI 2 "nonimmediate_operand" "Ywm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -16863,10 +16857,9 @@ (const_int 29) (const_int 31)]))))))] "TARGET_AVX2" "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "*,avx512bw") - (set_attr "type" "sseiadd") + [(set_attr "type" "sseiadd") (set_attr "prefix_extra" "1") - (set_attr "prefix" "vex,evex") + (set_attr "prefix" "vex") (set_attr "mode" "OI")]) ;; The correct representation for this is absolutely enormous, and @@ -16919,19 +16912,19 @@ (set_attr "mode" "XI")]) (define_insn "ssse3_pmaddubsw128" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,v") + [(set (match_operand:V8HI 0 "register_operand" "=x,Yw") (ss_plus:V8HI (mult:V8HI (zero_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 1 "register_operand" "0,x,v") + (match_operand:V16QI 1 "register_operand" "0,Yw") (parallel [(const_int 0) (const_int 2) (const_int 4) 
(const_int 6) (const_int 8) (const_int 10) (const_int 12) (const_int 14)]))) (sign_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm") + (match_operand:V16QI 2 "vector_operand" "xBm,Ywm") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6) (const_int 8) (const_int 10) @@ -16952,14 +16945,13 @@ "TARGET_SSSE3" "@ pmaddubsw\t{%2, %0|%0, %2} - vpmaddubsw\t{%2, %1, %0|%0, %1, %2} vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseiadd") (set_attr "atom_unit" "simul") - (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix" "orig,vex") (set_attr "mode" "TI")]) (define_insn "ssse3_pmaddubsw" @@ -17065,16 +17057,16 @@ }) (define_insn "*_pmulhrsw3" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,") (truncate:VI2_AVX2 (lshiftrt: (plus: (lshiftrt: (mult: (sign_extend: - (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v")) + (match_operand:VI2_AVX2 1 "vector_operand" "%0,")) (sign_extend: - (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm"))) + (match_operand:VI2_AVX2 2 "vector_operand" "xBm,m"))) (const_int 14)) (match_operand:VI2_AVX2 3 "const1_operand")) (const_int 1))))] @@ -17082,13 +17074,12 @@ && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "@ pmulhrsw\t{%2, %0|%0, %2} - vpmulhrsw\t{%2, %1, %0|%0, %1, %2} vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseimul") - (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex,evex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "mode" "")]) (define_expand "smulhrsv4hi3" @@ -17160,21 +17151,20 @@ (set_attr "mode" "DI,TI,TI")]) (define_insn "_pshufb3" - [(set 
(match_operand:VI1_AVX512 0 "register_operand" "=x,x,v") + [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,") (unspec:VI1_AVX512 - [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v") - (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")] + [(match_operand:VI1_AVX512 1 "register_operand" "0,") + (match_operand:VI1_AVX512 2 "vector_operand" "xBm,m")] UNSPEC_PSHUFB))] "TARGET_SSSE3 && && " "@ pshufb\t{%2, %0|%0, %2} - vpshufb\t{%2, %1, %0|%0, %1, %2} vpshufb\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sselog1") - (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,maybe_evex,evex") + (set_attr "prefix" "orig,maybe_evex") (set_attr "btver2_decode" "vector") (set_attr "mode" "")]) @@ -17274,11 +17264,11 @@ (set_attr "mode" "")]) (define_insn "_palignr" - [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v") + [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,") (unspec:SSESCALARMODE - [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v") - (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm") - (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")] + [(match_operand:SSESCALARMODE 1 "register_operand" "0,") + (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,m") + (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")] UNSPEC_PALIGNR))] "TARGET_SSSE3" { @@ -17289,19 +17279,18 @@ case 0: return "palignr\t{%3, %2, %0|%0, %2, %3}"; case 1: - case 2: return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; default: gcc_unreachable (); } } - [(set_attr "isa" "noavx,avx,avx512bw") + [(set_attr "isa" "noavx,avx") (set_attr "type" "sseishft") (set_attr "atom_unit" "sishuf") - (set_attr "prefix_data16" "1,*,*") + (set_attr "prefix_data16" "1,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex,evex") + (set_attr "prefix" 
"orig,vex") (set_attr "mode" "")]) (define_insn_and_split "ssse3_palignrdi" @@ -17367,9 +17356,9 @@ (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) (define_insn "*abs2" - [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v") + [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=") (abs:VI1248_AVX512VL_AVX512BW - (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))] + (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "Bm")))] "TARGET_SSSE3" "%vpabs\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") @@ -17731,22 +17720,21 @@ (set_attr "mode" "")]) (define_insn "_packusdw" - [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v") + [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,") (vec_concat:VI2_AVX2 (us_truncate: - (match_operand: 1 "register_operand" "0,0,x,v")) + (match_operand: 1 "register_operand" "0,0,")) (us_truncate: - (match_operand: 2 "vector_operand" "YrBm,*xBm,xm,vm"))))] + (match_operand: 2 "vector_operand" "YrBm,*xBm,m"))))] "TARGET_SSE4_1 && && " "@ packusdw\t{%2, %0|%0, %2} packusdw\t{%2, %0|%0, %2} - vpackusdw\t{%2, %1, %0|%0, %1, %2} vpackusdw\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,noavx,avx,avx512bw") + [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "sselog") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,orig,,evex") + (set_attr "prefix" "orig,orig,") (set_attr "mode" "")]) (define_insn "_pblendvb" @@ -17867,9 +17855,9 @@ (set_attr "mode" "TI")]) (define_insn "avx2_v16qiv16hi2" - [(set (match_operand:V16HI 0 "register_operand" "=v") + [(set (match_operand:V16HI 0 "register_operand" "=Yw") (any_extend:V16HI - (match_operand:V16QI 1 "nonimmediate_operand" "vm")))] + (match_operand:V16QI 1 "nonimmediate_operand" "Ywm")))] "TARGET_AVX2 && && " "vpmovbw\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") @@ -17935,10 +17923,10 @@ "TARGET_AVX512BW") (define_insn "sse4_1_v8qiv8hi2" - [(set (match_operand:V8HI 0 
"register_operand" "=Yr,*x,v") + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw") (any_extend:V8HI (vec_select:V8QI - (match_operand:V16QI 1 "register_operand" "Yr,*x,v") + (match_operand:V16QI 1 "register_operand" "Yr,*x,Yw") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3) (const_int 4) (const_int 5) @@ -17952,7 +17940,7 @@ (set_attr "mode" "TI")]) (define_insn "*sse4_1_v8qiv8hi2_1" - [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v") + [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,Yw") (any_extend:V8HI (match_operand:V8QI 1 "memory_operand" "m,m,m")))] "TARGET_SSE4_1 && && " diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c new file mode 100644 index 0000000..8bb3a03 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr99321-2.c @@ -0,0 +1,94 @@ +/* PR target/99321 */ +/* { dg-do assemble { target lp64 } } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target assembler_march_noavx512bw } */ +/* { dg-options "-O2 -mavx512vl -mno-avx512bw -Wa,-march=+noavx512bw" } */ + +#include + +typedef unsigned char V1 __attribute__((vector_size (16))); +typedef unsigned char V2 __attribute__((vector_size (32))); +typedef unsigned short V3 __attribute__((vector_size (16))); +typedef unsigned short V4 __attribute__((vector_size (32))); + +void f1 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_abs_epi8 ((__m128i) b); __asm ("" : : "v" (a)); } +void f2 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_abs_epi8 ((__m256i) b); __asm ("" : : "v" (a)); } +void f3 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_abs_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f4 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" 
(b)); a = (V4) _mm256_abs_epi16 ((__m256i) b); __asm ("" : : "v" (a)); } +void f5 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_adds_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f6 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_adds_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f7 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_adds_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f8 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f9 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_subs_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f10 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_subs_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f11 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_subs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f12 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_subs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f13 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_adds_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f14 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_adds_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f15 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" 
(b)); a = (V3) _mm_adds_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f16 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f17 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_subs_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f18 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_subs_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f19 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_subs_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f20 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_subs_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f21 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_alignr_epi8 ((__m128i) a, (__m128i) b, 5); __asm ("" : : "v" (a)); } +void f22 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_alignr_epi8 ((__m256i) a, (__m256i) b, 5); __asm ("" : : "v" (a)); } +void f23 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f24 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_avg_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f25 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_avg_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f26 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); 
__asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_avg_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f27 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_avg_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f28 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_broadcastb_epi8 ((__m128i) b); __asm ("" : : "v" (a)); } +void f29 (void) { register V2 a __asm ("%xmm16"); register V1 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_broadcastb_epi8 ((__m128i) b); __asm ("" : : "v" (a)); } +void f30 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_broadcastw_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f31 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_broadcastw_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +int f32 (void) { register V1 a __asm ("%xmm16"); __asm ("" : "=v" (a)); return _mm_extract_epi8 ((__m128i) a, 3); } +int f33 (void) { register V3 a __asm ("%xmm16"); __asm ("" : "=v" (a)); return _mm_extract_epi16 ((__m128i) a, 3); } +void f34 (int c) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_insert_epi8 ((__m128i) b, c, 5); __asm ("" : : "v" (a)); } +void f35 (int c) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_insert_epi16 ((__m128i) b, c, 5); __asm ("" : : "v" (a)); } +void f36 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_maddubs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f37 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_maddubs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f38 
(void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_madd_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f39 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_madd_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f40 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_cvtepi8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f41 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_cvtepi8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f42 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_cvtepu8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f43 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_cvtepu8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); } +void f44 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhrs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f45 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhrs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f46 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhi_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f47 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhi_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f48 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhi_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } 
+void f49 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhi_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f50 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_sad_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f51 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_sad_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f52 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_shuffle_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f53 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_shuffle_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f54 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_shufflehi_epi16 ((__m128i) b, 0x5b); __asm ("" : : "v" (a)); } +void f55 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_shufflehi_epi16 ((__m256i) b, 0x5b); __asm ("" : : "v" (a)); } +void f56 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_shufflelo_epi16 ((__m128i) b, 0x5b); __asm ("" : : "v" (a)); } +void f57 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_shufflelo_epi16 ((__m256i) b, 0x5b); __asm ("" : : "v" (a)); } +void f58 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_slli_si128 ((__m128i) b, 3); __asm ("" : : "v" (a)); } +void f59 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_slli_si256 ((__m256i) b, 3); __asm ("" : : "v" (a)); } +void 
f60 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_srli_si128 ((__m128i) b, 3); __asm ("" : : "v" (a)); } +void f61 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_srli_si256 ((__m256i) b, 3); __asm ("" : : "v" (a)); } +void f62 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_sll_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f63 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_sll_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f64 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_slli_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); } +void f65 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_slli_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); } +void f66 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srl_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f67 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srl_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f68 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srli_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); } +void f69 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srli_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); } +void f70 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_sra_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f71 (void) { register 
V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_sra_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f72 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srai_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); } +void f73 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srai_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); } +void f74 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_unpackhi_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f75 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_unpackhi_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f76 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_unpackhi_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f77 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_unpackhi_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f78 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_unpacklo_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f79 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_unpacklo_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); } +void f80 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_unpacklo_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); } +void f81 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_unpacklo_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : 
"v" (a)); } -- 2.7.4