From: liuhongt Date: Tue, 20 Jul 2021 10:32:35 +0000 (+0800) Subject: Support logic shift left/right for avx512 mask type. X-Git-Tag: upstream/12.2.0~6235 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a56c251898ea70b46798d7893a871bcfe318529b;p=platform%2Fupstream%2Fgcc.git Support logic shift left/right for avx512 mask type. gcc/ChangeLog: * config/i386/constraints.md (Wb): New constraint. (Ww): Ditto. * config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask shift. (*ashlqi3_1): Ditto. (*3_1): Split to .. (*ashr3_1): this, ... (*lshr3_1): and this, also extend this pattern to avx512 mask registers. (*3_1): Split to .. (*ashr3_1): this, ... (*lshrqi3_1): and this, also extend this pattern to avx512 mask registers. (*lshrhi3_1): And this, also extend this pattern to avx512 mask registers. * config/i386/sse.md (k): New define_split after it to convert generic shift pattern to mask shift ones. gcc/testsuite/ChangeLog: * gcc.target/i386/mask-shift.c: New test. --- diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 485e3f5..4aa28a5 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -222,6 +222,16 @@ (match_operand 0 "vector_all_ones_operand")))) ;; Integer constant constraints. +(define_constraint "Wb" + "Integer constant in the range 0 @dots{} 7, for 8-bit shifts." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 7)"))) + +(define_constraint "Ww" + "Integer constant in the range 0 @dots{} 15, for 16-bit shifts." + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 15)"))) + (define_constraint "I" "Integer constant in the range 0 @dots{} 31, for 32-bit shifts." (and (match_code "const_int") diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8b809c4..44ae18e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1136,6 +1136,7 @@ ;; Immediate operand constraint for shifts. (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")]) +(define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")]) ;; Print register name in the specified mode. (define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")]) @@ -11088,9 +11089,9 @@ (set_attr "mode" "")]) (define_insn "*ashl3_1" - [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r") - (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm") - (match_operand:QI 2 "nonmemory_operand" "c,M,r"))) + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k") + (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k") + (match_operand:QI 2 "nonmemory_operand" "c,M,r,"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, mode, operands)" { @@ -11098,6 +11099,7 @@ { case TYPE_LEA: case TYPE_ISHIFTX: + case TYPE_MSKLOG: return "#"; case TYPE_ALU: @@ -11113,7 +11115,7 @@ return "sal{}\t{%2, %0|%0, %2}"; } } - [(set_attr "isa" "*,*,bmi2") + [(set_attr "isa" "*,*,bmi2,avx512bw") (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") @@ -11123,6 +11125,8 @@ (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) (const_string "alu") + (eq_attr "alternative" "3") + (const_string "msklog") ] (const_string "ishift"))) (set (attr "length_immediate") @@ -11218,15 +11222,16 @@ "operands[2] = gen_lowpart (SImode, operands[2]);") (define_insn "*ashlhi3_1" - [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp") - (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l") - (match_operand:QI 2 "nonmemory_operand" "cI,M"))) + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k") + (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k") + (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, HImode, operands)" { switch (get_attr_type (insn)) { case TYPE_LEA: + case TYPE_MSKLOG: return "#"; case TYPE_ALU: @@ -11241,9 +11246,12 @@ return "sal{w}\t{%2, %0|%0, %2}"; } } - [(set (attr "type") + [(set_attr "isa" "*,*,avx512f") + (set (attr "type") (cond [(eq_attr "alternative" "1") (const_string "lea") + (eq_attr "alternative" "2") + (const_string "msklog") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -11259,18 +11267,19 @@ (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "HI,SI")]) + (set_attr "mode" "HI,SI,HI")]) (define_insn "*ashlqi3_1" - [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp") - (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l") - (match_operand:QI 2 "nonmemory_operand" "cI,cI,M"))) + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k") + (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k") + (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb"))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, QImode, operands)" { switch (get_attr_type (insn)) { case TYPE_LEA: + case TYPE_MSKLOG: return "#"; case TYPE_ALU: @@ -11298,9 +11307,12 @@ } } } - [(set (attr "type") + [(set_attr "isa" "*,*,*,avx512dq") + (set (attr "type") (cond [(eq_attr "alternative" "2") (const_string "lea") + (eq_attr "alternative" "3") + (const_string "msklog") (and (and (match_test "TARGET_DOUBLE_WITH_ADD") (match_operand 0 "register_operand")) (match_operand 2 "const1_operand")) @@ -11316,7 +11328,7 @@ (match_test "optimize_function_for_size_p (cfun)"))))) (const_string "0") (const_string "*"))) - (set_attr "mode" "QI,SI,SI") + (set_attr "mode" "QI,SI,SI,QI") ;; Potential partial reg stall on alternative 1. (set (attr "preferred_for_speed") (cond [(eq_attr "alternative" "1") @@ -11818,13 +11830,13 @@ [(set_attr "type" "ishiftx") (set_attr "mode" "")]) -(define_insn "*3_1" +(define_insn "*ashr3_1" [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r") - (any_shiftrt:SWI48 + (ashiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,rm") (match_operand:QI 2 "nonmemory_operand" "c,r"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (, mode, operands)" + "ix86_binary_operator_ok (ASHIFTRT, mode, operands)" { switch (get_attr_type (insn)) { @@ -11834,9 +11846,9 @@ default: if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "{}\t%0"; + return "sar{}\t%0"; else - return "{}\t{%2, %0|%0, %2}"; + return "sar{}\t{%2, %0|%0, %2}"; } } [(set_attr "isa" "*,bmi2") @@ -11850,6 +11862,40 @@ (const_string "*"))) (set_attr "mode" "")]) +(define_insn "*lshr3_1" + [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k") + (lshiftrt:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k") + (match_operand:QI 2 "nonmemory_operand" "c,r,"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, mode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ISHIFTX: + case TYPE_MSKLOG: + return "#"; + + default: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "shr{}\t%0"; + else + return "shr{}\t{%2, %0|%0, %2}"; + } +} + [(set_attr "isa" "*,bmi2,avx512bw") + (set_attr "type" "ishift,ishiftx,msklog") + (set (attr "length_immediate") + (if_then_else + (and (and (match_operand 2 "const1_operand") + (eq_attr "alternative" "0")) + (ior (match_test "TARGET_SHIFT1") + (match_test "optimize_function_for_size_p (cfun)"))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "")]) + ;; Convert shift to the shiftx pattern to avoid flags dependency. (define_split [(set (match_operand:SWI48 0 "register_operand") @@ -11915,19 +11961,19 @@ (zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))] "operands[2] = gen_lowpart (SImode, operands[2]);") -(define_insn "*3_1" +(define_insn "*ashr3_1" [(set (match_operand:SWI12 0 "nonimmediate_operand" "=m") - (any_shiftrt:SWI12 + (ashiftrt:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0") (match_operand:QI 2 "nonmemory_operand" "c"))) (clobber (reg:CC FLAGS_REG))] - "ix86_binary_operator_ok (, mode, operands)" + "ix86_binary_operator_ok (ASHIFTRT, mode, operands)" { if (operands[2] == const1_rtx && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) - return "{}\t%0"; + return "sar{}\t%0"; else - return "{}\t{%2, %0|%0, %2}"; + return "sar{}\t{%2, %0|%0, %2}"; } [(set_attr "type" "ishift") (set (attr "length_immediate") @@ -11939,6 +11985,74 @@ (const_string "*"))) (set_attr "mode" "")]) +(define_insn "*lshrqi3_1" + [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k") + (lshiftrt:QI + (match_operand:QI 1 "nonimmediate_operand" "0, k") + (match_operand:QI 2 "nonmemory_operand" "cI,Wb"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ISHIFT: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "shr{b}\t%0"; + else + return "shr{b}\t{%2, %0|%0, %2}"; + case TYPE_MSKLOG: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*,avx512dq") + (set_attr "type" "ishift,msklog") + (set (attr "length_immediate") + (if_then_else + (and (and (match_operand 2 "const1_operand") + (eq_attr "alternative" "0")) + (ior (match_test "TARGET_SHIFT1") + (match_test "optimize_function_for_size_p (cfun)"))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "QI")]) + +(define_insn "*lshrhi3_1" + [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k") + (lshiftrt:HI + (match_operand:HI 1 "nonimmediate_operand" "0, k") + (match_operand:QI 2 "nonmemory_operand" "cI, Ww"))) + (clobber (reg:CC FLAGS_REG))] + "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)" +{ + switch (get_attr_type (insn)) + { + case TYPE_ISHIFT: + if (operands[2] == const1_rtx + && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))) + return "shr{w}\t%0"; + else + return "shr{w}\t{%2, %0|%0, %2}"; + case TYPE_MSKLOG: + return "#"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "*, avx512f") + (set_attr "type" "ishift,msklog") + (set (attr "length_immediate") + (if_then_else + (and (and (match_operand 2 "const1_operand") + (eq_attr "alternative" "0")) + (ior (match_test "TARGET_SHIFT1") + (match_test "optimize_function_for_size_p (cfun)"))) + (const_string "0") + (const_string "*"))) + (set_attr "mode" "HI")]) + (define_insn "*3_1_slp" [(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+")) (any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ab29999..f8759e4 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1755,6 +1755,20 @@ (set_attr "prefix" "vex") (set_attr "mode" "")]) +(define_split + [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand") + (any_lshift:SWI1248_AVX512BW + (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand") + (match_operand 2 "const_int_operand"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_AVX512F && reload_completed" + [(parallel + [(set (match_dup 0) + (any_lshift:SWI1248_AVX512BW + (match_dup 1) + (match_dup 2))) + (unspec [(const_int 0)] UNSPEC_MASKOP)])]) + (define_insn "ktest" [(set (reg:CC FLAGS_REG) (unspec:CC diff --git a/gcc/testsuite/gcc.target/i386/mask-shift.c b/gcc/testsuite/gcc.target/i386/mask-shift.c new file mode 100644 index 0000000..4cb6ef3 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/mask-shift.c @@ -0,0 +1,83 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512bw -mavx512dq -O2" } */ + +#include +void +fooq (__m512i a, __m512i b, void* p) +{ + __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b); + m1 >>= 4; + _mm512_mask_storeu_epi64 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftrb} "1" } } */ + +void +food (__m512i a, __m512i b, void* p) +{ + __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b); + m1 >>= 8; + _mm512_mask_storeu_epi32 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftrw} "1" } } */ + +void +foow (__m512i a, __m512i b, void* p) +{ + __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b); + m1 >>= 16; + _mm512_mask_storeu_epi16 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftrd} "1" } } */ + +void +foob (__m512i a, __m512i b, void* p) +{ + __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b); + m1 >>= 32; + _mm512_mask_storeu_epi8 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftrq} "1" { target { ! ia32 } } } } */ + +void +fooq1 (__m512i a, __m512i b, void* p) +{ + __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b); + m1 <<= 4; + _mm512_mask_storeu_epi64 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftlb} "1" } } */ + +void +food1 (__m512i a, __m512i b, void* p) +{ + __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b); + m1 <<= 8; + _mm512_mask_storeu_epi32 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftlw} "1" } } */ + +void +foow1 (__m512i a, __m512i b, void* p) +{ + __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b); + m1 <<= 16; + _mm512_mask_storeu_epi16 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftld} "1" } } */ + +void +foob1 (__m512i a, __m512i b, void* p) +{ + __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b); + m1 <<= 32; + _mm512_mask_storeu_epi8 (p, m1, a); +} + +/* { dg-final { scan-assembler-times {(?n)kshiftlq} "1" { target { ! ia32 } } } } */