;; Immediate operand constraint for shifts.
(define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
+(define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])
;; Print register name in the specified mode.
(define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
(set_attr "mode" "<MODE>")])
(define_insn "*ashl<mode>3_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
- (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")
+ (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
{
{
case TYPE_LEA:
case TYPE_ISHIFTX:
+ case TYPE_MSKLOG:
return "#";
case TYPE_ALU:
return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,*,bmi2")
+ [(set_attr "isa" "*,*,bmi2,avx512bw")
(set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
(const_string "alu")
+ (eq_attr "alternative" "3")
+ (const_string "msklog")
]
(const_string "ishift")))
(set (attr "length_immediate")
"operands[2] = gen_lowpart (SImode, operands[2]);")
(define_insn "*ashlhi3_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp")
- (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")
- (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (ASHIFT, HImode, operands)"
{
switch (get_attr_type (insn))
{
case TYPE_LEA:
+ case TYPE_MSKLOG:
return "#";
case TYPE_ALU:
return "sal{w}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,avx512f")
+ (set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
+ (eq_attr "alternative" "2")
+ (const_string "msklog")
(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
- (set_attr "mode" "HI,SI")])
+ (set_attr "mode" "HI,SI,HI")])
(define_insn "*ashlqi3_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp")
- (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
- (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (ASHIFT, QImode, operands)"
{
switch (get_attr_type (insn))
{
case TYPE_LEA:
+ case TYPE_MSKLOG:
return "#";
case TYPE_ALU:
}
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,*,avx512dq")
+ (set (attr "type")
(cond [(eq_attr "alternative" "2")
(const_string "lea")
+ (eq_attr "alternative" "3")
+ (const_string "msklog")
(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
- (set_attr "mode" "QI,SI,SI")
+ (set_attr "mode" "QI,SI,SI,QI")
;; Potential partial reg stall on alternative 1.
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "1")
[(set_attr "type" "ishiftx")
(set_attr "mode" "<MODE>")])
-(define_insn "*<insn><mode>3_1"
+(define_insn "*ashr<mode>3_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
- (any_shiftrt:SWI48
+ (ashiftrt:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
(match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
{
switch (get_attr_type (insn))
{
default:
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<shift>{<imodesuffix>}\t%0";
+ return "sar{<imodesuffix>}\t%0";
else
- return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
[(set_attr "isa" "*,bmi2")
(const_string "*")))
(set_attr "mode" "<MODE>")])
+(define_insn "*lshr<mode>3_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")
+ (lshiftrt:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ISHIFTX:
+ case TYPE_MSKLOG:
+ return "#";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "shr{<imodesuffix>}\t%0";
+ else
+ return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "isa" "*,bmi2,avx512bw")
+ (set_attr "type" "ishift,ishiftx,msklog")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (and (match_operand 2 "const1_operand")
+ (eq_attr "alternative" "0"))
+ (ior (match_test "TARGET_SHIFT1")
+ (match_test "optimize_function_for_size_p (cfun)")))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
;; Convert shift to the shiftx pattern to avoid flags dependency.
(define_split
[(set (match_operand:SWI48 0 "register_operand")
(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
"operands[2] = gen_lowpart (SImode, operands[2]);")
-(define_insn "*<insn><mode>3_1"
+(define_insn "*ashr<mode>3_1"
[(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
- (any_shiftrt:SWI12
+ (ashiftrt:SWI12
(match_operand:SWI12 1 "nonimmediate_operand" "0")
(match_operand:QI 2 "nonmemory_operand" "c<S>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
{
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<shift>{<imodesuffix>}\t%0";
+ return "sar{<imodesuffix>}\t%0";
else
- return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
[(set_attr "type" "ishift")
(set (attr "length_immediate")
(const_string "*")))
(set_attr "mode" "<MODE>")])
+(define_insn "*lshrqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k")
+ (lshiftrt:QI
+ (match_operand:QI 1 "nonimmediate_operand" "0, k")
+ (match_operand:QI 2 "nonmemory_operand" "cI,Wb")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ISHIFT:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "shr{b}\t%0";
+ else
+ return "shr{b}\t{%2, %0|%0, %2}";
+ case TYPE_MSKLOG:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "*,avx512dq")
+ (set_attr "type" "ishift,msklog")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (and (match_operand 2 "const1_operand")
+ (eq_attr "alternative" "0"))
+ (ior (match_test "TARGET_SHIFT1")
+ (match_test "optimize_function_for_size_p (cfun)")))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*lshrhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k")
+ (lshiftrt:HI
+ (match_operand:HI 1 "nonimmediate_operand" "0, k")
+ (match_operand:QI 2 "nonmemory_operand" "cI, Ww")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ISHIFT:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "shr{w}\t%0";
+ else
+ return "shr{w}\t{%2, %0|%0, %2}";
+ case TYPE_MSKLOG:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "*, avx512f")
+ (set_attr "type" "ishift,msklog")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (and (match_operand 2 "const1_operand")
+ (eq_attr "alternative" "0"))
+ (ior (match_test "TARGET_SHIFT1")
+ (match_test "optimize_function_for_size_p (cfun)")))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "HI")])
+
(define_insn "*<insn><mode>3_1_slp"
[(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
(any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0")
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512dq -O2" } */
+
+#include<immintrin.h>
+void
+fooq (__m512i a, __m512i b, void* p)
+{
+ __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+ m1 >>= 4;
+ _mm512_mask_storeu_epi64 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrb} "1" } } */
+
+void
+food (__m512i a, __m512i b, void* p)
+{
+ __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+ m1 >>= 8;
+ _mm512_mask_storeu_epi32 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrw} "1" } } */
+
+void
+foow (__m512i a, __m512i b, void* p)
+{
+ __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+ m1 >>= 16;
+ _mm512_mask_storeu_epi16 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrd} "1" } } */
+
+void
+foob (__m512i a, __m512i b, void* p)
+{
+ __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+ m1 >>= 32;
+ _mm512_mask_storeu_epi8 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrq} "1" { target { ! ia32 } } } } */
+
+void
+fooq1 (__m512i a, __m512i b, void* p)
+{
+ __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+ m1 <<= 4;
+ _mm512_mask_storeu_epi64 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlb} "1" } } */
+
+void
+food1 (__m512i a, __m512i b, void* p)
+{
+ __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+ m1 <<= 8;
+ _mm512_mask_storeu_epi32 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlw} "1" } } */
+
+void
+foow1 (__m512i a, __m512i b, void* p)
+{
+ __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+ m1 <<= 16;
+ _mm512_mask_storeu_epi16 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftld} "1" } } */
+
+void
+foob1 (__m512i a, __m512i b, void* p)
+{
+ __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+ m1 <<= 32;
+ _mm512_mask_storeu_epi8 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlq} "1" { target { ! ia32 } } } } */