return LD->getAlign() >= 4 && LD->isSimple();
}]>;
-
-// An 'and' node with a single use.
-def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
- return N->hasOneUse();
-}]>;
-// An 'srl' node with a single use.
-def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
+// binary op with only one user
+class binop_oneuse<SDPatternOperator operator>
+ : PatFrag<(ops node:$A, node:$B),
+ (operator node:$A, node:$B), [{
return N->hasOneUse();
}]>;
-// An 'trunc' node with a single use.
-def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
+
+def add_su : binop_oneuse<add>;
+def and_su : binop_oneuse<and>;
+def srl_su : binop_oneuse<srl>;
+
+// unary op with only one user
+class unop_oneuse<SDPatternOperator operator>
+ : PatFrag<(ops node:$A),
+ (operator node:$A), [{
return N->hasOneUse();
}]>;
+
+def ineg_su : unop_oneuse<ineg>;
+def trunc_su : unop_oneuse<trunc>;
+
//===----------------------------------------------------------------------===//
// Instruction list.
//
return hasNoCarryFlagUses(SDValue(N, 1));
}]>;
+
let Predicates = [HasBMI] in {
- // FIXME: patterns for the load versions are not implemented
- def : Pat<(and GR32:$src, (add GR32:$src, -1)),
+ // FIXME(1): patterns for the load versions are not implemented
+  // FIXME(2): By matching only the single-use forms (`add_su`/`ineg_su`)
+  // we may emit an extra `mov` when `src` has other uses. It may be
+  // profitable to match these patterns even when `src` has multiple users.
+ def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
(BLSR32rr GR32:$src)>;
- def : Pat<(and GR64:$src, (add GR64:$src, -1)),
+ def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
(BLSR64rr GR64:$src)>;
- def : Pat<(xor GR32:$src, (add GR32:$src, -1)),
+ def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
(BLSMSK32rr GR32:$src)>;
- def : Pat<(xor GR64:$src, (add GR64:$src, -1)),
+ def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
(BLSMSK64rr GR64:$src)>;
- def : Pat<(and GR32:$src, (ineg GR32:$src)),
+ def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
(BLSI32rr GR32:$src)>;
- def : Pat<(and GR64:$src, (ineg GR64:$src)),
+ def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
(BLSI64rr GR64:$src)>;
// Versions to match flag producing ops.
- def : Pat<(and_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
(BLSR32rr GR32:$src)>;
- def : Pat<(and_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
(BLSR64rr GR64:$src)>;
- def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
(BLSMSK32rr GR32:$src)>;
- def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
(BLSMSK64rr GR64:$src)>;
- def : Pat<(and_flag_nocf GR32:$src, (ineg GR32:$src)),
+ def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
(BLSI32rr GR32:$src)>;
- def : Pat<(and_flag_nocf GR64:$src, (ineg GR64:$src)),
+ def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
(BLSI64rr GR64:$src)>;
}
liveins: $edi
; CHECK-LABEL: name: test_blsi32rr
- ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi
- ; CHECK: [[BLSI32rr:%[0-9]+]]:gr32 = BLSI32rr [[COPY]], implicit-def $eflags
- ; CHECK: $edi = COPY [[BLSI32rr]]
+ ; CHECK: liveins: $edi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi
+ ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
+ ; CHECK-NEXT: [[SUB32rr:%[0-9]+]]:gr32 = SUB32rr [[MOV32r0_]], [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[SUB32rr]], [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: $edi = COPY [[AND32rr]]
%0(s32) = COPY $edi
%1(s32) = G_CONSTANT i32 0
%2(s32) = G_SUB %1, %0
liveins: $edi
; CHECK-LABEL: name: test_blsi32rr_nomatch
- ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi
- ; CHECK: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
- ; CHECK: [[SUB32ri:%[0-9]+]]:gr32 = SUB32ri8 [[MOV32r0_]], 0, implicit-def $eflags
- ; CHECK: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[SUB32ri]], [[COPY]], implicit-def $eflags
- ; CHECK: $edi = COPY [[AND32rr]]
+ ; CHECK: liveins: $edi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi
+ ; CHECK-NEXT: [[MOV32r0_:%[0-9]+]]:gr32 = MOV32r0 implicit-def $eflags
+ ; CHECK-NEXT: [[SUB32ri8_:%[0-9]+]]:gr32 = SUB32ri8 [[MOV32r0_]], 0, implicit-def $eflags
+ ; CHECK-NEXT: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[SUB32ri8_]], [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: $edi = COPY [[AND32rr]]
%0(s32) = COPY $edi
%1(s32) = G_CONSTANT i32 0
%2(s32) = G_SUB %1, %1
liveins: $edi
; CHECK-LABEL: name: test_blsr32rr
- ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi
- ; CHECK: [[BLSR32rr:%[0-9]+]]:gr32 = BLSR32rr [[COPY]], implicit-def $eflags
- ; CHECK: $edi = COPY [[BLSR32rr]]
+ ; CHECK: liveins: $edi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi
+ ; CHECK-NEXT: [[DEC32r:%[0-9]+]]:gr32 = DEC32r [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[DEC32r]], [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: $edi = COPY [[AND32rr]]
%0(s32) = COPY $edi
%1(s32) = G_CONSTANT i32 -1
%2(s32) = G_ADD %0, %1
liveins: $edi
; CHECK-LABEL: name: test_blsr32rr_nomatch
- ; CHECK: [[COPY:%[0-9]+]]:gr32 = COPY $edi
- ; CHECK: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri -1
- ; CHECK: [[DEC32r:%[0-9]+]]:gr32 = DEC32r [[MOV32ri]], implicit-def $eflags
- ; CHECK: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[DEC32r]], [[COPY]], implicit-def $eflags
- ; CHECK: $edi = COPY [[AND32rr]]
+ ; CHECK: liveins: $edi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edi
+ ; CHECK-NEXT: [[MOV32ri:%[0-9]+]]:gr32 = MOV32ri -1
+ ; CHECK-NEXT: [[DEC32r:%[0-9]+]]:gr32 = DEC32r [[MOV32ri]], implicit-def $eflags
+ ; CHECK-NEXT: [[AND32rr:%[0-9]+]]:gr32 = AND32rr [[DEC32r]], [[COPY]], implicit-def $eflags
+ ; CHECK-NEXT: $edi = COPY [[AND32rr]]
%0(s32) = COPY $edi
%1(s32) = G_CONSTANT i32 -1
%2(s32) = G_ADD %1, %1
; X86-LABEL: blsmsk_used2:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: blsmskl %eax, %ecx
-; X86-NEXT: decl %eax
+; X86-NEXT: leal -1(%eax), %ecx
+; X86-NEXT: xorl %ecx, %eax
; X86-NEXT: imull %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: blsmsk_used2:
; X64: # %bb.0: # %entry
; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: blsmskl %edi, %ecx
; X64-NEXT: leal -1(%rdi), %eax
-; X64-NEXT: imull %ecx, %eax
+; X64-NEXT: xorl %eax, %edi
+; X64-NEXT: imull %edi, %eax
; X64-NEXT: retq
entry:
%sub = add i32 %a, -1
; X86-LABEL: blsi_used2:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: blsil %eax, %ecx
-; X86-NEXT: negl %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: negl %ecx
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: imull %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: blsi_used2:
; X64: # %bb.0: # %entry
-; X64-NEXT: blsil %edi, %eax
-; X64-NEXT: negl %edi
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: andl %eax, %edi
; X64-NEXT: imull %edi, %eax
; X64-NEXT: retq
entry:
; X86-LABEL: blsr_used2:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: blsrl %eax, %ecx
-; X86-NEXT: decl %eax
+; X86-NEXT: leal -1(%eax), %ecx
+; X86-NEXT: andl %ecx, %eax
; X86-NEXT: imull %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: blsr_used2:
; X64: # %bb.0: # %entry
; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: blsrl %edi, %ecx
; X64-NEXT: leal -1(%rdi), %eax
-; X64-NEXT: imull %ecx, %eax
+; X64-NEXT: andl %eax, %edi
+; X64-NEXT: imull %edi, %eax
; X64-NEXT: retq
entry:
%sub = add i32 %a, -1