/// Check whether the definition can be converted
/// to remove a comparison against zero.
-inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag) {
+inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
+ bool &ClearsOverflowFlag) {
NoSignFlag = false;
+ ClearsOverflowFlag = false;
switch (MI.getOpcode()) {
default: return false;
case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
case X86::ANDN32rr: case X86::ANDN32rm:
case X86::ANDN64rr: case X86::ANDN64rm:
- case X86::BLSI32rr: case X86::BLSI32rm:
- case X86::BLSI64rr: case X86::BLSI64rm:
- case X86::BLSMSK32rr:case X86::BLSMSK32rm:
- case X86::BLSMSK64rr:case X86::BLSMSK64rm:
- case X86::BLSR32rr: case X86::BLSR32rm:
- case X86::BLSR64rr: case X86::BLSR64rm:
case X86::BZHI32rr: case X86::BZHI32rm:
case X86::BZHI64rr: case X86::BZHI64rm:
case X86::LZCNT16rr: case X86::LZCNT16rm:
case X86::TZCNT16rr: case X86::TZCNT16rm:
case X86::TZCNT32rr: case X86::TZCNT32rm:
case X86::TZCNT64rr: case X86::TZCNT64rm:
+ return true;
+ case X86::BLSI32rr: case X86::BLSI32rm:
+ case X86::BLSI64rr: case X86::BLSI64rm:
+ case X86::BLSMSK32rr: case X86::BLSMSK32rm:
+ case X86::BLSMSK64rr: case X86::BLSMSK64rm:
+ case X86::BLSR32rr: case X86::BLSR32rm:
+ case X86::BLSR64rr: case X86::BLSR64rm:
case X86::BLCFILL32rr: case X86::BLCFILL32rm:
case X86::BLCFILL64rr: case X86::BLCFILL64rm:
case X86::BLCI32rr: case X86::BLCI32rm:
case X86::T1MSKC64rr: case X86::T1MSKC64rm:
case X86::TZMSK32rr: case X86::TZMSK32rm:
case X86::TZMSK64rr: case X86::TZMSK64rm:
+ // These instructions clear the overflow flag just like TEST.
+ // FIXME: These are not the only instructions in this switch that clear the
+ // overflow flag.
+ ClearsOverflowFlag = true;
return true;
case X86::BEXTR32rr: case X86::BEXTR64rr:
case X86::BEXTR32rm: case X86::BEXTR64rm:
case X86::BEXTRI32ri: case X86::BEXTRI32mi:
case X86::BEXTRI64ri: case X86::BEXTRI64mi:
- // BEXTR doesn't update the sign flag so we can't use it.
+ // BEXTR doesn't update the sign flag so we can't use it. It does clear
+ // the overflow flag, but that's not useful without the sign flag.
NoSignFlag = true;
return true;
}
// right way.
bool ShouldUpdateCC = false;
bool NoSignFlag = false;
+ bool ClearsOverflowFlag = false;
X86::CondCode NewCC = X86::COND_INVALID;
- if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag)) {
+ if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) {
// Scan forward from the use until we hit the use we're looking for or the
// compare instruction.
for (MachineBasicBlock::iterator J = MI;; ++J) {
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
+      // CF is used, so we can't perform this optimization.
+ return false;
case X86::COND_G: case X86::COND_GE:
case X86::COND_L: case X86::COND_LE:
case X86::COND_O: case X86::COND_NO:
- // CF and OF are used, we can't perform this optimization.
- return false;
+ // If OF is used, the instruction needs to clear it like CmpZero does.
+ if (!ClearsOverflowFlag)
+ return false;
+ break;
case X86::COND_S: case X86::COND_NS:
// If SF is used, but the instruction doesn't update the SF, then we
// can't do the optimization.
ret i32 %t3
}
+; Inspired by PR48768, but using cmovcc instead of setcc. There should be
+; no test instruction.
define i32 @blsi32_sle(i32 %a, i32 %b, i32 %c) nounwind {
; X86-LABEL: blsi32_sle:
; X86: # %bb.0:
; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovlel %eax, %ecx
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsil %edi, %ecx
-; X64-NEXT: testl %ecx, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
%t0 = sub i32 0, %a
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsiq %rdi, %rcx
-; X64-NEXT: testq %rcx, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
%t0 = sub i64 0, %a
; X86-LABEL: blsmsk32_sle:
; X86: # %bb.0:
; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovlel %eax, %ecx
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsmskl %edi, %ecx
-; X64-NEXT: testl %ecx, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
%t0 = sub i32 %a, 1
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsmskq %rdi, %rcx
-; X64-NEXT: testq %rcx, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
%t0 = sub i64 %a, 1
; X86-LABEL: blsr32_sle:
; X86: # %bb.0:
; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: testl %eax, %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx
; X86-NEXT: cmovlel %eax, %ecx
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: blsrl %edi, %ecx
-; X64-NEXT: testl %ecx, %ecx
; X64-NEXT: cmovgl %edx, %eax
; X64-NEXT: retq
%t0 = sub i32 %a, 1
; X64: # %bb.0:
; X64-NEXT: movq %rsi, %rax
; X64-NEXT: blsrq %rdi, %rcx
-; X64-NEXT: testq %rcx, %rcx
; X64-NEXT: cmovgq %rdx, %rax
; X64-NEXT: retq
%t0 = sub i64 %a, 1
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: blcfilll %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: cmovgl %edx, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, 1
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: blcfillq %rdi, %rcx
-; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: cmovgq %rdx, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, 1
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: blcil %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: cmovgl %edx, %eax
; CHECK-NEXT: retq
%t0 = add i32 1, %a
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: blciq %rdi, %rcx
-; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: cmovgq %rdx, %rax
; CHECK-NEXT: retq
%t0 = add i64 1, %a
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: blcicl %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: cmovgl %edx, %eax
; CHECK-NEXT: retq
%t0 = xor i32 %a, -1
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: blcicq %rdi, %rcx
-; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: cmovgq %rdx, %rax
; CHECK-NEXT: retq
%t0 = xor i64 %a, -1
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: blcmskl %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: cmovgl %edx, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, 1
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: blcmskq %rdi, %rcx
-; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: cmovgq %rdx, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, 1
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: blcsl %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: cmovgl %edx, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, 1
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: blcsq %rdi, %rcx
-; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: cmovgq %rdx, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, 1
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: blsfilll %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: cmovgl %edx, %eax
; CHECK-NEXT: retq
%t0 = add i32 %a, -1
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: blsfillq %rdi, %rcx
-; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: cmovgq %rdx, %rax
; CHECK-NEXT: retq
%t0 = add i64 %a, -1
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: blsicl %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: cmovgl %edx, %eax
; CHECK-NEXT: retq
%t0 = xor i32 %a, -1
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: blsicq %rdi, %rcx
-; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: cmovgq %rdx, %rax
; CHECK-NEXT: retq
%t0 = xor i64 %a, -1
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: t1mskcl %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: cmovgl %edx, %eax
; CHECK-NEXT: retq
%t0 = xor i32 %a, -1
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: t1mskcq %rdi, %rcx
-; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: cmovgq %rdx, %rax
; CHECK-NEXT: retq
%t0 = xor i64 %a, -1
; CHECK: # %bb.0:
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: tzmskl %edi, %ecx
-; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: cmovgl %edx, %eax
; CHECK-NEXT: retq
%t0 = xor i32 %a, -1
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: tzmskq %rdi, %rcx
-; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: cmovgq %rdx, %rax
; CHECK-NEXT: retq
%t0 = xor i64 %a, -1