From a923c2ee95a4b3b6d43a850789ba56c6aa249b3c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 6 Aug 2017 16:27:07 +0000 Subject: [PATCH] [x86] use more shift or LEA for select-of-constants We can convert any select-of-constants to math ops: http://rise4fun.com/Alive/d7d For this patch, I'm enhancing an existing x86 transform that uses fake multiplies (they always become shl/lea) to avoid cmov or branching. The current code misses cases where we have a negative constant and a positive constant, so this is just trying to plug that hole. The DAGCombiner diff prevents us from hitting a terrible inefficiency: we can start with a select in IR, create a select DAG node, convert it into a sext, convert it back into a select, and then lower it to sext machine code. Some notes about the test diffs: 1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR. 2. memcmp.ll - Choose -1 or 1 is the case that got me looking at this again. I think we could avoid the push/pop in some cases if we used 'movzbl %al' instead of an xor on a different reg? That's a post-DAG problem though. 3. mul-constant-result.ll - The trade-off between sbb+not vs. setne+neg could be addressed if that's a regression, but I think those would always be nearly equivalent. 4. pr22338.ll and sext-i1.ll - These tests have undef operands, so I don't think we actually care about these diffs. 5. sbb.ll - This shows a win for what I think is a common case: choose -1 or 0. 6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again. 7. select_const.ll - These are motivating cases for the enhancement; replace cmov with cheaper ops. Assembly differences between movzbl and xor to avoid a partial reg stall are caused later by the X86 Fixup SetCC pass. 
Differential Revision: https://reviews.llvm.org/D35340 llvm-svn: 310208 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 95 +++++++--------------- .../CodeGen/X86/2010-08-04-MaskedSignedCompare.ll | 18 ++-- llvm/test/CodeGen/X86/memcmp-optsize.ll | 48 +++++------ llvm/test/CodeGen/X86/memcmp.ll | 42 +++++----- llvm/test/CodeGen/X86/merge-consecutive-stores.ll | 8 +- llvm/test/CodeGen/X86/mul-constant-result.ll | 30 ++++--- llvm/test/CodeGen/X86/pr22338.ll | 20 ++--- llvm/test/CodeGen/X86/sbb.ll | 6 +- llvm/test/CodeGen/X86/select.ll | 83 ++++++++----------- llvm/test/CodeGen/X86/select_const.ll | 61 ++++++-------- llvm/test/CodeGen/X86/sext-i1.ll | 16 ++-- 12 files changed, 178 insertions(+), 251 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 937522e..353c86f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7394,7 +7394,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) return SCC; - if (!VT.isVector()) { + if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath()) { EVT SetCCVT = getSetCCResultType(N00VT); // Don't do this transform for i1 because there's a select transform // that would reverse it. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e1dd6d0..e60fcf5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30060,78 +30060,45 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { return SDValue(); // Don't do this for crazy integer types. 
- if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) + EVT VT = N->getValueType(0); + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); - // If this is efficiently invertible, canonicalize the LHSC/RHSC values - // so that TrueC (the true value) is larger than FalseC. - bool NeedsCondInvert = false; - if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) && - // Efficiently invertible. - (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible. - (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible. - isa<ConstantSDNode>(Cond.getOperand(1))))) { - NeedsCondInvert = true; - std::swap(TrueC, FalseC); - } - - // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0. - if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { - if (NeedsCondInvert) // Invert the condition if needed. - Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, - DAG.getConstant(1, DL, Cond.getValueType())); - - // Zero extend the condition if needed. - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond); - - unsigned ShAmt = TrueC->getAPIntValue().logBase2(); - return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond, - DAG.getConstant(ShAmt, DL, MVT::i8)); - } + // We're going to use the condition bit in math or logic ops. We could allow + // this with a wider condition value (post-legalization it becomes an i8), + // but if nothing is creating selects that late, it doesn't matter. + if (Cond.getValueType() != MVT::i1) + return SDValue(); - // Optimize cases that will turn into an LEA instruction. This requires - // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). - if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { - uint64_t Diff = TrueC->getZExtValue() - FalseC->getZExtValue(); - if (N->getValueType(0) == MVT::i32) - Diff = (unsigned)Diff; + // A power-of-2 multiply is just a shift. LEA also cheaply handles multiply by + // 3, 5, or 9 with i32/i64, so those get transformed too. 
+ // TODO: For constants that do not differ by power-of-2 or small multiplier, + // convert to 'and' + 'add'. + APInt AbsDiff = (TrueC->getAPIntValue() - FalseC->getAPIntValue()).abs(); + if (AbsDiff.isPowerOf2() || + ((VT == MVT::i32 || VT == MVT::i64) && + (AbsDiff == 3 || AbsDiff == 5 || AbsDiff == 9))) { - bool IsFastMultiplier = false; - if (Diff < 10) { - switch ((unsigned char)Diff) { - default: - break; - case 1: // result = add base, cond - case 2: // result = lea base( , cond*2) - case 3: // result = lea base(cond, cond*2) - case 4: // result = lea base( , cond*4) - case 5: // result = lea base(cond, cond*4) - case 8: // result = lea base( , cond*8) - case 9: // result = lea base(cond, cond*8) - IsFastMultiplier = true; - break; - } + // We need a positive multiplier constant for shift/LEA codegen. The 'not' + // of the condition can usually be folded into a compare predicate, but even + // without that, the sequence should be cheaper than a CMOV alternative. + if (TrueC->getAPIntValue().slt(FalseC->getAPIntValue())) { + Cond = DAG.getNOT(DL, Cond, MVT::i1); + std::swap(TrueC, FalseC); } - if (IsFastMultiplier) { - APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); - if (NeedsCondInvert) // Invert the condition if needed. - Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, - DAG.getConstant(1, DL, Cond.getValueType())); + // select Cond, TC, FC --> (zext(Cond) * (TC - FC)) + FC + SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); - // Zero extend the condition if needed. - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); - // Scale the condition by the difference. - if (Diff != 1) - Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, - DAG.getConstant(Diff, DL, Cond.getValueType())); + // Multiply condition by the difference if non-one. + if (!AbsDiff.isOneValue()) + R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT)); - // Add the base if non-zero. 
- if (FalseC->getAPIntValue() != 0) - Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, - SDValue(FalseC, 0)); - return Cond; - } + // Add the base if non-zero. + if (!FalseC->isNullValue()) + R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0)); + + return R; } return SDValue(); diff --git a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll index 66d3f31..cffefc2 100644 --- a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll +++ b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll @@ -9,21 +9,19 @@ define i32 @main() nounwind { ; CHECK-LABEL: main: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: cmpq $0, {{.*}}(%rip) -; CHECK-NEXT: movb $-106, %al -; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # BB#1: # %entry ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: cmpq {{.*}}(%rip), %rax +; CHECK-NEXT: sbbl %eax, %eax +; CHECK-NEXT: andl $150, %eax ; CHECK-NEXT: testb %al, %al -; CHECK-NEXT: jle .LBB0_3 -; CHECK-NEXT: # BB#4: # %if.then +; CHECK-NEXT: jle .LBB0_1 +; CHECK-NEXT: # BB#2: # %if.then ; CHECK-NEXT: movl $1, {{.*}}(%rip) ; CHECK-NEXT: movl $1, %esi -; CHECK-NEXT: jmp .LBB0_5 -; CHECK-NEXT: .LBB0_3: # %entry.if.end_crit_edge +; CHECK-NEXT: jmp .LBB0_3 +; CHECK-NEXT: .LBB0_1: # %entry.if.end_crit_edge ; CHECK-NEXT: movl {{.*}}(%rip), %esi -; CHECK-NEXT: .LBB0_5: # %if.end +; CHECK-NEXT: .LBB0_3: # %if.end ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movl $.L.str, %edi ; CHECK-NEXT: xorl %eax, %eax diff --git a/llvm/test/CodeGen/X86/memcmp-optsize.ll b/llvm/test/CodeGen/X86/memcmp-optsize.ll index 80d5149..4489aaf 100644 --- a/llvm/test/CodeGen/X86/memcmp-optsize.ll +++ b/llvm/test/CodeGen/X86/memcmp-optsize.ll @@ -125,12 +125,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: jmp .LBB4_3 ; X86-NEXT: .LBB4_1: # %res_block -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: incl %ecx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: 
decl %eax -; X86-NEXT: cmpw %si, %dx -; X86-NEXT: cmovael %ecx, %eax +; X86-NEXT: setae %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: .LBB4_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -149,9 +146,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind optsize { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB4_1: # %res_block -; X64-NEXT: movl $-1, %ecx -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind ret i32 %m @@ -286,12 +283,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind optsize { ; X86-NEXT: subl %ecx, %eax ; X86-NEXT: jmp .LBB9_3 ; X86-NEXT: .LBB9_1: # %res_block -; X86-NEXT: xorl %ecx, %ecx -; X86-NEXT: incl %ecx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: decl %eax -; X86-NEXT: cmpl %esi, %edx -; X86-NEXT: cmovael %ecx, %eax +; X86-NEXT: setae %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: .LBB9_3: # %endblock ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -310,9 +304,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind optsize { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB9_1: # %res_block -; X64-NEXT: movl $-1, %ecx -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind ret i32 %m @@ -381,12 +375,10 @@ define i32 @length8(i8* %X, i8* %Y) nounwind optsize { ; X86-NEXT: cmpl %edx, %ecx ; X86-NEXT: je .LBB11_3 ; X86-NEXT: .LBB11_1: # %res_block -; X86-NEXT: xorl %esi, %esi -; X86-NEXT: incl %esi ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: decl %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: cmovael %esi, %eax +; X86-NEXT: setae %al +; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: .LBB11_3: # %endblock ; X86-NEXT: popl 
%esi ; X86-NEXT: retl @@ -531,10 +523,10 @@ define i32 @length12(i8* %X, i8* %Y) nounwind optsize { ; X64-NEXT: # BB#3: # %endblock ; X64-NEXT: retq ; X64-NEXT: .LBB15_1: # %res_block +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: movl $-1, %ecx -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -572,10 +564,10 @@ define i32 @length16(i8* %X, i8* %Y) nounwind optsize { ; X64-NEXT: # BB#3: # %endblock ; X64-NEXT: retq ; X64-NEXT: .LBB16_1: # %res_block +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: movl $-1, %ecx -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m diff --git a/llvm/test/CodeGen/X86/memcmp.ll b/llvm/test/CodeGen/X86/memcmp.ll index 020a9c0..a269529 100644 --- a/llvm/test/CodeGen/X86/memcmp.ll +++ b/llvm/test/CodeGen/X86/memcmp.ll @@ -126,9 +126,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind { ; X86-NEXT: popl %esi ; X86-NEXT: retl ; X86-NEXT: .LBB4_1: # %res_block -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: movl $1, %eax -; X86-NEXT: cmovbl %ecx, %eax +; X86-NEXT: setae %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -146,9 +146,9 @@ define i32 @length3(i8* %X, i8* %Y) nounwind { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB4_1: # %res_block -; X64-NEXT: movl $-1, %ecx -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 3) nounwind ret i32 %m @@ -283,9 +283,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind { ; X86-NEXT: popl %esi ; X86-NEXT: retl ; 
X86-NEXT: .LBB9_1: # %res_block -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: movl $1, %eax -; X86-NEXT: cmovbl %ecx, %eax +; X86-NEXT: setae %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -303,9 +303,9 @@ define i32 @length5(i8* %X, i8* %Y) nounwind { ; X64-NEXT: subl %ecx, %eax ; X64-NEXT: retq ; X64-NEXT: .LBB9_1: # %res_block -; X64-NEXT: movl $-1, %ecx -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax +; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 5) nounwind ret i32 %m @@ -376,10 +376,10 @@ define i32 @length8(i8* %X, i8* %Y) nounwind { ; X86-NEXT: popl %esi ; X86-NEXT: retl ; X86-NEXT: .LBB11_1: # %res_block +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl %edx, %ecx -; X86-NEXT: movl $-1, %ecx -; X86-NEXT: movl $1, %eax -; X86-NEXT: cmovbl %ecx, %eax +; X86-NEXT: setae %al +; X86-NEXT: leal -1(%eax,%eax), %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -521,10 +521,10 @@ define i32 @length12(i8* %X, i8* %Y) nounwind { ; X64-NEXT: # BB#3: # %endblock ; X64-NEXT: retq ; X64-NEXT: .LBB15_1: # %res_block +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: movl $-1, %ecx -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 12) nounwind ret i32 %m @@ -562,10 +562,10 @@ define i32 @length16(i8* %X, i8* %Y) nounwind { ; X64-NEXT: # BB#3: # %endblock ; X64-NEXT: retq ; X64-NEXT: .LBB16_1: # %res_block +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpq %rdx, %rcx -; X64-NEXT: movl $-1, %ecx -; X64-NEXT: movl $1, %eax -; X64-NEXT: cmovbl %ecx, %eax +; X64-NEXT: setae %al +; X64-NEXT: leal -1(%rax,%rax), %eax ; X64-NEXT: retq %m = tail call i32 @memcmp(i8* %X, i8* %Y, i64 16) nounwind ret i32 %m diff --git 
a/llvm/test/CodeGen/X86/merge-consecutive-stores.ll b/llvm/test/CodeGen/X86/merge-consecutive-stores.ll index 4265295..8cb6f3a 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-stores.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-stores.ll @@ -16,11 +16,9 @@ define i32 @foo (i64* %so) nounwind uwtable ssp { ; CHECK-NEXT: cmpl 16(%eax), %edx ; CHECK-NEXT: movl $0, 16(%eax) ; CHECK-NEXT: sbbl %ecx, %edx -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: jl .LBB0_2 -; CHECK-NEXT: # BB#1: -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: setl %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: negl %eax ; CHECK-NEXT: retl %used = getelementptr inbounds i64, i64* %so, i32 3 store i64 0, i64* %used, align 8 diff --git a/llvm/test/CodeGen/X86/mul-constant-result.ll b/llvm/test/CodeGen/X86/mul-constant-result.ll index 65d80a6..83a9dbe 100644 --- a/llvm/test/CodeGen/X86/mul-constant-result.ll +++ b/llvm/test/CodeGen/X86/mul-constant-result.ll @@ -952,12 +952,11 @@ define i32 @foo() local_unnamed_addr #0 { ; X86-NEXT: .Lcfi103: ; X86-NEXT: .cfi_adjust_cfa_offset -8 ; X86-NEXT: xorl $32, %eax +; X86-NEXT: xorl %ecx, %ecx ; X86-NEXT: orl %ebx, %eax -; X86-NEXT: movl $-1, %eax -; X86-NEXT: jne .LBB1_2 -; X86-NEXT: # BB#1: -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: .LBB1_2: +; X86-NEXT: setne %cl +; X86-NEXT: negl %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -974,23 +973,20 @@ define i32 @foo() local_unnamed_addr #0 { ; X64-HSW-NEXT: pushq %r14 ; X64-HSW-NEXT: .Lcfi2: ; X64-HSW-NEXT: .cfi_def_cfa_offset 32 -; X64-HSW-NEXT: pushq %r12 +; X64-HSW-NEXT: pushq %rbx ; X64-HSW-NEXT: .Lcfi3: ; X64-HSW-NEXT: .cfi_def_cfa_offset 40 -; X64-HSW-NEXT: pushq %rbx +; X64-HSW-NEXT: pushq %rax ; X64-HSW-NEXT: .Lcfi4: ; X64-HSW-NEXT: .cfi_def_cfa_offset 48 ; X64-HSW-NEXT: .Lcfi5: -; X64-HSW-NEXT: .cfi_offset %rbx, -48 +; X64-HSW-NEXT: .cfi_offset %rbx, -40 ; X64-HSW-NEXT: .Lcfi6: -; X64-HSW-NEXT: .cfi_offset %r12, -40 -; 
X64-HSW-NEXT: .Lcfi7: ; X64-HSW-NEXT: .cfi_offset %r14, -32 -; X64-HSW-NEXT: .Lcfi8: +; X64-HSW-NEXT: .Lcfi7: ; X64-HSW-NEXT: .cfi_offset %r15, -24 -; X64-HSW-NEXT: .Lcfi9: +; X64-HSW-NEXT: .Lcfi8: ; X64-HSW-NEXT: .cfi_offset %rbp, -16 -; X64-HSW-NEXT: xorl %r12d, %r12d ; X64-HSW-NEXT: movl $1, %edi ; X64-HSW-NEXT: xorl %esi, %esi ; X64-HSW-NEXT: callq mult @@ -1180,11 +1176,13 @@ define i32 @foo() local_unnamed_addr #0 { ; X64-HSW-NEXT: movl $16, %esi ; X64-HSW-NEXT: callq mult ; X64-HSW-NEXT: xorl $32, %eax +; X64-HSW-NEXT: xorl %ecx, %ecx ; X64-HSW-NEXT: orl %ebx, %eax -; X64-HSW-NEXT: movl $-1, %eax -; X64-HSW-NEXT: cmovel %r12d, %eax +; X64-HSW-NEXT: setne %cl +; X64-HSW-NEXT: negl %ecx +; X64-HSW-NEXT: movl %ecx, %eax +; X64-HSW-NEXT: addq $8, %rsp ; X64-HSW-NEXT: popq %rbx -; X64-HSW-NEXT: popq %r12 ; X64-HSW-NEXT: popq %r14 ; X64-HSW-NEXT: popq %r15 ; X64-HSW-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/pr22338.ll b/llvm/test/CodeGen/X86/pr22338.ll index e0645d1..41430f5 100644 --- a/llvm/test/CodeGen/X86/pr22338.ll +++ b/llvm/test/CodeGen/X86/pr22338.ll @@ -5,30 +5,28 @@ define i32 @fn() { ; X86-LABEL: fn: ; X86: # BB#0: # %entry +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: cmpl $1, %eax +; X86-NEXT: setne %al ; X86-NEXT: sete %cl -; X86-NEXT: movl $-1, %eax -; X86-NEXT: jne .LBB0_2 -; X86-NEXT: # BB#1: # %entry -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: .LBB0_2: # %entry +; X86-NEXT: negl %eax ; X86-NEXT: addb %cl, %cl ; X86-NEXT: shll %cl, %eax ; X86-NEXT: .p2align 4, 0x90 -; X86-NEXT: .LBB0_3: # %bb1 +; X86-NEXT: .LBB0_1: # %bb1 ; X86-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-NEXT: testl %eax, %eax -; X86-NEXT: je .LBB0_3 -; X86-NEXT: # BB#4: # %bb2 +; X86-NEXT: je .LBB0_1 +; X86-NEXT: # BB#2: # %bb2 ; X86-NEXT: retl ; ; X64-LABEL: fn: ; X64: # BB#0: # %entry -; X64-NEXT: xorl %edx, %edx +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpl $1, %eax +; X64-NEXT: setne %al ; X64-NEXT: sete %cl -; X64-NEXT: movl $-1, %eax -; X64-NEXT: cmovel %edx, %eax +; 
X64-NEXT: negl %eax ; X64-NEXT: addb %cl, %cl ; X64-NEXT: shll %cl, %eax ; X64-NEXT: .p2align 4, 0x90 diff --git a/llvm/test/CodeGen/X86/sbb.ll b/llvm/test/CodeGen/X86/sbb.ll index b6e8ebf..7429c07 100644 --- a/llvm/test/CodeGen/X86/sbb.ll +++ b/llvm/test/CodeGen/X86/sbb.ll @@ -130,10 +130,8 @@ define i32 @ult_select_neg1_or_0(i32 %x, i32 %y) nounwind { define i32 @ugt_select_neg1_or_0(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: ugt_select_neg1_or_0: ; CHECK: # BB#0: -; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: cmpl %edi, %esi -; CHECK-NEXT: movl $-1, %eax -; CHECK-NEXT: cmovbel %ecx, %eax +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: sbbl %eax, %eax ; CHECK-NEXT: retq %cmp = icmp ugt i32 %y, %x %ext = sext i1 %cmp to i32 diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index ec15d1a..b37644e 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -545,12 +545,11 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; ; MCU-LABEL: test9b: ; MCU: # BB#0: -; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: movl $-1, %edx -; MCU-NEXT: je .LBB10_2 -; MCU-NEXT: # BB#1: +; MCU-NEXT: movl %edx, %ecx ; MCU-NEXT: xorl %edx, %edx -; MCU-NEXT: .LBB10_2: +; MCU-NEXT: orl %ecx, %eax +; MCU-NEXT: sete %dl +; MCU-NEXT: negl %edx ; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax ; MCU-NEXT: orl %edx, %eax ; MCU-NEXT: orl {{[0-9]+}}(%esp), %edx @@ -563,23 +562,14 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone { ;; Select between -1 and 1. 
define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone { -; GENERIC-LABEL: test10: -; GENERIC: ## BB#0: -; GENERIC-NEXT: cmpq $1, %rdi -; GENERIC-NEXT: sbbq %rax, %rax -; GENERIC-NEXT: orq $1, %rax -; GENERIC-NEXT: retq -; GENERIC-NEXT: ## -- End function -; -; ATOM-LABEL: test10: -; ATOM: ## BB#0: -; ATOM-NEXT: cmpq $1, %rdi -; ATOM-NEXT: sbbq %rax, %rax -; ATOM-NEXT: orq $1, %rax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; ATOM-NEXT: ## -- End function +; CHECK-LABEL: test10: +; CHECK: ## BB#0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: setne %al +; CHECK-NEXT: leaq -1(%rax,%rax), %rax +; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test10: ; MCU: # BB#0: @@ -747,29 +737,22 @@ define i32 @test13(i32 %a, i32 %b) nounwind { } define i32 @test14(i32 %a, i32 %b) nounwind { -; GENERIC-LABEL: test14: -; GENERIC: ## BB#0: -; GENERIC-NEXT: cmpl %esi, %edi -; GENERIC-NEXT: sbbl %eax, %eax -; GENERIC-NEXT: notl %eax -; GENERIC-NEXT: retq -; GENERIC-NEXT: ## -- End function -; -; ATOM-LABEL: test14: -; ATOM: ## BB#0: -; ATOM-NEXT: cmpl %esi, %edi -; ATOM-NEXT: sbbl %eax, %eax -; ATOM-NEXT: notl %eax -; ATOM-NEXT: nop -; ATOM-NEXT: nop -; ATOM-NEXT: retq -; ATOM-NEXT: ## -- End function +; CHECK-LABEL: test14: +; CHECK: ## BB#0: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpl %esi, %edi +; CHECK-NEXT: setae %al +; CHECK-NEXT: negl %eax +; CHECK-NEXT: retq +; CHECK-NEXT: ## -- End function ; ; MCU-LABEL: test14: ; MCU: # BB#0: +; MCU-NEXT: xorl %ecx, %ecx ; MCU-NEXT: cmpl %edx, %eax -; MCU-NEXT: sbbl %eax, %eax -; MCU-NEXT: notl %eax +; MCU-NEXT: setae %cl +; MCU-NEXT: negl %ecx +; MCU-NEXT: movl %ecx, %eax ; MCU-NEXT: retl %c = icmp uge i32 %a, %b %d = sext i1 %c to i32 @@ -826,12 +809,11 @@ define i64 @test16(i64 %x) nounwind uwtable readnone ssp { ; ; MCU-LABEL: test16: ; MCU: # BB#0: # %entry -; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: movl $-1, %eax -; MCU-NEXT: jne .LBB18_2 -; MCU-NEXT: # 
BB#1: # %entry +; MCU-NEXT: movl %eax, %ecx ; MCU-NEXT: xorl %eax, %eax -; MCU-NEXT: .LBB18_2: # %entry +; MCU-NEXT: orl %edx, %ecx +; MCU-NEXT: setne %al +; MCU-NEXT: negl %eax ; MCU-NEXT: movl %eax, %edx ; MCU-NEXT: retl entry: @@ -844,14 +826,16 @@ define i16 @test17(i16 %x) nounwind { ; GENERIC-LABEL: test17: ; GENERIC: ## BB#0: ## %entry ; GENERIC-NEXT: negw %di -; GENERIC-NEXT: sbbw %ax, %ax +; GENERIC-NEXT: sbbl %eax, %eax +; GENERIC-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> ; GENERIC-NEXT: retq ; GENERIC-NEXT: ## -- End function ; ; ATOM-LABEL: test17: ; ATOM: ## BB#0: ## %entry ; ATOM-NEXT: negw %di -; ATOM-NEXT: sbbw %ax, %ax +; ATOM-NEXT: sbbl %eax, %eax +; ATOM-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill> ; ATOM-NEXT: nop ; ATOM-NEXT: nop ; ATOM-NEXT: nop @@ -862,7 +846,8 @@ define i16 @test17(i16 %x) nounwind { ; MCU-LABEL: test17: ; MCU: # BB#0: # %entry ; MCU-NEXT: negw %ax -; MCU-NEXT: sbbw %ax, %ax +; MCU-NEXT: sbbl %eax, %eax +; MCU-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; MCU-NEXT: retl entry: %cmp = icmp ne i16 %x, 0 diff --git a/llvm/test/CodeGen/X86/select_const.ll b/llvm/test/CodeGen/X86/select_const.ll index 0eb9bf4..105abc1 100644 --- a/llvm/test/CodeGen/X86/select_const.ll +++ b/llvm/test/CodeGen/X86/select_const.ll @@ -211,10 +211,9 @@ define i32 @select_C_Cplus1_signext(i1 signext %cond) { define i32 @select_lea_2(i1 zeroext %cond) { ; CHECK-LABEL: select_lea_2: ; CHECK: # BB#0: -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: movl $-1, %ecx -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: xorb $1, %dil +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: leal -1(%rax,%rax), %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -1, i32 1 ret i32 %sel @@ -223,10 +222,9 @@ define i32 @select_lea_2(i1 zeroext %cond) { define i64 @select_lea_3(i1 zeroext %cond) { ; CHECK-LABEL: select_lea_3: ; CHECK: # BB#0: -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: movl $1, %ecx -; CHECK-NEXT: movq $-2, %rax -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: xorb $1, %dil +; 
CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: leaq -2(%rax,%rax,2), %rax ; CHECK-NEXT: retq %sel = select i1 %cond, i64 -2, i64 1 ret i64 %sel @@ -235,10 +233,9 @@ define i64 @select_lea_3(i1 zeroext %cond) { define i32 @select_lea_5(i1 zeroext %cond) { ; CHECK-LABEL: select_lea_5: ; CHECK: # BB#0: -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: movl $-2, %ecx -; CHECK-NEXT: movl $3, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: xorb $1, %dil +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: leal -2(%rax,%rax,4), %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -2, i32 3 ret i32 %sel @@ -247,10 +244,9 @@ define i32 @select_lea_5(i1 zeroext %cond) { define i64 @select_lea_9(i1 zeroext %cond) { ; CHECK-LABEL: select_lea_9: ; CHECK: # BB#0: -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: movl $2, %ecx -; CHECK-NEXT: movq $-7, %rax -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: xorb $1, %dil +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: leaq -7(%rax,%rax,8), %rax ; CHECK-NEXT: retq %sel = select i1 %cond, i64 -7, i64 2 ret i64 %sel @@ -263,12 +259,9 @@ define i64 @select_lea_9(i1 zeroext %cond) { define i8 @select_pow2_diff(i1 zeroext %cond) { ; CHECK-LABEL: select_pow2_diff: ; CHECK: # BB#0: -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: movb $19, %al -; CHECK-NEXT: jne .LBB22_2 -; CHECK-NEXT: # BB#1: -; CHECK-NEXT: movb $3, %al -; CHECK-NEXT: .LBB22_2: +; CHECK-NEXT: shlb $4, %dil +; CHECK-NEXT: orb $3, %dil +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i8 19, i8 3 ret i8 %sel @@ -277,10 +270,11 @@ define i8 @select_pow2_diff(i1 zeroext %cond) { define i16 @select_pow2_diff_invert(i1 zeroext %cond) { ; CHECK-LABEL: select_pow2_diff_invert: ; CHECK: # BB#0: -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: movw $7, %cx -; CHECK-NEXT: movw $71, %ax -; CHECK-NEXT: cmovnew %cx, %ax +; CHECK-NEXT: xorb $1, %dil +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: shll $6, %eax +; CHECK-NEXT: orl $7, %eax +; CHECK-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> ; 
CHECK-NEXT: retq %sel = select i1 %cond, i16 7, i16 71 ret i16 %sel @@ -289,10 +283,9 @@ define i16 @select_pow2_diff_invert(i1 zeroext %cond) { define i32 @select_pow2_diff_neg(i1 zeroext %cond) { ; CHECK-LABEL: select_pow2_diff_neg: ; CHECK: # BB#0: -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: movl $-9, %ecx -; CHECK-NEXT: movl $-25, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: shlb $4, %dil +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: orl $-25, %eax ; CHECK-NEXT: retq %sel = select i1 %cond, i32 -9, i32 -25 ret i32 %sel @@ -301,10 +294,10 @@ define i32 @select_pow2_diff_neg(i1 zeroext %cond) { define i64 @select_pow2_diff_neg_invert(i1 zeroext %cond) { ; CHECK-LABEL: select_pow2_diff_neg_invert: ; CHECK: # BB#0: -; CHECK-NEXT: testb %dil, %dil -; CHECK-NEXT: movl $29, %ecx -; CHECK-NEXT: movq $-99, %rax -; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: xorb $1, %dil +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: shlq $7, %rax +; CHECK-NEXT: addq $-99, %rax ; CHECK-NEXT: retq %sel = select i1 %cond, i64 -99, i64 29 ret i64 %sel diff --git a/llvm/test/CodeGen/X86/sext-i1.ll b/llvm/test/CodeGen/X86/sext-i1.ll index 8c92434db..d159fe1 100644 --- a/llvm/test/CodeGen/X86/sext-i1.ll +++ b/llvm/test/CodeGen/X86/sext-i1.ll @@ -51,8 +51,10 @@ define i32 @t3() nounwind readonly { ; ; X64-LABEL: t3: ; X64: # BB#0: # %entry -; X64-NEXT: cmpl $1, %eax -; X64-NEXT: sbbq %rax, %rax +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testl %eax, %eax +; X64-NEXT: sete %al +; X64-NEXT: negq %rax ; X64-NEXT: cmpq %rax, %rax ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: retq @@ -75,13 +77,11 @@ if.end: define i32 @t4(i64 %x) nounwind readnone ssp { ; X32-LABEL: t4: ; X32: # BB#0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: orl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl $-1, %eax -; X32-NEXT: je .LBB3_2 -; X32-NEXT: # BB#1: +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xorl %eax, %eax -; X32-NEXT: .LBB3_2: +; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: sete %al +; 
X32-NEXT: negl %eax ; X32-NEXT: retl ; ; X64-LABEL: t4: -- 2.7.4