From 10f889283751e1e253da3829bb5c2b0cb6a50536 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 16 Dec 2018 18:35:55 +0000 Subject: [PATCH] [X86] Remove truncation handling from EmitTest. Replace it with a DAG combine. I'd like to try to move a lot of the flag matching out of EmitTest and push it to isel or isel preprocessing. This is a step towards that. The test-shrink-bug.ll changie is an improvement because we are no longer interfering with test shrink handling in isel. The pr34137.ll change is a regression, but the IR came from -O0 and was not reduced by InstCombine. So it contains a lot of redundancies like duplicate loads that made it combine poorly. llvm-svn: 349315 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 155 +++++++++++++++++++++---------- llvm/test/CodeGen/X86/pr34137.ll | 9 +- llvm/test/CodeGen/X86/test-shrink-bug.ll | 2 +- 3 files changed, 111 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 704b62d..5e7431f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18581,27 +18581,7 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, unsigned Opcode = 0; unsigned NumOperands = 0; - // Truncate operations may prevent the merge of the SETCC instruction - // and the arithmetic instruction before it. Attempt to truncate the operands - // of the arithmetic instruction and use a reduced bit-width instruction. - bool NeedTruncation = false; SDValue ArithOp = Op; - if (Op->getOpcode() == ISD::TRUNCATE && Op->hasOneUse()) { - SDValue Arith = Op->getOperand(0); - // Both the trunc and the arithmetic op need to have one user each. - if (Arith->hasOneUse()) - switch (Arith.getOpcode()) { - default: break; - case ISD::ADD: - case ISD::SUB: - case ISD::AND: - case ISD::OR: - case ISD::XOR: { - NeedTruncation = true; - ArithOp = Arith; - } - } - } // Sometimes flags can be set either with an AND or with an SRL/SHL // instruction. SRL/SHL variant should be preferred for masks longer than this @@ -18769,36 +18749,6 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, break; } - // If we found that truncation is beneficial, perform the truncation and - // update 'Op'. - if (NeedTruncation) { - EVT VT = Op.getValueType(); - SDValue WideVal = Op->getOperand(0); - EVT WideVT = WideVal.getValueType(); - unsigned ConvertedOp = 0; - // Use a target machine opcode to prevent further DAGCombine - // optimizations that may separate the arithmetic operations - // from the setcc node. - switch (WideVal.getOpcode()) { - default: break; - case ISD::ADD: ConvertedOp = X86ISD::ADD; break; - case ISD::SUB: ConvertedOp = X86ISD::SUB; break; - case ISD::AND: ConvertedOp = X86ISD::AND; break; - case ISD::OR: ConvertedOp = X86ISD::OR; break; - case ISD::XOR: ConvertedOp = X86ISD::XOR; break; - } - - if (ConvertedOp) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.isOperationLegal(WideVal.getOpcode(), WideVT)) { - SDValue V0 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(0)); - SDValue V1 = DAG.getNode(ISD::TRUNCATE, dl, VT, WideVal.getOperand(1)); - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - Op = DAG.getNode(ConvertedOp, dl, VTs, V0, V1); - } - } - } - if (Opcode == 0) { // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, @@ -39956,6 +39906,110 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static bool needCarryOrOverflowFlag(SDValue Flags) { + assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); + + for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + + X86::CondCode CC; + switch (User->getOpcode()) { + default: + // Be conservative. + return true; + case X86ISD::SETCC: + case X86ISD::SETCC_CARRY: + CC = (X86::CondCode)User->getConstantOperandVal(0); + break; + case X86ISD::BRCOND: + CC = (X86::CondCode)User->getConstantOperandVal(2); + break; + case X86ISD::CMOV: + CC = (X86::CondCode)User->getConstantOperandVal(3); + break; + } + + switch (CC) { + default: break; + case X86::COND_A: case X86::COND_AE: + case X86::COND_B: case X86::COND_BE: + case X86::COND_O: case X86::COND_NO: + case X86::COND_G: case X86::COND_GE: + case X86::COND_L: case X86::COND_LE: + return true; + } + } + + return false; +} + +static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) { + // Only handle test patterns. + if (!isNullConstant(N->getOperand(1))) + return SDValue(); + + // If we have a CMP of a truncated binop, see if we can make a smaller binop + // and use its flags directly. + // TODO: Maybe we should try promoting compares that only use the zero flag + // first if we can prove the upper bits with computeKnownBits? + SDValue Op = N->getOperand(0); + EVT VT = Op.getValueType(); + + // Look for a truncate with a single use. + if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse()) + return SDValue(); + + Op = Op.getOperand(0); + + // Arithmetic op can only have one use. + if (!Op.hasOneUse()) + return SDValue(); + + unsigned NewOpc; + switch (Op.getOpcode()) { + default: return SDValue(); + case ISD::AND: + // Skip and with constant. We have special handling for and with immediate + // during isel to generate test instructions. + if (isa(Op.getOperand(1))) + return SDValue(); + NewOpc = X86ISD::AND; + break; + case ISD::OR: NewOpc = X86ISD::OR; break; + case ISD::XOR: NewOpc = X86ISD::XOR; break; + case ISD::ADD: + // If the carry or overflow flag is used, we can't truncate. + if (needCarryOrOverflowFlag(SDValue(N, 0))) + return SDValue(); + NewOpc = X86ISD::ADD; + break; + case ISD::SUB: + // If the carry or overflow flag is used, we can't truncate. + if (needCarryOrOverflowFlag(SDValue(N, 0))) + return SDValue(); + NewOpc = X86ISD::SUB; + break; + } + + // We found an op we can narrow. Truncate its inputs. + SDLoc dl(N); + SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1)); + + // Use a X86 specific opcode to avoid DAG combine messing with it. + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + Op = DAG.getNode(NewOpc, dl, VTs, Op0, Op1); + + // For AND, keep a CMP so that we can match the test pattern. + if (NewOpc == X86ISD::AND) + return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, + DAG.getConstant(0, dl, VT)); + + // Return the flags. + return Op.getValue(1); +} + static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) { if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) { MVT VT = N->getSimpleValueType(0); @@ -41069,6 +41123,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget); case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget); case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); + case X86ISD::CMP: return combineCMP(N, DAG); case ISD::ADD: return combineAdd(N, DAG, Subtarget); case ISD::SUB: return combineSub(N, DAG, Subtarget); case X86ISD::SBB: return combineSBB(N, DAG); diff --git a/llvm/test/CodeGen/X86/pr34137.ll b/llvm/test/CodeGen/X86/pr34137.ll index 4e81cb8..1a85a66 100644 --- a/llvm/test/CodeGen/X86/pr34137.ll +++ b/llvm/test/CodeGen/X86/pr34137.ll @@ -11,11 +11,12 @@ define void @pr34127() { ; CHECK-NEXT: movzwl {{.*}}(%rip), %eax ; CHECK-NEXT: movzwl {{.*}}(%rip), %ecx ; CHECK-NEXT: andl %eax, %ecx -; CHECK-NEXT: andl %eax, %ecx -; CHECK-NEXT: movzwl %cx, %ecx -; CHECK-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: andl %ecx, %edx +; CHECK-NEXT: movzwl %dx, %edx +; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: testw %cx, %cx +; CHECK-NEXT: testw %cx, %ax ; CHECK-NEXT: sete %dl ; CHECK-NEXT: andl %eax, %edx ; CHECK-NEXT: movq %rdx, {{.*}}(%rip) diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll index 4fba792..ca2316c 100644 --- a/llvm/test/CodeGen/X86/test-shrink-bug.ll +++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll @@ -68,7 +68,7 @@ define void @fail(i16 %a, <2 x i8> %b) { ; CHECK-X64: # %bb.0: ; CHECK-X64-NEXT: pushq %rax ; CHECK-X64-NEXT: .cfi_def_cfa_offset 16 -; CHECK-X64-NEXT: testw $263, %di # imm = 0x107 +; CHECK-X64-NEXT: testl $263, %edi # imm = 0x107 ; CHECK-X64-NEXT: je .LBB1_2 ; CHECK-X64-NEXT: # %bb.1: ; CHECK-X64-NEXT: pand {{.*}}(%rip), %xmm0 -- 2.7.4