From 3d9946eb239dc27d6c784418e404dc8ab82789d2 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 19 Aug 2016 17:54:49 +0000 Subject: [PATCH] [Hexagon] Fixes for new-value jump formation - Recognize C2_cmpgtui, S2_tstbit_i, and S4_ntstbit_i. - Avoid creating new-value instructions with both source operands equal. llvm-svn: 279286 --- llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp | 41 ++++++++++++---- llvm/test/CodeGen/Hexagon/newvalueSameReg.ll | 63 +++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/Hexagon/newvalueSameReg.ll diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp index 6b3f692..f622dad 100644 --- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -180,7 +180,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, return false; // if call in path, bail out. - if (MII->getOpcode() == Hexagon::J2_call) + if (MII->isCall()) return false; // if NVJ is running prior to RA, do the following checks. @@ -189,9 +189,9 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, // to new value jump. If they are in the path, bail out. // KILL sets kill flag on the opcode. It also sets up a // single register, out of pair. - // %D0 = Hexagon_S2_lsr_r_p %D0, %R2 + // %D0 = S2_lsr_r_p %D0, %R2 // %R0 = KILL %R0, %D0 - // %P0 = CMPEQri %R0, 0 + // %P0 = C2_cmpeqi %R0, 0 // PHI can be anything after RA. // COPY can remateriaze things in between feeder, compare and nvj. if (MII->getOpcode() == TargetOpcode::KILL || @@ -203,7 +203,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA, // of registers by individual passes in the backend. At this time, // we don't know the scope of usage and definitions of these // instructions. - if (MII->getOpcode() == Hexagon::LDriw_pred || + if (MII->getOpcode() == Hexagon::LDriw_pred || MII->getOpcode() == Hexagon::STriw_pred) return false; } @@ -226,10 +226,23 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // range specified by the arch. if (!secondReg) { int64_t v = MI.getOperand(2).getImm(); + bool Valid = false; - if (!(isUInt<5>(v) || ((MI.getOpcode() == Hexagon::C2_cmpeqi || - MI.getOpcode() == Hexagon::C2_cmpgti) && - (v == -1)))) + switch (MI.getOpcode()) { + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgti: + Valid = (isUInt<5>(v) || v == -1); + break; + case Hexagon::C2_cmpgtui: + Valid = isUInt<5>(v); + break; + case Hexagon::S2_tstbit_i: + case Hexagon::S4_ntstbit_i: + Valid = (v == 0); + break; + } + + if (!Valid) return false; } @@ -239,6 +252,11 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, if (secondReg) { cmpOp2 = MI.getOperand(2).getReg(); + // If the same register appears as both operands, we cannot generate a new + // value compare. Only one operand may use the .new suffix. + if (cmpReg1 == cmpOp2) + return false; + // Make sure that that second register is not from COPY // At machine code level, we don't need this, but if we decide // to move new value jump prior to RA, we would be needing this. @@ -255,6 +273,8 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, ++II ; for (MachineBasicBlock::iterator localII = II; localII != end; ++localII) { + if (localII->isDebugValue()) + continue; // Check 1. // If "common" checks fail, bail out. @@ -474,7 +494,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { //if(LVs.isLiveOut(predReg, *MBB)) break; // Get all the successors of this block - which will always - // be 2. Check if the predicate register is live in in those + // be 2. Check if the predicate register is live-in in those // successor. If yes, we can not delete the predicate - // I am doing this only because LLVM does not provide LiveOut // at the BB level. @@ -582,8 +602,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { if (isSecondOpReg) { // In case of CMPLT, or CMPLTU, or EQ with the second register // to newify, swap the operands. - if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq && - feederReg == (unsigned) cmpOp2) { + unsigned COp = cmpInstr->getOpcode(); + if ((COp == Hexagon::C2_cmpeq || COp == Hexagon::C4_cmpneq) && + (feederReg == (unsigned) cmpOp2)) { unsigned tmp = cmpReg1; bool tmpIsKill = MO1IsKill; cmpReg1 = cmpOp2; diff --git a/llvm/test/CodeGen/Hexagon/newvalueSameReg.ll b/llvm/test/CodeGen/Hexagon/newvalueSameReg.ll new file mode 100644 index 0000000..0fc4df2 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/newvalueSameReg.ll @@ -0,0 +1,63 @@ +; RUN: llc -march=hexagon -hexagon-expand-condsets=0 < %s | FileCheck %s +; +; Expand-condsets eliminates the "mux" instruction, which is what this +; testcase is checking. + +%struct._Dnk_filet.1 = type { i16, i8, i32, i8*, i8*, i8*, i8*, i8*, i8*, i32*, [2 x i32], i8*, i8*, i8*, %struct._Mbstatet.0, i8*, [8 x i8], i8 } +%struct._Mbstatet.0 = type { i32, i16, i16 } + +@_Stdout = external global %struct._Dnk_filet.1 +@.str = external unnamed_addr constant [23 x i8], align 8 + +; Test that we don't generate a new value compare if the operands are +; the same register. + +; CHECK-NOT: cmp.eq([[REG0:(r[0-9]+)]].new, [[REG0]]) +; CHECK: cmp.eq([[REG1:(r[0-9]+)]], [[REG1]]) + +; Function Attrs: nounwind +declare void @fprintf(%struct._Dnk_filet.1* nocapture, i8* nocapture readonly, ...) #1 + +define void @main() #0 { +entry: + %0 = load i32*, i32** undef, align 4 + %1 = load i32, i32* undef, align 4 + br i1 undef, label %if.end, label %_ZNSt6vectorIbSaIbEE3endEv.exit + +_ZNSt6vectorIbSaIbEE3endEv.exit: + %2 = icmp slt i32 %1, 0 + %sub5.i.i.i = lshr i32 %1, 5 + %add619.i.i.i = add i32 %sub5.i.i.i, -134217728 + %sub5.i.pn.i.i = select i1 %2, i32 %add619.i.i.i, i32 %sub5.i.i.i + %storemerge2.i.i = getelementptr inbounds i32, i32* %0, i32 %sub5.i.pn.i.i + %cmp.i.i = icmp ult i32* %storemerge2.i.i, %0 + %.mux = select i1 %cmp.i.i, i32 0, i32 1 + br i1 undef, label %_ZNSt6vectorIbSaIbEE3endEv.exit57, label %if.end + +_ZNSt6vectorIbSaIbEE3endEv.exit57: + %3 = icmp slt i32 %1, 0 + %sub5.i.i.i44 = lshr i32 %1, 5 + %add619.i.i.i45 = add i32 %sub5.i.i.i44, -134217728 + %sub5.i.pn.i.i46 = select i1 %3, i32 %add619.i.i.i45, i32 %sub5.i.i.i44 + %storemerge2.i.i47 = getelementptr inbounds i32, i32* %0, i32 %sub5.i.pn.i.i46 + %cmp.i38 = icmp ult i32* %storemerge2.i.i47, %0 + %.reg2mem.sroa.0.sroa.0.0.load14.i.reload = select i1 %cmp.i38, i32 0, i32 1 + %cmp = icmp eq i32 %.mux, %.reg2mem.sroa.0.sroa.0.0.load14.i.reload + br i1 %cmp, label %if.end, label %if.then + +if.then: + call void (%struct._Dnk_filet.1*, i8*, ...) @fprintf(%struct._Dnk_filet.1* @_Stdout, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0), i32 %.mux, i32 %.reg2mem.sroa.0.sroa.0.0.load14.i.reload) #1 + unreachable + +if.end: + br i1 undef, label %_ZNSt6vectorIbSaIbEED2Ev.exit, label %if.then.i.i.i + +if.then.i.i.i: + unreachable + +_ZNSt6vectorIbSaIbEED2Ev.exit: + ret void +} + +attributes #0 = { "target-cpu"="hexagonv5" } +attributes #1 = { nounwind "target-cpu"="hexagonv5" } -- 2.7.4