From: Richard Sandiford Date: Tue, 16 Jul 2013 11:55:57 +0000 (+0000) Subject: [SystemZ] Use ROSBG and non-zero form of RISBG for OR nodes X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=885140c951801c0053930e933094613f66b17fec;p=platform%2Fupstream%2Fllvm.git [SystemZ] Use ROSBG and non-zero form of RISBG for OR nodes llvm-svn: 186405 --- diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 149001e..f5d5e5a 100644 --- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -222,6 +222,11 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { Addr, Base, Disp, Index); } + // Check whether (or Op (and X InsertMask)) is effectively an insertion + // of X into bits InsertMask of some Y != Op. Return true if so and + // set Op to that Y. + bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask); + // Try to fold some of Ops.Input into other fields of Ops. Return true // on success. bool expandRISBG(RISBGOperands &Ops); @@ -236,6 +241,10 @@ class SystemZDAGToDAGISel : public SelectionDAGISel { // Return the selected node on success, otherwise return null. SDNode *tryRISBGZero(SDNode *N); + // Try to use RISBG or ROSBG to implement OR node N. Return the selected + // node on success, otherwise return null. + SDNode *tryRISBGOrROSBG(SDNode *N); + // If Op0 is null, then Node is a constant that can be loaded using: // // (Opcode UpperVal LowerVal) @@ -557,6 +566,38 @@ bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, return true; } +bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, + uint64_t InsertMask) { + // We're only interested in cases where the insertion is into some operand + // of Op, rather than into Op itself. The only useful case is an AND. + if (Op.getOpcode() != ISD::AND) + return false; + + // We need a constant mask. 
+ ConstantSDNode *MaskNode = + dyn_cast<ConstantSDNode>(Op.getOperand(1).getNode()); + if (!MaskNode) + return false; + + // It's not an insertion of Op.getOperand(0) if the two masks overlap. + uint64_t AndMask = MaskNode->getZExtValue(); + if (InsertMask & AndMask) + return false; + + // It's only an insertion if all bits are covered or are known to be zero. + // The inner check covers all cases but is more expensive. + uint64_t Used = allOnes(Op.getValueType().getSizeInBits()); + if (Used != (AndMask | InsertMask)) { + APInt KnownZero, KnownOne; + CurDAG->ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne); + if (Used != (AndMask | InsertMask | KnownZero.getZExtValue())) + return false; + } + + Op = Op.getOperand(0); + return true; +} + // Return true if Mask matches the regexp 0*1+0*, given that zero masks // have already been filtered out. Store the first set bit in LSB and // the number of set bits in Length if so. @@ -761,6 +802,47 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); } +SDNode *SystemZDAGToDAGISel::tryRISBGOrROSBG(SDNode *N) { + // Try treating each operand of N as the second operand of RISBG or ROSBG + // and see which goes deepest. + RISBGOperands RISBG[] = { N->getOperand(0), N->getOperand(1) }; + unsigned Count[] = { 0, 0 }; + for (unsigned I = 0; I < 2; ++I) + while (expandRISBG(RISBG[I])) + Count[I] += 1; + + // Do nothing if neither operand is suitable. + if (Count[0] == 0 && Count[1] == 0) + return 0; + + // Pick the deepest second operand. + unsigned I = Count[0] > Count[1] ? 0 : 1; + SDValue Op0 = N->getOperand(I ^ 1); + + // Prefer IC for character insertions from memory. + if ((RISBG[I].Mask & 0xff) == 0) + if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Op0.getNode())) + if (Load->getMemoryVT() == MVT::i8) + return 0; + + // See whether we can avoid an AND in the first operand by converting + // ROSBG to RISBG. 
+ unsigned Opcode = SystemZ::ROSBG; + if (detectOrAndInsertion(Op0, RISBG[I].Mask)) + Opcode = SystemZ::RISBG; + + EVT VT = N->getValueType(0); + SDValue Ops[5] = { + convertTo(SDLoc(N), MVT::i64, Op0), + convertTo(SDLoc(N), MVT::i64, RISBG[I].Input), + CurDAG->getTargetConstant(RISBG[I].Start, MVT::i32), + CurDAG->getTargetConstant(RISBG[I].End, MVT::i32), + CurDAG->getTargetConstant(RISBG[I].Rotate, MVT::i32) + }; + N = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, Ops); + return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode(); +} + SDNode *SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal) { @@ -833,10 +915,13 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { SDNode *ResNode = 0; switch (Opcode) { case ISD::OR: + if (Node->getOperand(1).getOpcode() != ISD::Constant) + ResNode = tryRISBGOrROSBG(Node); + // Fall through. case ISD::XOR: // If this is a 64-bit operation in which both 32-bit halves are nonzero, // split the operation into two. - if (Node->getValueType(0) == MVT::i64) + if (!ResNode && Node->getValueType(0) == MVT::i64) if (ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) { uint64_t Val = Op1->getZExtValue(); if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) diff --git a/llvm/test/CodeGen/SystemZ/risbg-02.ll b/llvm/test/CodeGen/SystemZ/risbg-02.ll new file mode 100644 index 0000000..5ccfab0 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/risbg-02.ll @@ -0,0 +1,93 @@ +; Test sequences that can use RISBG with a normal first operand. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test a case with two ANDs. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: risbg %r2, %r3, 60, 62, 0 +; CHECK: br %r14 + %anda = and i32 %a, -15 + %andb = and i32 %b, 14 + %or = or i32 %anda, %andb + ret i32 %or +} + +; ...and again with i64. 
+define i64 @f2(i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: risbg %r2, %r3, 60, 62, 0 +; CHECK: br %r14 + %anda = and i64 %a, -15 + %andb = and i64 %b, 14 + %or = or i64 %anda, %andb + ret i64 %or +} + +; Test a case with two ANDs and a shift. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: risbg %r2, %r3, 60, 63, 56 +; CHECK: br %r14 + %anda = and i32 %a, -16 + %shr = lshr i32 %b, 8 + %andb = and i32 %shr, 15 + %or = or i32 %anda, %andb + ret i32 %or +} + +; ...and again with i64. +define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: risbg %r2, %r3, 60, 63, 56 +; CHECK: br %r14 + %anda = and i64 %a, -16 + %shr = lshr i64 %b, 8 + %andb = and i64 %shr, 15 + %or = or i64 %anda, %andb + ret i64 %or +} + +; Test a case with a single AND and a left shift. +define i32 @f5(i32 %a, i32 %b) { +; CHECK-LABEL: f5: +; CHECK: risbg %r2, %r3, 32, 53, 10 +; CHECK: br %r14 + %anda = and i32 %a, 1023 + %shlb = shl i32 %b, 10 + %or = or i32 %anda, %shlb + ret i32 %or +} + +; ...and again with i64. +define i64 @f6(i64 %a, i64 %b) { +; CHECK-LABEL: f6: +; CHECK: risbg %r2, %r3, 0, 53, 10 +; CHECK: br %r14 + %anda = and i64 %a, 1023 + %shlb = shl i64 %b, 10 + %or = or i64 %anda, %shlb + ret i64 %or +} + +; Test a case with a single AND and a right shift. +define i32 @f7(i32 %a, i32 %b) { +; CHECK-LABEL: f7: +; CHECK: risbg %r2, %r3, 40, 63, 56 +; CHECK: br %r14 + %anda = and i32 %a, -16777216 + %shrb = lshr i32 %b, 8 + %or = or i32 %anda, %shrb + ret i32 %or +} + +; ...and again with i64. +define i64 @f8(i64 %a, i64 %b) { +; CHECK-LABEL: f8: +; CHECK: risbg %r2, %r3, 8, 63, 56 +; CHECK: br %r14 + %anda = and i64 %a, -72057594037927936 + %shrb = lshr i64 %b, 8 + %or = or i64 %anda, %shrb + ret i64 %or +} diff --git a/llvm/test/CodeGen/SystemZ/rosbg-01.ll b/llvm/test/CodeGen/SystemZ/rosbg-01.ll new file mode 100644 index 0000000..0abaccc --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/rosbg-01.ll @@ -0,0 +1,110 @@ +; Test sequences that can use ROSBG. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +; Test the simple case. +define i32 @f1(i32 %a, i32 %b) { +; CHECK-LABEL: f1: +; CHECK: rosbg %r2, %r3, 59, 59, 0 +; CHECK: br %r14 + %andb = and i32 %b, 16 + %or = or i32 %a, %andb + ret i32 %or +} + +; ...and again with i64. +define i64 @f2(i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: rosbg %r2, %r3, 59, 59, 0 +; CHECK: br %r14 + %andb = and i64 %b, 16 + %or = or i64 %a, %andb + ret i64 %or +} + +; Test a case where wraparound is needed. +define i32 @f3(i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: rosbg %r2, %r3, 63, 60, 0 +; CHECK: br %r14 + %andb = and i32 %b, -7 + %or = or i32 %a, %andb + ret i32 %or +} + +; ...and again with i64. +define i64 @f4(i64 %a, i64 %b) { +; CHECK-LABEL: f4: +; CHECK: rosbg %r2, %r3, 63, 60, 0 +; CHECK: br %r14 + %andb = and i64 %b, -7 + %or = or i64 %a, %andb + ret i64 %or +} + +; Test a case with just a shift. +define i32 @f6(i32 %a, i32 %b) { +; CHECK-LABEL: f6: +; CHECK: rosbg %r2, %r3, 32, 51, 12 +; CHECK: br %r14 + %shrb = shl i32 %b, 12 + %or = or i32 %a, %shrb + ret i32 %or +} + +; ...and again with i64. +define i64 @f7(i64 %a, i64 %b) { +; CHECK-LABEL: f7: +; CHECK: rosbg %r2, %r3, 0, 51, 12 +; CHECK: br %r14 + %shrb = shl i64 %b, 12 + %or = or i64 %a, %shrb + ret i64 %or +} + +; Test a case with just a rotate. This can't use ROSBG. +define i32 @f8(i32 %a, i32 %b) { +; CHECK-LABEL: f8: +; CHECK: rll {{%r[0-5]}} +; CHECK: or {{%r[0-5]}} +; CHECK: br %r14 + %shlb = shl i32 %b, 30 + %shrb = lshr i32 %b, 2 + %rotlb = or i32 %shlb, %shrb + %or = or i32 %a, %rotlb + ret i32 %or +} + +; ...and again with i64, which can. +define i64 @f9(i64 %a, i64 %b) { +; CHECK-LABEL: f9: +; CHECK: rosbg %r2, %r3, 0, 63, 47 +; CHECK: br %r14 + %shlb = shl i64 %b, 47 + %shrb = lshr i64 %b, 17 + %rotlb = or i64 %shlb, %shrb + %or = or i64 %a, %rotlb + ret i64 %or +} + +; Test a case with a shift and AND. 
+define i32 @f10(i32 %a, i32 %b) { +; CHECK-LABEL: f10: +; CHECK: rosbg %r2, %r3, 56, 59, 4 +; CHECK: br %r14 + %shrb = shl i32 %b, 4 + %andb = and i32 %shrb, 240 + %or = or i32 %a, %andb + ret i32 %or +} + +; ...and again with i64. +define i64 @f11(i64 %a, i64 %b) { +; CHECK-LABEL: f11: +; CHECK: rosbg %r2, %r3, 56, 59, 4 +; CHECK: br %r14 + %shrb = shl i64 %b, 4 + %andb = and i64 %shrb, 240 + %or = or i64 %a, %andb + ret i64 %or +}