From 0cd65429befcf5d39ad2e53c0e56252108982916 Mon Sep 17 00:00:00 2001 From: Guozhi Wei Date: Fri, 14 Oct 2016 20:41:50 +0000 Subject: [PATCH] [PPC] Shorter sequence to load 64bit constant with same hi/lo words This is a patch to implement pr30640. When a 64bit constant has the same hi/lo words, we can use rldimi to copy the low word into the high word of the same register. This optimization caused a failure of test case bperm.ll because of a suboptimal heuristic in function SelectAndParts64. It chooses AND or ROTATE to extract bit groups from a register, and ORs them together. This optimization lowers the cost of loading the 64bit constant mask used in the AND method, and causes a different code sequence. But the ROTATE method is actually better in this test case. The reason is that in the ROTATE method the final OR operation can be avoided, since rldimi can insert the rotated bits into the target register directly. So this patch also enhances SelectAndParts64 to prefer the ROTATE method when the two methods have the same cost and there are multiple bit groups that need to be ORed together. Differential Revision: https://reviews.llvm.org/D25521 llvm-svn: 284276 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 25 +++++++++++++++++++++++-- llvm/test/CodeGen/PowerPC/pr30640.ll | 11 +++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pr30640.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6f8f566..150b0c8 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -633,6 +633,13 @@ static unsigned getInt64CountDirect(int64_t Imm) { // If no shift, we're done. if (!Shift) return Result; + // If Hi word == Lo word, + // we can use rldimi to insert the Lo word into Hi word. + if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { + ++Result; + return Result; + } + // Shift for next step if the upper 32-bits were not zero. 
if (Imm) ++Result; @@ -731,6 +738,14 @@ static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl, // If no shift, we're done. if (!Shift) return Result; + // If Hi word == Lo word, + // we can use rldimi to insert the Lo word into Hi word. + if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) { + SDValue Ops[] = + { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)}; + return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); + } + // Shift for next step if the upper 32-bits were not zero. if (Imm) { Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, @@ -1659,9 +1674,12 @@ class BitPermutationSelector { unsigned NumRLInsts = 0; bool FirstBG = true; + bool MoreBG = false; for (auto &BG : BitGroups) { - if (!MatchingBG(BG)) + if (!MatchingBG(BG)) { + MoreBG = true; continue; + } NumRLInsts += SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, !FirstBG); @@ -1679,7 +1697,10 @@ class BitPermutationSelector { // because that exposes more opportunities for CSE. if (NumAndInsts > NumRLInsts) continue; - if (Use32BitInsts && NumAndInsts == NumRLInsts) + // When merging multiple bit groups, instruction or is used. + // But when rotate is used, rldimi can insert the rotated value into any + // register, so instruction or can be avoided. + if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) continue; DEBUG(dbgs() << "\t\t\t\tusing masking\n"); diff --git a/llvm/test/CodeGen/PowerPC/pr30640.ll b/llvm/test/CodeGen/PowerPC/pr30640.ll new file mode 100644 index 0000000..92cf138 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr30640.ll @@ -0,0 +1,11 @@ +; RUN: llc -O2 -march=ppc64 -mcpu=pwr8 < %s | FileCheck %s + +define i64 @foo() { +entry: + ret i64 -3617008641903833651 + +; CHECK: lis [[REG1:[0-9]+]], -12851 +; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 52685 +; CHECK: rldimi 3, 3, 32, 0 +} + -- 2.7.4