// If no shift, we're done.
if (!Shift) return Result;
+ // If Hi word == Lo word,
+ // we can use rldimi to insert the Lo word into Hi word.
+ if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
+ ++Result;
+ return Result;
+ }
+
// Shift for next step if the upper 32-bits were not zero.
if (Imm)
++Result;
// If no shift, we're done.
if (!Shift) return Result;
+ // If Hi word == Lo word,
+ // we can use rldimi to insert the Lo word into Hi word.
+ if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
+ SDValue Ops[] =
+ { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
+ return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+ }
+
// Shift for next step if the upper 32-bits were not zero.
if (Imm) {
Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
unsigned NumRLInsts = 0;
bool FirstBG = true;
+ bool MoreBG = false;
for (auto &BG : BitGroups) {
- if (!MatchingBG(BG))
+ if (!MatchingBG(BG)) {
+ MoreBG = true;
continue;
+ }
NumRLInsts +=
SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
!FirstBG);
// because that exposes more opportunities for CSE.
if (NumAndInsts > NumRLInsts)
continue;
- if (Use32BitInsts && NumAndInsts == NumRLInsts)
+ // When merging multiple bit groups, an 'or' instruction is used.
+ // But when a rotate is used, rldimi can insert the rotated value into any
+ // register, so the 'or' instruction can be avoided.
+ if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
continue;
DEBUG(dbgs() << "\t\t\t\tusing masking\n");
--- /dev/null
+; RUN: llc -O2 -march=ppc64 -mcpu=pwr8 < %s | FileCheck %s
+
+; Returns the 64-bit constant 0xCDCDCDCDCDCDCDCD, whose upper and lower
+; 32-bit words are identical. The expected output below builds one word
+; with lis/ori and then uses a single rldimi to copy it into the other half.
+define i64 @foo() {
+entry:
+ ret i64 -3617008641903833651
+
+; CHECK: lis [[REG1:[0-9]+]], -12851
+; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 52685
+; CHECK: rldimi 3, 3, 32, 0
+}
+