From 2224b53f064a824ad85dff0b18ada0da9379460b Mon Sep 17 00:00:00 2001 From: esmeyi Date: Tue, 31 Jan 2023 06:02:17 -0500 Subject: [PATCH] [PowerPC] Improve materialization for immediates which are almost a 32 bit splat. Summary: Some 64 bit constants can be materialized with fewer instructions than we currently use. We consider a 64 bit immediate value divided into four parts, Hi16OfHi32 (bits 48...63), Lo16OfHi32 (bits 32...47), Hi16OfLo32 (bits 16...31), Lo16OfLo32 (bits 0...15). When any three parts are equal, the immediate can be treated as "almost" a splat of a 32 bit value in a 64 bit register. In such cases, we can use 3 instructions to generate the splat and 1 instruction to modify the part that differs. For example, 0xCCFFCCFF0123CCFF is now materialized with lis, ori, rldimi, rldimi instead of the previous 5-instruction sequence. Reviewed By: shchenz Differential Revision: https://reviews.llvm.org/D139813 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 58 +++++++++++++++++++++++++++-- llvm/test/CodeGen/PowerPC/constants-i64.ll | 31 +++++++++++++-- 2 files changed, 81 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index abf4855..2c450a3 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1319,18 +1319,68 @@ static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, auto getI32Imm = [CurDAG, dl](unsigned Imm) { return CurDAG->getTargetConstant(Imm, dl, MVT::i32); }; + + uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff; + uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff; + + // Try to use 4 instructions to materialize the immediate which is "almost" a + // splat of a 32 bit immediate. 
+ if (Hi16OfLo32 && Lo16OfLo32) { + uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff; + uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff; + bool IsSelected = false; + + auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) { + SDNode *Result = + CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16)); + Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, + SDValue(Result, 0), getI32Imm(Lo16)); + SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32), + getI32Imm(0)}; + return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); + }; + + if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) { + IsSelected = true; + Result = getSplat(Hi16OfLo32, Lo16OfLo32); + // Modify Hi16OfHi32. + SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48), + getI32Imm(0)}; + Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops); + } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) { + IsSelected = true; + Result = getSplat(Hi16OfHi32, Lo16OfHi32); + // Modify Lo16OfLo32. + SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16), + getI32Imm(16), getI32Imm(31)}; + Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops); + } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) { + IsSelected = true; + Result = getSplat(Hi16OfHi32, Lo16OfHi32); + // Modify Hi16OfLo32. + SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16), + getI32Imm(0), getI32Imm(15)}; + Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops); + } + if (IsSelected == true) { + if (InstCnt) + *InstCnt = 4; + return Result; + } + } + // Handle the upper 32 bit value. Result = selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect); // Add in the last bits as required. 
- if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) { + if (Hi16OfLo32) { Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, - SDValue(Result, 0), getI32Imm(Hi16)); + SDValue(Result, 0), getI32Imm(Hi16OfLo32)); ++InstCntDirect; } - if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) { + if (Lo16OfLo32) { Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0), - getI32Imm(Lo16)); + getI32Imm(Lo16OfLo32)); ++InstCntDirect; } if (InstCnt) diff --git a/llvm/test/CodeGen/PowerPC/constants-i64.ll b/llvm/test/CodeGen/PowerPC/constants-i64.ll index 423836b..9a23442 100644 --- a/llvm/test/CodeGen/PowerPC/constants-i64.ll +++ b/llvm/test/CodeGen/PowerPC/constants-i64.ll @@ -391,14 +391,37 @@ entry: define i64 @imm20() { ; CHECK-LABEL: imm20: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lis 3, -13057 -; CHECK-NEXT: ori 3, 3, 52479 -; CHECK-NEXT: rldic 3, 3, 32, 0 -; CHECK-NEXT: oris 3, 3, 291 +; CHECK-NEXT: lis 3, 291 ; CHECK-NEXT: ori 3, 3, 52479 +; CHECK-NEXT: rldimi 3, 3, 32, 0 +; CHECK-NEXT: rldimi 3, 3, 48, 0 ; CHECK-NEXT: blr entry: ret i64 14771750698406366463 ;0xCCFFCCFF0123CCFF } +define i64 @imm21() { +; CHECK-LABEL: imm21: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lis 3, -13057 +; CHECK-NEXT: ori 3, 3, 291 +; CHECK-NEXT: rldimi 3, 3, 32, 0 +; CHECK-NEXT: rlwimi 3, 3, 16, 16, 31 +; CHECK-NEXT: blr +entry: + ret i64 14771526556073315583 ;0xCCFF0123CCFFCCFF +} + +define i64 @imm22() { +; CHECK-LABEL: imm22: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lis 3, 291 +; CHECK-NEXT: ori 3, 3, 52479 +; CHECK-NEXT: rldimi 3, 3, 32, 0 +; CHECK-NEXT: rlwimi 3, 3, 16, 0, 15 +; CHECK-NEXT: blr +entry: + ret i64 82134617250843903 ;0x0123CCFFCCFFCCFF +} + attributes #0 = { nounwind readnone } -- 2.7.4