From 0171a9f4eca3fe562e94895e618b1312e2134fcf Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Mon, 19 Mar 2018 11:54:28 +0000 Subject: [PATCH] [RISCV] Peephole optimisation for load/store of global values or constant addresses (load (add base, off), 0) -> (load base, off) (store val, (add base, off)) -> (store val, base, off) This is similar to an equivalent peephole optimisation in PPCISelDAGToDAG. llvm-svn: 327831 --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 95 +++++++++++++++++++++++++++++ llvm/test/CodeGen/RISCV/blockaddress.ll | 11 ++-- llvm/test/CodeGen/RISCV/byval.ll | 12 ++-- llvm/test/CodeGen/RISCV/fp128.ll | 48 +++++---------- llvm/test/CodeGen/RISCV/inline-asm.ll | 3 +- llvm/test/CodeGen/RISCV/mem.ll | 21 +++---- llvm/test/CodeGen/RISCV/wide-mem.ll | 8 +-- 7 files changed, 131 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index d07301d..9e64007 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -42,6 +42,8 @@ public: return SelectionDAGISel::runOnMachineFunction(MF); } + void PostprocessISelDAG() override; + void Select(SDNode *Node) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, @@ -51,9 +53,14 @@ public: // Include the pieces autogenerated from the target description. #include "RISCVGenDAGISel.inc" + +private: + void doPeepholeLoadStoreADDI(); }; } +void RISCVDAGToDAGISel::PostprocessISelDAG() { doPeepholeLoadStoreADDI(); } + void RISCVDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); MVT XLenVT = Subtarget->getXLenVT(); @@ -117,6 +124,94 @@ bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) { return false; } +// Merge an ADDI into the offset of a load/store instruction where possible. 
+// (load (add base, off), 0) -> (load base, off) +// (store val, (add base, off)) -> (store val, base, off) +void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() { + SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); + ++Position; + + while (Position != CurDAG->allnodes_begin()) { + SDNode *N = &*--Position; + // Skip dead nodes and any non-machine opcodes. + if (N->use_empty() || !N->isMachineOpcode()) + continue; + + int OffsetOpIdx; + int BaseOpIdx; + + // Only attempt this optimisation for I-type loads and S-type stores. + switch (N->getMachineOpcode()) { + default: + continue; + case RISCV::LB: + case RISCV::LH: + case RISCV::LW: + case RISCV::LBU: + case RISCV::LHU: + case RISCV::LWU: + case RISCV::LD: + case RISCV::FLW: + case RISCV::FLD: + BaseOpIdx = 0; + OffsetOpIdx = 1; + break; + case RISCV::SB: + case RISCV::SH: + case RISCV::SW: + case RISCV::SD: + case RISCV::FSW: + case RISCV::FSD: + BaseOpIdx = 1; + OffsetOpIdx = 2; + break; + } + + // Currently, the load/store offset must be 0 to be considered for this + // peephole optimisation. + if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) || + N->getConstantOperandVal(OffsetOpIdx) != 0) + continue; + + SDValue Base = N->getOperand(BaseOpIdx); + + // If the base is an ADDI, we can merge it in to the load/store. 
+ if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI) + continue; + + SDValue ImmOperand = Base.getOperand(1); + + if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) { + ImmOperand = CurDAG->getTargetConstant( + Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType()); + } else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) { + ImmOperand = CurDAG->getTargetGlobalAddress( + GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(), + GA->getOffset(), GA->getTargetFlags()); + } else { + continue; + } + + DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: "); + DEBUG(Base->dump(CurDAG)); + DEBUG(dbgs() << "\nN: "); + DEBUG(N->dump(CurDAG)); + DEBUG(dbgs() << "\n"); + + // Modify the offset operand of the load/store. + if (BaseOpIdx == 0) // Load + CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, + N->getOperand(2)); + else // Store + CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), + ImmOperand, N->getOperand(3)); + + // The add-immediate may now be dead, in which case remove it. + if (Base.getNode()->use_empty()) + CurDAG->RemoveDeadNode(Base.getNode()); + } +} + // This pass converts a legalized DAG into a RISCV-specific DAG, ready // for instruction scheduling. 
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) { diff --git a/llvm/test/CodeGen/RISCV/blockaddress.ll b/llvm/test/CodeGen/RISCV/blockaddress.ll index 9099251..5b7f818 100644 --- a/llvm/test/CodeGen/RISCV/blockaddress.ll +++ b/llvm/test/CodeGen/RISCV/blockaddress.ll @@ -9,12 +9,11 @@ define void @test_blockaddress() nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) -; RV32I-NEXT: lui a0, %hi(addr) -; RV32I-NEXT: addi a0, a0, %lo(addr) -; RV32I-NEXT: lui a1, %hi(.Ltmp0) -; RV32I-NEXT: addi a1, a1, %lo(.Ltmp0) -; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lui a0, %hi(.Ltmp0) +; RV32I-NEXT: addi a0, a0, %lo(.Ltmp0) +; RV32I-NEXT: lui a1, %hi(addr) +; RV32I-NEXT: sw a0, %lo(addr)(a1) +; RV32I-NEXT: lw a0, %lo(addr)(a1) ; RV32I-NEXT: jr a0 ; RV32I-NEXT: .Ltmp0: # Block address taken ; RV32I-NEXT: .LBB0_1: # %block diff --git a/llvm/test/CodeGen/RISCV/byval.ll b/llvm/test/CodeGen/RISCV/byval.ll index a406058..943983a 100644 --- a/llvm/test/CodeGen/RISCV/byval.ll +++ b/llvm/test/CodeGen/RISCV/byval.ll @@ -23,20 +23,16 @@ define void @caller() nounwind { ; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: sw ra, 28(sp) ; RV32I-NEXT: lui a0, %hi(foo+12) -; RV32I-NEXT: addi a0, a0, %lo(foo+12) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(foo+12)(a0) ; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, %hi(foo+8) -; RV32I-NEXT: addi a0, a0, %lo(foo+8) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(foo+8)(a0) ; RV32I-NEXT: sw a0, 20(sp) ; RV32I-NEXT: lui a0, %hi(foo+4) -; RV32I-NEXT: addi a0, a0, %lo(foo+4) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(foo+4)(a0) ; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, %hi(foo) -; RV32I-NEXT: addi a0, a0, %lo(foo) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(foo)(a0) ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: lui a0, %hi(callee) ; RV32I-NEXT: addi a1, a0, %lo(callee) diff --git a/llvm/test/CodeGen/RISCV/fp128.ll 
b/llvm/test/CodeGen/RISCV/fp128.ll index e8a34fd..b2d07cd 100644 --- a/llvm/test/CodeGen/RISCV/fp128.ll +++ b/llvm/test/CodeGen/RISCV/fp128.ll @@ -14,36 +14,28 @@ define i32 @test_load_and_cmp() nounwind { ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) ; RV32I-NEXT: lui a0, %hi(y+12) -; RV32I-NEXT: addi a0, a0, %lo(y+12) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(y+12)(a0) ; RV32I-NEXT: sw a0, 20(sp) ; RV32I-NEXT: lui a0, %hi(y+8) -; RV32I-NEXT: addi a0, a0, %lo(y+8) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(y+8)(a0) ; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui a0, %hi(y+4) -; RV32I-NEXT: addi a0, a0, %lo(y+4) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(y+4)(a0) ; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: lui a0, %hi(y) -; RV32I-NEXT: addi a0, a0, %lo(y) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(y)(a0) ; RV32I-NEXT: sw a0, 8(sp) ; RV32I-NEXT: lui a0, %hi(x+12) -; RV32I-NEXT: addi a0, a0, %lo(x+12) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(x+12)(a0) ; RV32I-NEXT: sw a0, 36(sp) ; RV32I-NEXT: lui a0, %hi(x+8) -; RV32I-NEXT: addi a0, a0, %lo(x+8) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(x+8)(a0) ; RV32I-NEXT: sw a0, 32(sp) ; RV32I-NEXT: lui a0, %hi(x+4) -; RV32I-NEXT: addi a0, a0, %lo(x+4) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(x+4)(a0) ; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: lui a0, %hi(x) -; RV32I-NEXT: addi a0, a0, %lo(x) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(x)(a0) ; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, %hi(__netf2) ; RV32I-NEXT: addi a2, a0, %lo(__netf2) @@ -68,36 +60,28 @@ define i32 @test_add_and_fptosi() nounwind { ; RV32I-NEXT: addi sp, sp, -80 ; RV32I-NEXT: sw ra, 76(sp) ; RV32I-NEXT: lui a0, %hi(y+12) -; RV32I-NEXT: addi a0, a0, %lo(y+12) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(y+12)(a0) ; RV32I-NEXT: sw a0, 36(sp) ; RV32I-NEXT: lui a0, %hi(y+8) -; RV32I-NEXT: addi a0, a0, %lo(y+8) -; RV32I-NEXT: lw a0, 0(a0) +; 
RV32I-NEXT: lw a0, %lo(y+8)(a0) ; RV32I-NEXT: sw a0, 32(sp) ; RV32I-NEXT: lui a0, %hi(y+4) -; RV32I-NEXT: addi a0, a0, %lo(y+4) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(y+4)(a0) ; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: lui a0, %hi(y) -; RV32I-NEXT: addi a0, a0, %lo(y) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(y)(a0) ; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, %hi(x+12) -; RV32I-NEXT: addi a0, a0, %lo(x+12) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(x+12)(a0) ; RV32I-NEXT: sw a0, 52(sp) ; RV32I-NEXT: lui a0, %hi(x+8) -; RV32I-NEXT: addi a0, a0, %lo(x+8) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(x+8)(a0) ; RV32I-NEXT: sw a0, 48(sp) ; RV32I-NEXT: lui a0, %hi(x+4) -; RV32I-NEXT: addi a0, a0, %lo(x+4) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(x+4)(a0) ; RV32I-NEXT: sw a0, 44(sp) ; RV32I-NEXT: lui a0, %hi(x) -; RV32I-NEXT: addi a0, a0, %lo(x) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(x)(a0) ; RV32I-NEXT: sw a0, 40(sp) ; RV32I-NEXT: lui a0, %hi(__addtf3) ; RV32I-NEXT: addi a3, a0, %lo(__addtf3) diff --git a/llvm/test/CodeGen/RISCV/inline-asm.ll b/llvm/test/CodeGen/RISCV/inline-asm.ll index 5c46b82..a16375b 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm.ll @@ -8,8 +8,7 @@ define i32 @constraint_r(i32 %a) { ; RV32I-LABEL: constraint_r: ; RV32I: # %bb.0: ; RV32I-NEXT: lui a1, %hi(gi) -; RV32I-NEXT: addi a1, a1, %lo(gi) -; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: lw a1, %lo(gi)(a1) ; RV32I-NEXT: #APP ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: #NO_APP diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll index 60c19de..f157b23 100644 --- a/llvm/test/CodeGen/RISCV/mem.ll +++ b/llvm/test/CodeGen/RISCV/mem.ll @@ -163,17 +163,14 @@ define i16 @load_sext_zext_anyext_i1_i16(i1 *%a) nounwind { @G = global i32 0 define i32 @lw_sw_global(i32 %a) nounwind { -; TODO: the addi should be folded in to the lw/sw operations ; RV32I-LABEL: 
lw_sw_global: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, %hi(G) -; RV32I-NEXT: addi a2, a1, %lo(G) -; RV32I-NEXT: lw a1, 0(a2) -; RV32I-NEXT: sw a0, 0(a2) +; RV32I-NEXT: lui a2, %hi(G) +; RV32I-NEXT: lw a1, %lo(G)(a2) +; RV32I-NEXT: sw a0, %lo(G)(a2) ; RV32I-NEXT: lui a2, %hi(G+36) -; RV32I-NEXT: addi a2, a2, %lo(G+36) -; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: sw a0, 0(a2) +; RV32I-NEXT: lw a3, %lo(G+36)(a2) +; RV32I-NEXT: sw a0, %lo(G+36)(a2) ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret %1 = load volatile i32, i32* @G @@ -186,13 +183,11 @@ define i32 @lw_sw_global(i32 %a) nounwind { ; Ensure that 1 is added to the high 20 bits if bit 11 of the low part is 1 define i32 @lw_sw_constant(i32 %a) nounwind { -; TODO: the addi should be folded in to the lw/sw ; RV32I-LABEL: lw_sw_constant: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a1, 912092 -; RV32I-NEXT: addi a2, a1, -273 -; RV32I-NEXT: lw a1, 0(a2) -; RV32I-NEXT: sw a0, 0(a2) +; RV32I-NEXT: lui a2, 912092 +; RV32I-NEXT: lw a1, -273(a2) +; RV32I-NEXT: sw a0, -273(a2) ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret %1 = inttoptr i32 3735928559 to i32* diff --git a/llvm/test/CodeGen/RISCV/wide-mem.ll b/llvm/test/CodeGen/RISCV/wide-mem.ll index 92993f6d..95db242 100644 --- a/llvm/test/CodeGen/RISCV/wide-mem.ll +++ b/llvm/test/CodeGen/RISCV/wide-mem.ll @@ -17,17 +17,13 @@ define i64 @load_i64(i64 *%a) nounwind { @val64 = local_unnamed_addr global i64 2863311530, align 8 -; TODO: codegen on this should be improved. It shouldn't be necessary to -; generate two addi define i64 @load_i64_global() nounwind { ; RV32I-LABEL: load_i64_global: ; RV32I: # %bb.0: ; RV32I-NEXT: lui a0, %hi(val64) -; RV32I-NEXT: addi a0, a0, %lo(val64) -; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a0, %lo(val64)(a0) ; RV32I-NEXT: lui a1, %hi(val64+4) -; RV32I-NEXT: addi a1, a1, %lo(val64+4) -; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: lw a1, %lo(val64+4)(a1) ; RV32I-NEXT: ret %1 = load i64, i64* @val64 ret i64 %1 -- 2.7.4