// There is an ADD between the ADDI and the load/store. We can only fold an
// ADDI that does not have a FrameIndex operand.
SDValue Add;
- int AddBaseIdx;
- if (Base.getMachineOpcode() == RISCV::ADD) {
- if (!Base.hasOneUse())
- return false;
+ unsigned AddBaseIdx;
+ if (Base.getMachineOpcode() == RISCV::ADD && Base.hasOneUse()) {
Add = Base;
SDValue Op0 = Base.getOperand(0);
SDValue Op1 = Base.getOperand(1);
if (Op0.isMachineOpcode() && Op0.getMachineOpcode() == RISCV::ADDI &&
    isa<ConstantSDNode>(Op0.getOperand(1))) {
  AddBaseIdx = 1;
  Base = Op0;
} else if (Op1.isMachineOpcode() && Op1.getMachineOpcode() == RISCV::ADDI &&
           isa<ConstantSDNode>(Op1.getOperand(1))) {
AddBaseIdx = 0;
Base = Op1;
+ } else if (Op1.isMachineOpcode() &&
+ Op1.getMachineOpcode() == RISCV::ADDIW &&
+ isa<ConstantSDNode>(Op1.getOperand(1)) &&
+ Op1.getOperand(0).isMachineOpcode() &&
+ Op1.getOperand(0).getMachineOpcode() == RISCV::LUI) {
+ // We found an LUI+ADDIW constant materialization. We might be able to
+ // fold the ADDIW offset if it could be treated as ADDI.
+ // Emulate the constant materialization to see if the result would be
+ // a simm32 if ADDI was used instead of ADDIW.
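+ //
+ // For example, materializing 0x7FFFF800 takes LUI 524288 and ADDIW -2048:
+ // the LUI alone gives 0x80000000, which sign extends to 0xFFFFFFFF80000000,
+ // and adding -2048 gives 0xFFFFFFFF7FFFF800, which is not a simm32, so the
+ // fold is rejected. By contrast, LUI 8 with ADDIW -8 gives 32760, a simm32,
+ // so that fold is safe.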
+
+ // First the LUI.
+ uint64_t Imm = Op1.getOperand(0).getConstantOperandVal(0);
+ Imm <<= 12;
+ Imm = SignExtend64(Imm, 32);
+
+ // Then the ADDI.
+ uint64_t LoImm = cast<ConstantSDNode>(Op1.getOperand(1))->getSExtValue();
+ Imm += LoImm;
+
+ // If the result isn't a simm32, we can't do the optimization.
+ if (!isInt<32>(Imm))
+ return false;
+
+ AddBaseIdx = 0;
+ Base = Op1;
} else
return false;
- }
-
- // If the base is an ADDI, we can merge it in to the load/store.
- if (Base.getMachineOpcode() != RISCV::ADDI)
+ } else if (Base.getMachineOpcode() == RISCV::ADDI) {
+ // If the base is an ADDI, we can merge it into the load/store.
+ } else
return false;
SDValue ImmOperand = Base.getOperand(1);
ret i32 %2
}
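+
+; On RV32 there is no ADDIW; the constant is materialized with LUI+ADDI, so
+; the existing peephole can fold the ADDI offset into the load/store even at
+; the edge of the simm32 range.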
+define i32 @lw_really_far_local(i32* %a) {
+; RV32I-LABEL: lw_really_far_local:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lw a0, -2048(a0)
+; RV32I-NEXT: ret
+ %1 = getelementptr inbounds i32, i32* %a, i32 536870400
+ %2 = load volatile i32, i32* %1
+ ret i32 %2
+}
+
+define void @st_really_far_local(i32* %a, i32 %b) {
+; RV32I-LABEL: st_really_far_local:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: add a0, a0, a2
+; RV32I-NEXT: sw a1, -2048(a0)
+; RV32I-NEXT: ret
+ %1 = getelementptr inbounds i32, i32* %a, i32 536870400
+ store i32 %b, i32* %1
+ ret void
+}
+
+define i32 @lw_sw_really_far_local(i32* %a, i32 %b) {
+; RV32I-LABEL: lw_sw_really_far_local:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: addi a2, a2, -2048
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: sw a1, 0(a2)
+; RV32I-NEXT: ret
+ %1 = getelementptr inbounds i32, i32* %a, i32 536870400
+ %2 = load volatile i32, i32* %1
+ store i32 %b, i32* %1
+ ret i32 %2
+}
+
%struct.quux = type { i32, [0 x i8] }
; Make sure we don't remove the addi and fold the C from
; RV64I-LABEL: lw_far_local:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: addiw a1, a1, -8
; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ld a0, -8(a0)
; RV64I-NEXT: ret
%1 = getelementptr inbounds i64, i64* %a, i64 4095
%2 = load volatile i64, i64* %1
; RV64I-LABEL: st_far_local:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, 8
-; RV64I-NEXT: addiw a2, a2, -8
; RV64I-NEXT: add a0, a0, a2
-; RV64I-NEXT: sd a1, 0(a0)
+; RV64I-NEXT: sd a1, -8(a0)
; RV64I-NEXT: ret
%1 = getelementptr inbounds i64, i64* %a, i64 4095
store i64 %b, i64* %1
ret i64 %2
}
+; Make sure we don't fold the addiw into the load offset. The sign extend of the
+; addiw is required.
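+; On RV64, LUI 524288 materializes 0xFFFFFFFF80000000 and the ADDIW
+; re-sign-extends from bit 31 to yield 0x7FFFF800. Folding -2048 into the
+; load offset would compute base + 0xFFFFFFFF80000000 - 2048 instead, a
+; different address, so the emulated immediate fails the simm32 check.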
+define i64 @lw_really_far_local(i64* %a) {
+; RV64I-LABEL: lw_really_far_local:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: addiw a1, a1, -2048
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ret
+ %1 = getelementptr inbounds i64, i64* %a, i64 268435200
+ %2 = load volatile i64, i64* %1
+ ret i64 %2
+}
+
+; Make sure we don't fold the addiw into the store offset. The sign extend of
+; the addiw is required.
+define void @st_really_far_local(i64* %a, i64 %b) {
+; RV64I-LABEL: st_really_far_local:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 524288
+; RV64I-NEXT: addiw a2, a2, -2048
+; RV64I-NEXT: add a0, a0, a2
+; RV64I-NEXT: sd a1, 0(a0)
+; RV64I-NEXT: ret
+ %1 = getelementptr inbounds i64, i64* %a, i64 268435200
+ store i64 %b, i64* %1
+ ret void
+}
+
+; Make sure we don't fold the addiw into the load/store offset. The sign extend
+; of the addiw is required.
+define i64 @lw_sw_really_far_local(i64* %a, i64 %b) {
+; RV64I-LABEL: lw_sw_really_far_local:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 524288
+; RV64I-NEXT: addiw a2, a2, -2048
+; RV64I-NEXT: add a2, a0, a2
+; RV64I-NEXT: ld a0, 0(a2)
+; RV64I-NEXT: sd a1, 0(a2)
+; RV64I-NEXT: ret
+ %1 = getelementptr inbounds i64, i64* %a, i64 268435200
+ %2 = load volatile i64, i64* %1
+ store i64 %b, i64* %1
+ ret i64 %2
+}
+
%struct.quux = type { i32, [0 x i8] }
; Make sure we don't remove the addi and fold the C from