// There is an ADD between the ADDI and the load/store. We can only fold an
// ADDI that does not have a FrameIndex operand.
SDValue Add;
- int AddBaseIdx;
- if (Base.getMachineOpcode() == RISCV::ADD) {
- if (!Base.hasOneUse())
- return false;
+ unsigned AddBaseIdx;
+ if (Base.getMachineOpcode() == RISCV::ADD && Base.hasOneUse()) {
Add = Base;
SDValue Op0 = Base.getOperand(0);
SDValue Op1 = Base.getOperand(1);
if (Op0.isMachineOpcode() && Op0.getMachineOpcode() == RISCV::ADDI &&
    isa<ConstantSDNode>(Op0.getOperand(1))) {
  AddBaseIdx = 1;
  Base = Op0;
} else if (Op1.isMachineOpcode() && Op1.getMachineOpcode() == RISCV::ADDI &&
           isa<ConstantSDNode>(Op1.getOperand(1))) {
AddBaseIdx = 0;
Base = Op1;
+ } else if (Op1.isMachineOpcode() &&
+ Op1.getMachineOpcode() == RISCV::ADDIW &&
+ isa<ConstantSDNode>(Op1.getOperand(1)) &&
+ Op1.getOperand(0).isMachineOpcode() &&
+ Op1.getOperand(0).getMachineOpcode() == RISCV::LUI) {
+ // We found an LUI+ADDIW constant materialization. We might be able to
+ // fold the ADDIW offset if it could be treated as ADDI.
+ // Emulate the constant materialization to see if the result would be
+ // a simm32 if ADDI was used instead of ADDIW.
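+ //
+ // For example, materializing 0x7FFFF800 takes LUI 524288 and ADDIW -2048:
+ // the LUI alone gives 0x80000000, which sign extends to 0xFFFFFFFF80000000,
+ // and adding -2048 gives 0xFFFFFFFF7FFFF800, which is not a simm32, so the
+ // fold is rejected. By contrast, LUI 8 with ADDIW -8 gives 32760, a simm32,
+ // so that fold is safe.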
+
+ // First the LUI.
+ uint64_t Imm = Op1.getOperand(0).getConstantOperandVal(0);
+ Imm <<= 12;
+ Imm = SignExtend64(Imm, 32);
+
+ // Then the ADDI.
+ uint64_t LoImm = cast<ConstantSDNode>(Op1.getOperand(1))->getSExtValue();
+ Imm += LoImm;
+
+ // If the result isn't a simm32, we can't do the optimization.
+ if (!isInt<32>(Imm))
+ return false;
+
+ AddBaseIdx = 0;
+ Base = Op1;
} else
return false;
- }
-
- // If the base is an ADDI, we can merge it in to the load/store.
- if (Base.getMachineOpcode() != RISCV::ADDI)
+ } else if (Base.getMachineOpcode() == RISCV::ADDI) {
+ // If the base is an ADDI, we can merge it into the load/store.
+ } else
return false;
SDValue ImmOperand = Base.getOperand(1);
ret i32 %2
}
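+
+; On RV32 there is no ADDIW; the constant is materialized with LUI+ADDI, so
+; the existing peephole can fold the ADDI offset into the load/store even at
+; the edge of the simm32 range.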
+define i32 @lw_really_far_local(i32* %a) {
+; RV32I-LABEL: lw_really_far_local:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a1, 524288
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: lw a0, -2048(a0)
+; RV32I-NEXT: ret
+ %1 = getelementptr inbounds i32, i32* %a, i32 536870400
+ %2 = load volatile i32, i32* %1
+ ret i32 %2
+}
+
+define void @st_really_far_local(i32* %a, i32 %b) {
+; RV32I-LABEL: st_really_far_local:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: add a0, a0, a2
+; RV32I-NEXT: sw a1, -2048(a0)
+; RV32I-NEXT: ret
+ %1 = getelementptr inbounds i32, i32* %a, i32 536870400
+ store i32 %b, i32* %1
+ ret void
+}
+
+define i32 @lw_sw_really_far_local(i32* %a, i32 %b) {
+; RV32I-LABEL: lw_sw_really_far_local:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 524288
+; RV32I-NEXT: addi a2, a2, -2048
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: lw a0, 0(a2)
+; RV32I-NEXT: sw a1, 0(a2)
+; RV32I-NEXT: ret
+ %1 = getelementptr inbounds i32, i32* %a, i32 536870400
+ %2 = load volatile i32, i32* %1
+ store i32 %b, i32* %1
+ ret i32 %2
+}
+
%struct.quux = type { i32, [0 x i8] }
; Make sure we don't remove the addi and fold the C from
; RV64I-LABEL: lw_far_local:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a1, 8
-; RV64I-NEXT: addiw a1, a1, -8
; RV64I-NEXT: add a0, a0, a1
-; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ld a0, -8(a0)
; RV64I-NEXT: ret
%1 = getelementptr inbounds i64, i64* %a, i64 4095
%2 = load volatile i64, i64* %1
; RV64I-LABEL: st_far_local:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a2, 8
-; RV64I-NEXT: addiw a2, a2, -8
; RV64I-NEXT: add a0, a0, a2
-; RV64I-NEXT: sd a1, 0(a0)
+; RV64I-NEXT: sd a1, -8(a0)
; RV64I-NEXT: ret
%1 = getelementptr inbounds i64, i64* %a, i64 4095
store i64 %b, i64* %1
ret i64 %2
}
+; Make sure we don't fold the addiw into the load offset. The sign extend of the
+; addiw is required.
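+; On RV64, LUI 524288 materializes 0xFFFFFFFF80000000 and the ADDIW
+; re-sign-extends from bit 31 to yield 0x7FFFF800. Folding -2048 into the
+; load offset would compute base + 0xFFFFFFFF80000000 - 2048 instead, a
+; different address, so the emulated immediate fails the simm32 check.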
+define i64 @lw_really_far_local(i64* %a) {
+; RV64I-LABEL: lw_really_far_local:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a1, 524288
+; RV64I-NEXT: addiw a1, a1, -2048
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ld a0, 0(a0)
+; RV64I-NEXT: ret
+ %1 = getelementptr inbounds i64, i64* %a, i64 268435200
+ %2 = load volatile i64, i64* %1
+ ret i64 %2
+}
+
+; Make sure we don't fold the addiw into the store offset. The sign extend of
+; the addiw is required.
+define void @st_really_far_local(i64* %a, i64 %b) {
+; RV64I-LABEL: st_really_far_local:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 524288
+; RV64I-NEXT: addiw a2, a2, -2048
+; RV64I-NEXT: add a0, a0, a2
+; RV64I-NEXT: sd a1, 0(a0)
+; RV64I-NEXT: ret
+ %1 = getelementptr inbounds i64, i64* %a, i64 268435200
+ store i64 %b, i64* %1
+ ret void
+}
+
+; Make sure we don't fold the addiw into the load/store offset. The sign extend
+; of the addiw is required.
+define i64 @lw_sw_really_far_local(i64* %a, i64 %b) {
+; RV64I-LABEL: lw_sw_really_far_local:
+; RV64I: # %bb.0:
+; RV64I-NEXT: lui a2, 524288
+; RV64I-NEXT: addiw a2, a2, -2048
+; RV64I-NEXT: add a2, a0, a2
+; RV64I-NEXT: ld a0, 0(a2)
+; RV64I-NEXT: sd a1, 0(a2)
+; RV64I-NEXT: ret
+ %1 = getelementptr inbounds i64, i64* %a, i64 268435200
+ %2 = load volatile i64, i64* %1
+ store i64 %b, i64* %1
+ ret i64 %2
+}
+
%struct.quux = type { i32, [0 x i8] }
; Make sure we don't remove the addi and fold the C from