return true;
}
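+// Return the data layout's preferred alignment for the IR type corresponding
+// to VT; used below when sizing and aligning the stack temporary for
+// indirectly-passed arguments.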
+static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
+ return DAG.getDataLayout().getPrefTypeAlign(
+ VT.getTypeForEVT(*DAG.getContext()));
+}
+
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                       SmallVectorImpl<SDValue> &InVals) const {
// For now, only handle fully promoted and indirect arguments.
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
- SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
- int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- MemOpChains.push_back(
- DAG.getStore(Chain, DL, ArgValue, SpillSlot,
- MachinePointerInfo::getFixedStack(MF, FI)));
+ Align StackAlign =
+ std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
+ getPrefTypeAlign(ArgValue.getValueType(), DAG));
+ TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
// If the original argument was split (e.g. i128), we need
// to store the required parts of it here (and pass just one address).
// Vectors may be partly split to registers and partly to the stack, in
// which case the part offset need not be zero (see the assert below).
unsigned ArgIndex = Outs[i].OrigArgIndex;
unsigned ArgPartOffset = Outs[i].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
+ // Calculate the total size to store. The only way to know which parts we
+ // are actually storing is to walk the remaining parts, so collect each
+ // part's value and offset as we go.
+ SmallVector<std::pair<SDValue, unsigned>> Parts;
while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
SDValue PartValue = OutVals[i + 1];
unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
+ EVT PartVT = PartValue.getValueType();
+ StoredSize += PartVT.getStoreSize();
+ StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
+ Parts.push_back(std::make_pair(PartValue, PartOffset));
+ ++i;
+ }
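+ // Now that the total store size and the strictest preferred alignment are
+ // known, create a single stack temporary that can hold every part.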
+ SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
+ int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, ArgValue, SpillSlot,
+ MachinePointerInfo::getFixedStack(MF, FI)));
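+ // Store each remaining part at its recorded offset within the slot.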
+ for (const auto &Part : Parts) {
+ SDValue PartValue = Part.first;
+ unsigned PartOffset = Part.second;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
DAG.getIntPtrConstant(PartOffset, DL));
MemOpChains.push_back(
DAG.getStore(Chain, DL, PartValue, Address,
MachinePointerInfo::getFixedStack(MF, FI)));
- ++i;
}
ArgValue = SpillSlot;
} else {
;
; LMULMAX2-LABEL: call_split_vector_args:
; LMULMAX2: # %bb.0:
-; LMULMAX2-NEXT: addi sp, sp, -256
-; LMULMAX2-NEXT: .cfi_def_cfa_offset 256
-; LMULMAX2-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT: addi sp, sp, -128
+; LMULMAX2-NEXT: .cfi_def_cfa_offset 128
+; LMULMAX2-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT: .cfi_offset ra, -8
; LMULMAX2-NEXT: .cfi_offset s0, -16
-; LMULMAX2-NEXT: addi s0, sp, 256
+; LMULMAX2-NEXT: addi s0, sp, 128
; LMULMAX2-NEXT: .cfi_def_cfa s0, 0
; LMULMAX2-NEXT: andi sp, sp, -128
; LMULMAX2-NEXT: vsetivli a2, 2, e32,m1,ta,mu
; LMULMAX2-NEXT: vmv1r.v v12, v8
; LMULMAX2-NEXT: vmv2r.v v22, v14
; LMULMAX2-NEXT: call split_vector_args@plt
-; LMULMAX2-NEXT: addi sp, s0, -256
-; LMULMAX2-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT: addi sp, sp, 256
+; LMULMAX2-NEXT: addi sp, s0, -128
+; LMULMAX2-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT: addi sp, sp, 128
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: call_split_vector_args:
; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: addi sp, sp, -256
-; LMULMAX1-NEXT: .cfi_def_cfa_offset 256
-; LMULMAX1-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT: addi sp, sp, -128
+; LMULMAX1-NEXT: .cfi_def_cfa_offset 128
+; LMULMAX1-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT: .cfi_offset ra, -8
; LMULMAX1-NEXT: .cfi_offset s0, -16
-; LMULMAX1-NEXT: addi s0, sp, 256
+; LMULMAX1-NEXT: addi s0, sp, 128
; LMULMAX1-NEXT: .cfi_def_cfa s0, 0
; LMULMAX1-NEXT: andi sp, sp, -128
; LMULMAX1-NEXT: vsetivli a2, 2, e32,m1,ta,mu
; LMULMAX1-NEXT: vmv1r.v v22, v14
; LMULMAX1-NEXT: vmv1r.v v23, v15
; LMULMAX1-NEXT: call split_vector_args@plt
-; LMULMAX1-NEXT: addi sp, s0, -256
-; LMULMAX1-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT: addi sp, sp, 256
+; LMULMAX1-NEXT: addi sp, s0, -128
+; LMULMAX1-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT: addi sp, sp, 128
; LMULMAX1-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %pa
%b = load <32 x i32>, <32 x i32>* %pb
declare void @callee160(i160)
declare void @callee161(i161)
-; FIXME: Stack write clobbers the spilled value (on RV64).
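+; The stack temporary for the indirect argument is now sized to cover all of
+; its legalized parts, so the zero stores for the argument no longer clobber
+; the spilled i32 and the reload after the call sees 42.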
define i32 @caller129() nounwind {
; RV32I-LABEL: caller129:
; RV32I: # %bb.0:
;
; RV64I-LABEL: caller129:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: addi a0, zero, 42
-; RV64I-NEXT: sw a0, 20(sp)
+; RV64I-NEXT: sw a0, 36(sp)
; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: mv a0, sp
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: call callee129@plt
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: lw a0, 36(sp)
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
%1 = alloca i32
store i32 42, i32* %1
call void @callee129(i129 0)
%2 = load i32, i32* %1
ret i32 %2
}
-; FIXME: Stack write clobbers the spilled value (on RV64).
define i32 @caller160() nounwind {
; RV32I-LABEL: caller160:
; RV32I: # %bb.0:
;
; RV64I-LABEL: caller160:
; RV64I: # %bb.0:
-; RV64I-NEXT: addi sp, sp, -32
-; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT: addi sp, sp, -48
+; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: addi a0, zero, 42
-; RV64I-NEXT: sw a0, 20(sp)
+; RV64I-NEXT: sw a0, 36(sp)
; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: mv a0, sp
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: call callee160@plt
-; RV64I-NEXT: lw a0, 20(sp)
-; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT: addi sp, sp, 32
+; RV64I-NEXT: lw a0, 36(sp)
+; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
%1 = alloca i32
store i32 42, i32* %1
call void @callee160(i160 0)
%2 = load i32, i32* %1
ret i32 %2
}
; RUN: llc -mtriple=riscv32 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV32
; RUN: llc -mtriple=riscv64 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV64
-; FIXME: The stack location used to pass the parameter to the function has the
-; incorrect size and alignment for how we use it, and we clobber the stack.
-
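+; The <4 x i8> argument is passed indirectly and legalized into four
+; XLen-sized parts, so its stack object is 16 bytes (align 4) on RV32 and
+; 32 bytes (align 8) on RV64.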
declare void @callee(<4 x i8> %v)
define void @caller() {
; RV32-LABEL: name: caller
; RV32: stack:
- ; RV32: - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+ ; RV32: - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 4,
; RV32-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
; RV32-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
; RV32: bb.0 (%ir-block.0):
; RV32: PseudoRET
; RV64-LABEL: name: caller
; RV64: stack:
- ; RV64: - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
+ ; RV64: - { id: 0, name: '', type: default, offset: 0, size: 32, alignment: 8,
; RV64-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
; RV64-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
; RV64: bb.0 (%ir-block.0):