From b18ea162dfef6a390f84e4be048038675a722304 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Tue, 20 Sep 2016 15:20:36 +0000 Subject: [PATCH] GlobalISel: split aggregates for PCS lowering This should match the existing behaviour for passing complicated struct and array types, in particular HFAs come through like that from Clang. For C & C++ we still need to somehow support all the weird ABI flags, or at least those that are present in the IR (signext, byval, ...), and stack-based parameter passing. llvm-svn: 281977 --- .../include/llvm/CodeGen/GlobalISel/CallLowering.h | 11 +- .../llvm/CodeGen/GlobalISel/MachineIRBuilder.h | 2 +- llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 3 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 1 - llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 3 +- .../CodeGen/GlobalISel/MachineLegalizeHelper.cpp | 6 +- llvm/lib/Target/AArch64/AArch64CallLowering.cpp | 165 ++++++++++++++++----- llvm/lib/Target/AArch64/AArch64CallLowering.h | 12 +- .../CodeGen/AArch64/GlobalISel/call-translator.ll | 50 +++++++ 9 files changed, 199 insertions(+), 54 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index 451d5d2..62bd0f4 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -75,11 +75,10 @@ class CallLowering { /// \p Callee is the destination of the call. It should be either a register, /// globaladdress, or externalsymbol. /// - /// \p ResTys is a list of the individual result types this function call will - /// produce. The types are used to assign physical registers to each slot. + /// \p ResTy is the type returned by the function /// - /// \p ResRegs is a list of the virtual registers that we expect to be defined - /// by this call, one per entry in \p ResTys. + /// \p ResReg is the generic virtual register that the returned + /// value should be lowered into. /// /// \p ArgTys is a list of the types each member of \p ArgRegs has; used by /// the target to decide which register/stack slot should be allocated. @@ -89,8 +88,8 @@ class CallLowering { /// /// \return true if the lowering succeeded, false otherwise. virtual bool lowerCall(MachineIRBuilder &MIRBuilder, - const MachineOperand &Callee, ArrayRef ResTys, - ArrayRef ResRegs, ArrayRef ArgTys, + const MachineOperand &Callee, Type * ResTy, + unsigned ResReg, ArrayRef ArgTys, ArrayRef ArgRegs) const { return false; } diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 4795aad..3a9dd8f 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -363,7 +363,7 @@ public: /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildSequence(unsigned Res, ArrayRef Ops, - ArrayRef Indices); + ArrayRef Indices); void addUsesWithIndices(MachineInstrBuilder MIB) {} diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 4ce643d..de63280 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -35,6 +35,5 @@ bool CallLowering::lowerCall( else Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - return lowerCall(MIRBuilder, Callee, CI.getType(), - ResReg ? ResReg : ArrayRef(), ArgTys, ArgRegs); + return lowerCall(MIRBuilder, Callee, CI.getType(), ResReg, ArgTys, ArgRegs); } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index a6aeec4..4d4a696 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -399,7 +399,6 @@ bool IRTranslator::translateCall(const User &U) { const Function *F = CI.getCalledFunction(); if (!F || !F->isIntrinsic()) { - // FIXME: handle multiple return values. unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI); SmallVector Args; for (auto &Arg: CI.arg_operands()) diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index afb8079..e530e8d 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -33,6 +33,7 @@ void MachineIRBuilder::setMF(MachineFunction &MF) { void MachineIRBuilder::setMBB(MachineBasicBlock &MBB, bool Beginning) { this->MBB = &MBB; + this->MI = nullptr; Before = Beginning; assert(&getMF() == MBB.getParent() && "Basic block is in a different function"); @@ -274,7 +275,7 @@ MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef Results, MachineInstrBuilder MachineIRBuilder::buildSequence(unsigned Res, ArrayRef Ops, - ArrayRef Indices) { + ArrayRef Indices) { #ifndef NDEBUG assert(Ops.size() == Indices.size() && "incompatible args"); assert(!Ops.empty() && "invalid trivial sequence"); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp b/llvm/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp index 6d18139..ede2bc2 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineLegalizeHelper.cpp @@ -135,7 +135,8 @@ MachineLegalizeHelper::narrowScalar(MachineInstr &MI, unsigned TypeIdx, MIRBuilder.setInstr(MI); - SmallVector Src1Regs, Src2Regs, DstRegs, Indexes; + SmallVector Src1Regs, Src2Regs, DstRegs; + SmallVector Indexes; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); @@ -333,7 +334,8 @@ MachineLegalizeHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, MIRBuilder.setInstr(MI); - SmallVector Src1Regs, Src2Regs, DstRegs, Indexes; + SmallVector Src1Regs, Src2Regs, DstRegs; + SmallVector Indexes; extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp index 062627b..6e31813 100644 --- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp @@ -17,7 +17,9 @@ #include "AArch64ISelLowering.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -30,29 +32,6 @@ AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) : CallLowering(&TLI) { } -bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, - const Value *Val, unsigned VReg) const { - MachineFunction &MF = MIRBuilder.getMF(); - const Function &F = *MF.getFunction(); - - MachineInstrBuilder MIB = MIRBuilder.buildInstr(AArch64::RET_ReallyLR); - assert(MIB.getInstr() && "Unable to build a return instruction?!"); - - assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg"); - if (VReg) { - MIRBuilder.setInstr(*MIB.getInstr(), /* Before */ true); - const AArch64TargetLowering &TLI = *getTLI(); - CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv()); - - handleAssignments(MIRBuilder, AssignFn, Val->getType(), VReg, - [&](MachineIRBuilder &MIRBuilder, Type *Ty, - unsigned ValReg, unsigned PhysReg) { - MIRBuilder.buildCopy(PhysReg, ValReg); - MIB.addUse(PhysReg, RegState::Implicit); - }); - } - return true; -} bool AArch64CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, CCAssignFn *AssignFn, @@ -107,36 +86,130 @@ bool AArch64CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, return true; } +void AArch64CallLowering::splitToValueTypes( + unsigned Reg, Type *Ty, SmallVectorImpl &SplitRegs, + SmallVectorImpl &SplitTys, const DataLayout &DL, + MachineRegisterInfo &MRI, SplitArgTy SplitArg) const { + const AArch64TargetLowering &TLI = *getTLI(); + LLVMContext &Ctx = Ty->getContext(); + + SmallVector SplitVTs; + SmallVector Offsets; + ComputeValueVTs(TLI, DL, Ty, SplitVTs, &Offsets, 0); + + if (SplitVTs.size() == 1) { + // No splitting to do, just forward the input directly. + SplitTys.push_back(Ty); + SplitRegs.push_back(Reg); + return; + } + + unsigned FirstRegIdx = SplitRegs.size(); + for (auto SplitVT : SplitVTs) { + Type *SplitTy = SplitVT.getTypeForEVT(Ctx); + SplitRegs.push_back(MRI.createGenericVirtualRegister(LLT{*SplitTy, DL})); + SplitTys.push_back(SplitTy); + } + + SmallVector BitOffsets; + for (auto Offset : Offsets) + BitOffsets.push_back(Offset * 8); + + SplitArg(ArrayRef(&SplitRegs[FirstRegIdx], SplitRegs.end()), + BitOffsets); +} + +bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, + const Value *Val, unsigned VReg) const { + MachineFunction &MF = MIRBuilder.getMF(); + const Function &F = *MF.getFunction(); + + MachineInstrBuilder MIB = MIRBuilder.buildInstr(AArch64::RET_ReallyLR); + assert(MIB.getInstr() && "Unable to build a return instruction?!"); + + assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg"); + if (VReg) { + MIRBuilder.setInstr(*MIB.getInstr(), /* Before */ true); + const AArch64TargetLowering &TLI = *getTLI(); + CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv()); + MachineRegisterInfo &MRI = MF.getRegInfo(); + auto &DL = F.getParent()->getDataLayout(); + + SmallVector SplitTys; + SmallVector SplitRegs; + splitToValueTypes(VReg, Val->getType(), SplitRegs, SplitTys, DL, MRI, + [&](ArrayRef Regs, ArrayRef Offsets) { + MIRBuilder.buildExtract(Regs, Offsets, VReg); + }); + + return handleAssignments(MIRBuilder, AssignFn, SplitTys, SplitRegs, + [&](MachineIRBuilder &MIRBuilder, Type *Ty, + unsigned ValReg, unsigned PhysReg) { + MIRBuilder.buildCopy(PhysReg, ValReg); + MIB.addUse(PhysReg, RegState::Implicit); + }); + } + return true; +} + bool AArch64CallLowering::lowerFormalArguments( MachineIRBuilder &MIRBuilder, const Function::ArgumentListType &Args, ArrayRef VRegs) const { MachineFunction &MF = MIRBuilder.getMF(); + MachineBasicBlock &MBB = MIRBuilder.getMBB(); + MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = *MF.getFunction(); + auto &DL = F.getParent()->getDataLayout(); + + SmallVector Seqs; + SmallVector SplitTys; + SmallVector SplitRegs; + unsigned i = 0; + for (auto &Arg : Args) { + splitToValueTypes(VRegs[i], Arg.getType(), SplitRegs, SplitTys, DL, MRI, + [&](ArrayRef Regs, ArrayRef Offsets) { + MIRBuilder.buildSequence(VRegs[i], Regs, Offsets); + }); + ++i; + } - SmallVector ArgTys; - for (auto &Arg : Args) - ArgTys.push_back(Arg.getType()); + if (!MBB.empty()) + MIRBuilder.setInstr(*MBB.begin()); const AArch64TargetLowering &TLI = *getTLI(); CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false); - return handleAssignments(MIRBuilder, AssignFn, ArgTys, VRegs, - [](MachineIRBuilder &MIRBuilder, Type *Ty, - unsigned ValReg, unsigned PhysReg) { - MIRBuilder.getMBB().addLiveIn(PhysReg); - MIRBuilder.buildCopy(ValReg, PhysReg); - }); + bool Res = handleAssignments(MIRBuilder, AssignFn, SplitTys, SplitRegs, + [](MachineIRBuilder &MIRBuilder, Type *Ty, + unsigned ValReg, unsigned PhysReg) { + MIRBuilder.getMBB().addLiveIn(PhysReg); + MIRBuilder.buildCopy(ValReg, PhysReg); + }); + + // Move back to the end of the basic block. + MIRBuilder.setMBB(MBB); + + return Res; } bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, - const MachineOperand &Callee, - ArrayRef ResTys, - ArrayRef ResRegs, - ArrayRef ArgTys, + const MachineOperand &Callee, Type *ResTy, + unsigned ResReg, ArrayRef ArgTys, ArrayRef ArgRegs) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = *MF.getFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + auto &DL = F.getParent()->getDataLayout(); + + SmallVector SplitTys; + SmallVector SplitRegs; + for (unsigned i = 0; i < ArgTys.size(); ++i) { + splitToValueTypes(ArgRegs[i], ArgTys[i], SplitRegs, SplitTys, DL, MRI, + [&](ArrayRef Regs, ArrayRef Offsets) { + MIRBuilder.buildExtract(Regs, Offsets, ArgRegs[i]); + }); + } // Find out which ABI gets to decide where things go. const AArch64TargetLowering &TLI = *getTLI(); @@ -146,7 +219,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // And finally we can do the actual assignments. For a call we need to keep // track of the registers used because they'll be implicit uses of the BL. SmallVector PhysRegs; - handleAssignments(MIRBuilder, CallAssignFn, ArgTys, ArgRegs, + handleAssignments(MIRBuilder, CallAssignFn, SplitTys, SplitRegs, [&](MachineIRBuilder &MIRBuilder, Type *Ty, unsigned ValReg, unsigned PhysReg) { MIRBuilder.buildCopy(PhysReg, ValReg); @@ -168,13 +241,27 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // symmetry with the arugments, the physical register must be an // implicit-define of the call instruction. CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv()); - if (!ResRegs.empty()) - handleAssignments(MIRBuilder, RetAssignFn, ResTys, ResRegs, + if (ResReg) { + SplitTys.clear(); + SplitRegs.clear(); + + SmallVector RegOffsets; + splitToValueTypes(ResReg, ResTy, SplitRegs, SplitTys, DL, MRI, + [&](ArrayRef Regs, ArrayRef Offsets) { + std::copy(Offsets.begin(), Offsets.end(), + std::back_inserter(RegOffsets)); + }); + + handleAssignments(MIRBuilder, RetAssignFn, SplitTys, SplitRegs, [&](MachineIRBuilder &MIRBuilder, Type *Ty, unsigned ValReg, unsigned PhysReg) { MIRBuilder.buildCopy(ValReg, PhysReg); MIB.addDef(PhysReg, RegState::Implicit); }); + if (!RegOffsets.empty()) + MIRBuilder.buildSequence(ResReg, SplitRegs, RegOffsets); + } + return true; } diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.h b/llvm/lib/Target/AArch64/AArch64CallLowering.h index 1588ad4..05b415f 100644 --- a/llvm/lib/Target/AArch64/AArch64CallLowering.h +++ b/llvm/lib/Target/AArch64/AArch64CallLowering.h @@ -35,14 +35,22 @@ class AArch64CallLowering: public CallLowering { ArrayRef VRegs) const override; bool lowerCall(MachineIRBuilder &MIRBuilder, const MachineOperand &Callee, - ArrayRef ResTys, ArrayRef ResRegs, - ArrayRef ArgTys, + Type *ResTy, unsigned ResReg, ArrayRef ArgTys, ArrayRef ArgRegs) const override; private: typedef std::function AssignFnTy; + typedef std::function, ArrayRef)> + SplitArgTy; + + void splitToValueTypes(unsigned Reg, Type *Ty, + SmallVectorImpl &SplitRegs, + SmallVectorImpl &SplitTys, + const DataLayout &DL, MachineRegisterInfo &MRI, + SplitArgTy SplitArg) const; + bool handleAssignments(MachineIRBuilder &MIRBuilder, CCAssignFn *AssignFn, ArrayRef ArgsTypes, ArrayRef ArgRegs, AssignFnTy AssignValToReg) const; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll index 5afd47d..8ab17d3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator.ll @@ -51,3 +51,53 @@ define void @test_multiple_args(i64 %in) { call void @multiple_args_callee(i32 42, i64 %in) ret void } + + +; CHECK-LABEL: name: test_struct_formal +; CHECK: [[DBL:%[0-9]+]](s64) = COPY %d0 +; CHECK: [[I64:%[0-9]+]](s64) = COPY %x0 +; CHECK: [[I8:%[0-9]+]](s8) = COPY %w1 +; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x2 +; CHECK: [[ARG:%[0-9]+]](s192) = G_SEQUENCE [[DBL]](s64), 0, [[I64]](s64), 64, [[I8]](s8), 128 +; CHECK: G_STORE [[ARG]](s192), [[ADDR]](p0) +; CHECK: RET_ReallyLR +define void @test_struct_formal({double, i64, i8} %in, {double, i64, i8}* %addr) { + store {double, i64, i8} %in, {double, i64, i8}* %addr + ret void +} + + +; CHECK-LABEL: name: test_struct_return +; CHECK: [[ADDR:%[0-9]+]](p0) = COPY %x0 +; CHECK: [[VAL:%[0-9]+]](s192) = G_LOAD [[ADDR]](p0) +; CHECK: [[DBL:%[0-9]+]](s64), [[I64:%[0-9]+]](s64), [[I32:%[0-9]+]](s32) = G_EXTRACT [[VAL]](s192), 0, 64, 128 +; CHECK: %d0 = COPY [[DBL]](s64) +; CHECK: %x0 = COPY [[I64]](s64) +; CHECK: %w1 = COPY [[I32]](s32) +; CHECK: RET_ReallyLR implicit %d0, implicit %x0, implicit %w1 +define {double, i64, i32} @test_struct_return({double, i64, i32}* %addr) { + %val = load {double, i64, i32}, {double, i64, i32}* %addr + ret {double, i64, i32} %val +} + +; CHECK-LABEL: name: test_arr_call +; CHECK: [[ARG:%[0-9]+]](s256) = G_LOAD +; CHECK: [[E0:%[0-9]+]](s64), [[E1:%[0-9]+]](s64), [[E2:%[0-9]+]](s64), [[E3:%[0-9]+]](s64) = G_EXTRACT [[ARG]](s256), 0, 64, 128, 192 +; CHECK: %x0 = COPY [[E0]](s64) +; CHECK: %x1 = COPY [[E1]](s64) +; CHECK: %x2 = COPY [[E2]](s64) +; CHECK: %x3 = COPY [[E3]](s64) +; CHECK: BL @arr_callee, csr_aarch64_aapcs, implicit-def %lr, implicit %sp, implicit %x0, implicit %x1, implicit %x2, implicit %x3, implicit-def %x0, implicit-def %x1, implicit-def %x2, implicit-def %x3 +; CHECK: [[E0:%[0-9]+]](s64) = COPY %x0 +; CHECK: [[E1:%[0-9]+]](s64) = COPY %x1 +; CHECK: [[E2:%[0-9]+]](s64) = COPY %x2 +; CHECK: [[E3:%[0-9]+]](s64) = COPY %x3 +; CHECK: [[RES:%[0-9]+]](s256) = G_SEQUENCE [[E0]](s64), 0, [[E1]](s64), 64, [[E2]](s64), 128, [[E3]](s64), 192 +; CHECK: G_EXTRACT [[RES]](s256), 64 +declare [4 x i64] @arr_callee([4 x i64]) +define i64 @test_arr_call([4 x i64]* %addr) { + %arg = load [4 x i64], [4 x i64]* %addr + %res = call [4 x i64] @arr_callee([4 x i64] %arg) + %val = extractvalue [4 x i64] %res, 1 + ret i64 %val +} -- 2.7.4