From 23ae35e858da37c753b8efaac965046358ec3818 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Sun, 28 Feb 2021 11:35:37 -0500
Subject: [PATCH] X86/GlobalISel: Use generic version of splitToValueTypes

The custom insert of an unmerge and the callback weirdness should be
unnecessary. Since handleAssignments should now use
getRegisterTypeForCallingConv as the SelectionDAG builder would, this
should now just be able to use the generic code.

X86-32 relies on the generated CCAssignFns not seeing illegal types,
since it shares code with x86_64; otherwise i64 values would
incorrectly be assigned to 64-bit registers.
---
 llvm/lib/Target/X86/X86CallLowering.cpp            | 88 +++-------------------
 llvm/lib/Target/X86/X86CallLowering.h              |  9 ---
 .../X86/GlobalISel/irtranslator-callingconv.ll     | 18 ++---
 3 files changed, 19 insertions(+), 96 deletions(-)

diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
index aa7861f..740f01f 100644
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -50,50 +50,6 @@ using namespace llvm;
 X86CallLowering::X86CallLowering(const X86TargetLowering &TLI)
     : CallLowering(&TLI) {}
 
-// FIXME: This should be removed and the generic version used
-bool X86CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
-                                        SmallVectorImpl<ArgInfo> &SplitArgs,
-                                        const DataLayout &DL,
-                                        MachineRegisterInfo &MRI,
-                                        SplitArgTy PerformArgSplit) const {
-  const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
-  LLVMContext &Context = OrigArg.Ty->getContext();
-
-  SmallVector<EVT, 4> SplitVTs;
-  SmallVector<uint64_t, 4> Offsets;
-  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
-  assert(OrigArg.Regs.size() == 1 && "Can't handle multple regs yet");
-
-  if (OrigArg.Ty->isVoidTy())
-    return true;
-
-  EVT VT = SplitVTs[0];
-  unsigned NumParts = TLI.getNumRegisters(Context, VT);
-
-  if (NumParts == 1) {
-    // replace the original type ( pointer -> GPR ).
-    SplitArgs.emplace_back(OrigArg.Regs[0], VT.getTypeForEVT(Context),
-                           OrigArg.Flags, OrigArg.IsFixed);
-    return true;
-  }
-
-  SmallVector<Register, 8> SplitRegs;
-
-  EVT PartVT = TLI.getRegisterType(Context, VT);
-  Type *PartTy = PartVT.getTypeForEVT(Context);
-
-  for (unsigned i = 0; i < NumParts; ++i) {
-    ArgInfo Info =
-        ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*PartTy, DL)),
-                PartTy, OrigArg.Flags};
-    SplitArgs.push_back(Info);
-    SplitRegs.push_back(Info.Regs[0]);
-  }
-
-  PerformArgSplit(SplitRegs);
-  return true;
-}
-
 namespace {
 
 struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
@@ -179,27 +135,15 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
     const Function &F = MF.getFunction();
     MachineRegisterInfo &MRI = MF.getRegInfo();
     const DataLayout &DL = MF.getDataLayout();
-    LLVMContext &Ctx = Val->getType()->getContext();
-    const X86TargetLowering &TLI = *getTLI<X86TargetLowering>();
-
-    SmallVector<EVT, 4> SplitEVTs;
-    ComputeValueVTs(TLI, DL, Val->getType(), SplitEVTs);
-    assert(VRegs.size() == SplitEVTs.size() &&
-           "For each split Type there should be exactly one VReg.");
-
-    SmallVector<ArgInfo, 8> SplitArgs;
-    for (unsigned i = 0; i < SplitEVTs.size(); ++i) {
-      ArgInfo CurArgInfo = ArgInfo{VRegs[i], SplitEVTs[i].getTypeForEVT(Ctx)};
-      setArgFlags(CurArgInfo, AttributeList::ReturnIndex, DL, F);
-      if (!splitToValueTypes(CurArgInfo, SplitArgs, DL, MRI,
-                             [&](ArrayRef<Register> Regs) {
-                               MIRBuilder.buildUnmerge(Regs, VRegs[i]);
-                             }))
-        return false;
-    }
+
+    ArgInfo OrigRetInfo(VRegs, Val->getType());
+    setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
+
+    SmallVector<ArgInfo, 4> SplitRetInfos;
+    splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, F.getCallingConv());
 
     X86OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
-    if (!handleAssignments(MIRBuilder, SplitArgs, Handler, F.getCallingConv(),
+    if (!handleAssignments(MIRBuilder, SplitRetInfos, Handler, F.getCallingConv(),
                            F.isVarArg()))
       return false;
   }
@@ -312,11 +256,7 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
 
     ArgInfo OrigArg(VRegs[Idx], Arg.getType());
     setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
-    if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
-                           [&](ArrayRef<Register> Regs) {
-                             MIRBuilder.buildMerge(VRegs[Idx][0], Regs);
-                           }))
-      return false;
+    splitToValueTypes(OrigArg, SplitArgs, DL, F.getCallingConv());
     Idx++;
   }
 
@@ -374,11 +314,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     if (OrigArg.Regs.size() > 1)
      return false;
 
-    if (!splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
-                           [&](ArrayRef<Register> Regs) {
-                             MIRBuilder.buildUnmerge(Regs, OrigArg.Regs[0]);
-                           }))
-      return false;
+    splitToValueTypes(OrigArg, SplitArgs, DL, Info.CallConv);
   }
   // Do the actual argument marshalling.
   X86OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, CC_X86);
@@ -425,11 +361,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     SplitArgs.clear();
     SmallVector<Register, 8> NewRegs;
 
-    if (!splitToValueTypes(Info.OrigRet, SplitArgs, DL, MRI,
-                           [&](ArrayRef<Register> Regs) {
-                             NewRegs.assign(Regs.begin(), Regs.end());
-                           }))
-      return false;
+    splitToValueTypes(Info.OrigRet, SplitArgs, DL, Info.CallConv);
 
     CallReturnHandler Handler(MIRBuilder, MRI, RetCC_X86, MIB);
     if (!handleAssignments(MIRBuilder, SplitArgs, Handler, Info.CallConv,
diff --git a/llvm/lib/Target/X86/X86CallLowering.h b/llvm/lib/Target/X86/X86CallLowering.h
index 9390122..ac5b92b 100644
--- a/llvm/lib/Target/X86/X86CallLowering.h
+++ b/llvm/lib/Target/X86/X86CallLowering.h
@@ -38,15 +38,6 @@ public:
 
   bool lowerCall(MachineIRBuilder &MIRBuilder,
                  CallLoweringInfo &Info) const override;
-
-private:
-  /// A function of this type is used to perform value split action.
-  using SplitArgTy = std::function<void(ArrayRef<Register>)>;
-
-  bool splitToValueTypes(const ArgInfo &OrigArgInfo,
-                         SmallVectorImpl<ArgInfo> &SplitArgs,
-                         const DataLayout &DL, MachineRegisterInfo &MRI,
-                         SplitArgTy SplitArg) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
index fb51031..71e24b6 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -148,41 +148,41 @@ define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg
   ; X86: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 4 from %fixed-stack.15, align 16)
   ; X86: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.14
   ; X86: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.14)
+  ; X86: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
   ; X86: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.13
   ; X86: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load 4 from %fixed-stack.13, align 8)
   ; X86: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.12
   ; X86: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load 4 from %fixed-stack.12)
+  ; X86: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
   ; X86: [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.11
   ; X86: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load 4 from %fixed-stack.11, align 16)
   ; X86: [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.10
   ; X86: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load 4 from %fixed-stack.10)
+  ; X86: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
   ; X86: [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.9
   ; X86: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load 4 from %fixed-stack.9, align 8)
   ; X86: [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.8
   ; X86: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load 4 from %fixed-stack.8)
+  ; X86: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
   ; X86: [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
   ; X86: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p0) :: (invariant load 4 from %fixed-stack.7, align 16)
   ; X86: [[FRAME_INDEX9:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
   ; X86: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p0) :: (invariant load 4 from %fixed-stack.6)
+  ; X86: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD8]](s32), [[LOAD9]](s32)
   ; X86: [[FRAME_INDEX10:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
   ; X86: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p0) :: (invariant load 4 from %fixed-stack.5, align 8)
   ; X86: [[FRAME_INDEX11:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
   ; X86: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p0) :: (invariant load 4 from %fixed-stack.4)
+  ; X86: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD10]](s32), [[LOAD11]](s32)
   ; X86: [[FRAME_INDEX12:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
   ; X86: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p0) :: (invariant load 4 from %fixed-stack.3, align 16)
   ; X86: [[FRAME_INDEX13:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
   ; X86: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p0) :: (invariant load 4 from %fixed-stack.2)
+  ; X86: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD12]](s32), [[LOAD13]](s32)
   ; X86: [[FRAME_INDEX14:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
   ; X86: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p0) :: (invariant load 4 from %fixed-stack.1, align 8)
   ; X86: [[FRAME_INDEX15:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
   ; X86: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p0) :: (invariant load 4 from %fixed-stack.0)
-  ; X86: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
-  ; X86: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
-  ; X86: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
-  ; X86: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
-  ; X86: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD8]](s32), [[LOAD9]](s32)
-  ; X86: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD10]](s32), [[LOAD11]](s32)
-  ; X86: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD12]](s32), [[LOAD13]](s32)
   ; X86: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD14]](s32), [[LOAD15]](s32)
   ; X86: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_64bit
   ; X86: [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_64bit
@@ -489,10 +489,10 @@ define <8 x i32> @test_split_return_callee(<8 x i32> %arg1, <8 x i32> %arg2) {
   ; X86: liveins: $xmm0, $xmm1, $xmm2
   ; X86: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
   ; X86: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
+  ; X86: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
   ; X86: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2
   ; X86: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
   ; X86: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 16 from %fixed-stack.0)
-  ; X86: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
   ; X86: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[LOAD]](<4 x s32>)
   ; X86: ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
   ; X86: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>)
@@ -513,9 +513,9 @@ define <8 x i32> @test_split_return_callee(<8 x i32> %arg1, <8 x i32> %arg2) {
   ; X64: liveins: $xmm0, $xmm1, $xmm2, $xmm3
   ; X64: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
   ; X64: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
+  ; X64: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
   ; X64: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2
   ; X64: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $xmm3
-  ; X64: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
   ; X64: [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[COPY3]](<4 x s32>)
   ; X64: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
   ; X64: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>)
-- 
2.7.4
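
A short annotated sketch of the generic flow the new lowerReturn hunk adopts.
The code lines mirror the hunk above; the comments and the SmallVector inline
size are illustrative additions rather than part of the patch, and the
surrounding MIB/handler setup is unchanged and omitted:

    // One ArgInfo now describes the whole IR return value; setArgFlags copies
    // the return attributes (sext/zext/inreg/...) onto it.
    ArgInfo OrigRetInfo(VRegs, Val->getType());
    setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);

    // The generic CallLowering::splitToValueTypes replaces the removed X86
    // copy: it splits the value according to the calling convention, e.g. an
    // i64 return on x86-32 becomes two s32 parts.
    SmallVector<ArgInfo, 4> SplitRetInfos;
    splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, F.getCallingConv());

    // handleAssignments then assigns each part via RetCC_X86; the merge and
    // unmerge instructions the old callbacks inserted by hand are now built
    // by the generic code, which is what the test updates above reflect.
    X86OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
    if (!handleAssignments(MIRBuilder, SplitRetInfos, Handler,
                           F.getCallingConv(), F.isVarArg()))
      return false;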