From a126d1ef3ce7392e236586dbc7389434124e59cb Mon Sep 17 00:00:00 2001 From: Juergen Ributzka <juergen@apple.com> Date: Tue, 5 Aug 2014 05:43:48 +0000 Subject: [PATCH] [FastISel][AArch64] Implement the FastLowerArguments hook. This implements basic argument lowering for AArch64 in FastISel. It only handles a small subset of the C calling convention. It supports simple arguments that can be passed in GPR and FPR registers. This should cover most of the trivial cases without falling back to SelectionDAG. This fixes the case of simple arguments that previously fell back to SelectionDAG. llvm-svn: 214846 --- llvm/lib/Target/AArch64/AArch64FastISel.cpp | 103 +++++++++++++++ llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll | 152 +++++++++++++++++++++- 2 files changed, 252 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index bf418e0..7aebf4b 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -94,6 +94,7 @@ class AArch64FastISel : public FastISel { const AArch64Subtarget *Subtarget; LLVMContext *Context; + bool FastLowerArguments() override; bool FastLowerCall(CallLoweringInfo &CLI) override; bool FastLowerIntrinsicCall(const IntrinsicInst *II) override; @@ -1313,6 +1314,108 @@ bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) { return true; } +bool AArch64FastISel::FastLowerArguments() { + if (!FuncInfo.CanLowerReturn) + return false; + + const Function *F = FuncInfo.Fn; + if (F->isVarArg()) + return false; + + CallingConv::ID CC = F->getCallingConv(); + if (CC != CallingConv::C) + return false; + + // Only handle simple cases like i1/i8/i16/i32/i64/f32/f64 of up to 8 GPR and + // FPR each. + unsigned GPRCnt = 0; + unsigned FPRCnt = 0; + unsigned Idx = 0; + for (auto const &Arg : F->args()) { + // The first argument is at index 1. 
+ ++Idx; + if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || + F->getAttributes().hasAttribute(Idx, Attribute::InReg) || + F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || + F->getAttributes().hasAttribute(Idx, Attribute::Nest)) + return false; + + Type *ArgTy = Arg.getType(); + if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + return false; + + EVT ArgVT = TLI.getValueType(ArgTy); + if (!ArgVT.isSimple()) return false; + switch (ArgVT.getSimpleVT().SimpleTy) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + ++GPRCnt; + break; + case MVT::f16: + case MVT::f32: + case MVT::f64: + ++FPRCnt; + break; + } + + if (GPRCnt > 8 || FPRCnt > 8) + return false; + } + + static const MCPhysReg Registers[5][8] = { + { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, + AArch64::W5, AArch64::W6, AArch64::W7 }, + { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, + AArch64::X5, AArch64::X6, AArch64::X7 }, + { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, + AArch64::H5, AArch64::H6, AArch64::H7 }, + { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, + AArch64::S5, AArch64::S6, AArch64::S7 }, + { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, + AArch64::D5, AArch64::D6, AArch64::D7 } + }; + + unsigned GPRIdx = 0; + unsigned FPRIdx = 0; + for (auto const &Arg : F->args()) { + MVT VT = TLI.getSimpleValueType(Arg.getType()); + unsigned SrcReg; + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type."); + case MVT::i1: + case MVT::i8: + case MVT::i16: VT = MVT::i32; // fall-through + case MVT::i32: SrcReg = Registers[0][GPRIdx++]; break; + case MVT::i64: SrcReg = Registers[1][GPRIdx++]; break; + case MVT::f16: SrcReg = Registers[2][FPRIdx++]; break; + case MVT::f32: SrcReg = Registers[3][FPRIdx++]; break; + case MVT::f64: SrcReg = Registers[4][FPRIdx++]; break; + } + + // Skip 
unused arguments. + if (Arg.use_empty()) { + UpdateValueMap(&Arg, 0); + continue; + } + + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); + // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. + // Without this, EmitLiveInCopies may eliminate the livein if its only + // use is a bitcast (which isn't turned into an instruction). + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(DstReg, getKillRegState(true)); + UpdateValueMap(&Arg, ResultReg); + } + return true; +} + bool AArch64FastISel::ProcessCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &OutVTs, unsigned &NumBytes) { diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll index 2a77eb4..3024858 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll @@ -1,6 +1,6 @@ -; RUN: llc -O0 -fast-isel-abort -code-model=small -mtriple=arm64-apple-darwin < %s | FileCheck %s -; RUN: llc -O0 -fast-isel-abort -code-model=large -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE -; RUN: llc -O0 -fast-isel-abort -code-model=small -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -mtriple=arm64-apple-darwin < %s | FileCheck %s +; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=large -mtriple=arm64-apple-darwin < %s | FileCheck %s --check-prefix=LARGE +; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE define void @call0() nounwind { entry: @@ -105,3 +105,149 @@ entry: ret void } +define zeroext i1 @call_arguments1(i1 %a1, i1 %a2, i1 %a3, i1 %a4, i1 %a5, i1 %a6, i1 %a7, i1 %a8) { +; CHECK-LABEL: 
call_arguments1 +; CHECK: and {{w[0-9]+}}, w0, w1 +; CHECK-NEXT: and {{w[0-9]+}}, w2, w3 +; CHECK-NEXT: and {{w[0-9]+}}, w4, w5 +; CHECK-NEXT: and {{w[0-9]+}}, w6, w7 + %1 = and i1 %a1, %a2 + %2 = and i1 %a3, %a4 + %3 = and i1 %a5, %a6 + %4 = and i1 %a7, %a8 + %5 = and i1 %1, %2 + %6 = and i1 %3, %4 + %7 = and i1 %5, %6 + ret i1 %7 +} + +define i32 @call_arguments2(i8 zeroext %a1, i8 zeroext %a2, i8 zeroext %a3, i8 zeroext %a4, i8 signext %a5, i8 signext %a6, i8 signext %a7, i8 signext %a8) { +; CHECK-LABEL: call_arguments2 +; CHECK: add {{w[0-9]+}}, w0, w1 +; CHECK-NEXT: add {{w[0-9]+}}, w2, w3 +; CHECK-NEXT: add {{w[0-9]+}}, w4, w5 +; CHECK-NEXT: add {{w[0-9]+}}, w6, w7 + %a1z = zext i8 %a1 to i32 + %a2z = zext i8 %a2 to i32 + %a3z = zext i8 %a3 to i32 + %a4z = zext i8 %a4 to i32 + %a5s = sext i8 %a5 to i32 + %a6s = sext i8 %a6 to i32 + %a7s = sext i8 %a7 to i32 + %a8s = sext i8 %a8 to i32 + %1 = add i32 %a1z, %a2z + %2 = add i32 %a3z, %a4z + %3 = add i32 %a5s, %a6s + %4 = add i32 %a7s, %a8s + %5 = add i32 %1, %2 + %6 = add i32 %3, %4 + %7 = add i32 %5, %6 + ret i32 %7 +} + +define i32 @call_arguments3(i16 zeroext %a1, i16 zeroext %a2, i16 zeroext %a3, i16 zeroext %a4, i16 signext %a5, i16 signext %a6, i16 signext %a7, i16 signext %a8) { +; CHECK-LABEL: call_arguments3 +; CHECK: add {{w[0-9]+}}, w0, w1 +; CHECK-NEXT: add {{w[0-9]+}}, w2, w3 +; CHECK-NEXT: add {{w[0-9]+}}, w4, w5 +; CHECK-NEXT: add {{w[0-9]+}}, w6, w7 + %a1z = zext i16 %a1 to i32 + %a2z = zext i16 %a2 to i32 + %a3z = zext i16 %a3 to i32 + %a4z = zext i16 %a4 to i32 + %a5s = sext i16 %a5 to i32 + %a6s = sext i16 %a6 to i32 + %a7s = sext i16 %a7 to i32 + %a8s = sext i16 %a8 to i32 + %1 = add i32 %a1z, %a2z + %2 = add i32 %a3z, %a4z + %3 = add i32 %a5s, %a6s + %4 = add i32 %a7s, %a8s + %5 = add i32 %1, %2 + %6 = add i32 %3, %4 + %7 = add i32 %5, %6 + ret i32 %7 +} + +define i32 @call_arguments4(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) { +; CHECK-LABEL: call_arguments4 
+; CHECK: add {{w[0-9]+}}, w0, w1 +; CHECK-NEXT: add {{w[0-9]+}}, w2, w3 +; CHECK-NEXT: add {{w[0-9]+}}, w4, w5 +; CHECK-NEXT: add {{w[0-9]+}}, w6, w7 + %1 = add i32 %a1, %a2 + %2 = add i32 %a3, %a4 + %3 = add i32 %a5, %a6 + %4 = add i32 %a7, %a8 + %5 = add i32 %1, %2 + %6 = add i32 %3, %4 + %7 = add i32 %5, %6 + ret i32 %7 +} + +define i64 @call_arguments5(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8) { +; CHECK-LABEL: call_arguments5 +; CHECK: add {{x[0-9]+}}, x0, x1 +; CHECK-NEXT: add {{x[0-9]+}}, x2, x3 +; CHECK-NEXT: add {{x[0-9]+}}, x4, x5 +; CHECK-NEXT: add {{x[0-9]+}}, x6, x7 + %1 = add i64 %a1, %a2 + %2 = add i64 %a3, %a4 + %3 = add i64 %a5, %a6 + %4 = add i64 %a7, %a8 + %5 = add i64 %1, %2 + %6 = add i64 %3, %4 + %7 = add i64 %5, %6 + ret i64 %7 +} + +define float @call_arguments6(float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8) { +; CHECK-LABEL: call_arguments6 +; CHECK: fadd {{s[0-9]+}}, s0, s1 +; CHECK-NEXT: fadd {{s[0-9]+}}, s2, s3 +; CHECK-NEXT: fadd {{s[0-9]+}}, s4, s5 +; CHECK-NEXT: fadd {{s[0-9]+}}, s6, s7 + %1 = fadd float %a1, %a2 + %2 = fadd float %a3, %a4 + %3 = fadd float %a5, %a6 + %4 = fadd float %a7, %a8 + %5 = fadd float %1, %2 + %6 = fadd float %3, %4 + %7 = fadd float %5, %6 + ret float %7 +} + +define double @call_arguments7(double %a1, double %a2, double %a3, double %a4, double %a5, double %a6, double %a7, double %a8) { +; CHECK-LABEL: call_arguments7 +; CHECK: fadd {{d[0-9]+}}, d0, d1 +; CHECK-NEXT: fadd {{d[0-9]+}}, d2, d3 +; CHECK-NEXT: fadd {{d[0-9]+}}, d4, d5 +; CHECK-NEXT: fadd {{d[0-9]+}}, d6, d7 + %1 = fadd double %a1, %a2 + %2 = fadd double %a3, %a4 + %3 = fadd double %a5, %a6 + %4 = fadd double %a7, %a8 + %5 = fadd double %1, %2 + %6 = fadd double %3, %4 + %7 = fadd double %5, %6 + ret double %7 +} + +define i64 @call_arguments8(i32 %a1, i64 %a2, i32 %a3, i64 %a4) { +; CHECK-LABEL: call_arguments8 +; CHECK: ubfx [[REG1:x[0-9]+]], {{x[0-9]+}}, #0, #32 +; CHECK: 
ubfx [[REG2:x[0-9]+]], {{x[0-9]+}}, #0, #32 +; CHECK: add {{x[0-9]+}}, [[REG1]], x1 +; CHECK-NEXT: add {{x[0-9]+}}, [[REG2]], x3 + %aa1 = zext i32 %a1 to i64 + %aa3 = zext i32 %a3 to i64 + %1 = add i64 %aa1, %a2 + %2 = add i64 %aa3, %a4 + %3 = add i64 %1, %2 + ret i64 %3 +} + +define void @call_arguments9(i8 %a1, i16 %a2, i32 %a3, i64 %a4, float %a5, double %a6, i64 %a7, double %a8) { +; CHECK-LABEL: call_arguments9 + ret void +} -- 2.7.4