From 9bacf1588583014538a0217add18f370acb95788 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Mon, 20 Jul 2020 14:43:50 +0100
Subject: [PATCH] [AArch64][SVE] Fix PCS for functions taking/returning
 scalable types.

The default calling convention needs to save/restore the SVE callee
saves according to the SVE PCS when the function takes or returns
scalable types, even when the `aarch64_sve_vector_pcs` CC is not
specified for the function.

Reviewers: efriedma, paulwalker-arm, david-arm, rengolin

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D84041
---
 llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp    | 10 ++++++++++
 .../test/CodeGen/AArch64/sve-calling-convention.ll | 23 ++++++++++++++++++++++
 llvm/test/CodeGen/AArch64/sve-trunc.ll             |  9 +++++++++
 3 files changed, 42 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 886158c..de1ae47 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -40,6 +40,14 @@ AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
   AArch64_MC::initLLVMToCVRegMapping(this);
 }
 
+static bool hasSVEArgsOrReturn(const MachineFunction *MF) {
+  const Function &F = MF->getFunction();
+  return isa<ScalableVectorType>(F.getReturnType()) ||
+         any_of(F.args(), [](const Argument &Arg) {
+           return isa<ScalableVectorType>(Arg.getType());
+         });
+}
+
 const MCPhysReg *
 AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   assert(MF && "Invalid MachineFunction pointer.");
@@ -75,6 +83,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
     // This is for OSes other than Windows; Windows is a separate case further
     // above.
     return CSR_AArch64_AAPCS_X18_SaveList;
+  if (hasSVEArgsOrReturn(MF))
+    return CSR_AArch64_SVE_AAPCS_SaveList;
   return CSR_AArch64_AAPCS_SaveList;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention.ll
index 767a3cd..f95e749 100644
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -stop-after=finalize-isel < %s 2>%t | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -stop-after=prologepilog < %s 2>%t | FileCheck %s --check-prefix=CHECKCSR
 ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
 
 ; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
@@ -123,3 +124,25 @@ define <vscale x 16 x i1> @sve_signature_pred_caller(<vscale x 16 x i1> %arg1, <vscale x 16 x i1> %arg2) nounwind {
   %res = call <vscale x 16 x i1> @sve_signature_pred(<vscale x 16 x i1> %arg2, <vscale x 16 x i1> %arg1)
   ret <vscale x 16 x i1> %res
 }
+
+; Test that functions returning or taking SVE arguments use the correct
+; callee-saved set when using the default C calling convention (as opposed
+; to aarch64_sve_vector_pcs)
+
+; CHECKCSR-LABEL: name: sve_signature_vec_ret_callee
+; CHECKCSR: callee-saved-register: '$z8'
+; CHECKCSR: callee-saved-register: '$p4'
+; CHECKCSR: RET_ReallyLR
+define <vscale x 4 x i32> @sve_signature_vec_ret_callee() nounwind {
+  call void asm sideeffect "nop", "~{z8},~{p4}"()
+  ret <vscale x 4 x i32> zeroinitializer
+}
+
+; CHECKCSR-LABEL: name: sve_signature_vec_arg_callee
+; CHECKCSR: callee-saved-register: '$z8'
+; CHECKCSR: callee-saved-register: '$p4'
+; CHECKCSR: RET_ReallyLR
+define void @sve_signature_vec_arg_callee(<vscale x 4 x i32> %v) nounwind {
+  call void asm sideeffect "nop", "~{z8},~{p4}"()
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll
index 3743301..46d152b 100644
--- a/llvm/test/CodeGen/AArch64/sve-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll
@@ -113,6 +113,12 @@ entry:
 
 define <vscale x 16 x i1> @trunc_i64toi1_split3(<vscale x 16 x i64> %in) {
 ; CHECK-LABEL: trunc_i64toi1_split3:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset p4, -16
+; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.d
 ; CHECK-NEXT:    and z7.d, z7.d, #0x1
 ; CHECK-NEXT:    and z6.d, z6.d, #0x1
@@ -134,9 +140,12 @@ define <vscale x 16 x i1> @trunc_i64toi1_split3(<vscale x 16 x i64> %in) {
 ; CHECK-NEXT:    cmpne p4.d, p0/z, z1.d, #0
 ; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT:    uzp1 p0.s, p0.s, p4.s
+; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT:    uzp1 p1.h, p3.h, p1.h
 ; CHECK-NEXT:    uzp1 p0.h, p0.h, p2.h
 ; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   %out = trunc <vscale x 16 x i64> %in to <vscale x 16 x i1>
-- 
2.7.4