From 1c4108ab661d43e21b1d1c804d8a403e5b0cf7d6 Mon Sep 17 00:00:00 2001 From: "Liu, Chen3" Date: Fri, 2 Apr 2021 08:45:26 +0800 Subject: [PATCH] [i386] Modify the alignment of __m128/__m256/__m512 vector type according i386 abi. According to i386 System V ABI: 1. when __m256 are required to be passed on the stack, the stack pointer must be aligned on a 0 mod 32 byte boundary at the time of the call. 2. when __m512 are required to be passed on the stack, the stack pointer must be aligned on a 0 mod 64 byte boundary at the time of the call. The current method of clang passing __m512 parameter are as follow: 1. when target supports avx512, passing it with 64 byte alignment; 2. when target supports avx, passing it with 32 byte alignment; 3. Otherwise, passing it with 16 byte alignment. Passing __m256 parameter are as follow: 1. when target supports avx or avx512, passing it with 32 byte alignment; 2. Otherwise, passing it with 16 byte alignment. This pach will passing __m128/__m256/__m512 following i386 System V ABI and apply it to Linux only since other System V OS (e.g Darwin, PS4 and FreeBSD) don't want to spend any effort dealing with the ramifications of ABI breaks at present. Differential Revision: https://reviews.llvm.org/D78564 --- clang/lib/CodeGen/TargetInfo.cpp | 13 +++++-- clang/test/CodeGen/x86_32-align-linux.c | 60 +++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/x86_32-align-linux.c diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 7f4deb21..55e3874 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -1105,6 +1105,7 @@ class X86_32ABIInfo : public SwiftABIInfo { bool IsWin32StructABI; bool IsSoftFloatABI; bool IsMCUABI; + bool IsLinuxABI; unsigned DefaultNumRegisterParameters; static bool isRegisterSize(unsigned Size) { @@ -1167,9 +1168,9 @@ public: unsigned NumRegisterParameters, bool SoftFloatABI) : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), IsRetSmallStructInRegABI(RetSmallStructInRegABI), - IsWin32StructABI(Win32StructABI), - IsSoftFloatABI(SoftFloatABI), + IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), + IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()), DefaultNumRegisterParameters(NumRegisterParameters) {} bool shouldPassIndirectlyForSwift(ArrayRef scalars, @@ -1594,6 +1595,14 @@ unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty, if (Align <= MinABIStackAlignInBytes) return 0; // Use default alignment. + if (IsLinuxABI) { + // Exclude other System V OS (e.g Darwin, PS4 and FreeBSD) since we don't + // want to spend any effort dealing with the ramifications of ABI breaks. + // + // If the vector type is __m128/__m256/__m512, return the default alignment. + if (Ty->isVectorType() && (Align == 16 || Align == 32 || Align == 64)) + return Align; + } // On non-Darwin, the stack type alignment is always 4. if (!IsDarwinVectorABI) { // Set explicit alignment, since we may need to realign the top. diff --git a/clang/test/CodeGen/x86_32-align-linux.c b/clang/test/CodeGen/x86_32-align-linux.c new file mode 100644 index 0000000..6e6ddd7 --- /dev/null +++ b/clang/test/CodeGen/x86_32-align-linux.c @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx512f -emit-llvm -o - %s | FileCheck %s + +#include + +// CHECK-LABEL: define dso_local void @testm128 +// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4 +// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32 +// CHECK-NEXT: %1 = add i32 %0, 15 +// CHECK-NEXT: %2 = and i32 %1, -16 +// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8* +void testm128(int argCount, ...) { + __m128 res; + __builtin_va_list args; + __builtin_va_start(args, argCount); + res = __builtin_va_arg(args, __m128); + __builtin_va_end(args); +} + +// CHECK-LABEL: define dso_local void @testm256 +// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4 +// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32 +// CHECK-NEXT: %1 = add i32 %0, 31 +// CHECK-NEXT: %2 = and i32 %1, -32 +// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8* +void testm256(int argCount, ...) { + __m256 res; + __builtin_va_list args; + __builtin_va_start(args, argCount); + res = __builtin_va_arg(args, __m256); + __builtin_va_end(args); +} + +// CHECK-LABEL: define dso_local void @testm512 +// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4 +// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32 +// CHECK-NEXT: %1 = add i32 %0, 63 +// CHECK-NEXT: %2 = and i32 %1, -64 +// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8* +void testm512(int argCount, ...) { + __m512 res; + __builtin_va_list args; + __builtin_va_start(args, argCount); + res = __builtin_va_arg(args, __m512); + __builtin_va_end(args); +} + +// CHECK-LABEL: define dso_local void @testPastArguments +// CHECK: call void (i32, ...) @testm128(i32 1, <4 x float> %0) +// CHECK: call void (i32, ...) @testm256(i32 1, <8 x float> %1) +// CHECK: call void (i32, ...) @testm512(i32 1, <16 x float> %2) +void testPastArguments(void) { + __m128 a; + __m256 b; + __m512 c; + testm128(1, a); + testm256(1, b); + testm512(1, c); +} -- 2.7.4