From 0404605dda562b3ba70030014c838f7209e05342 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Mon, 2 May 2016 17:41:07 +0000 Subject: [PATCH] Expand aggregate arguments more often on 32-bit Windows Before this change, we would pass all non-HFA record arguments on Windows with byval. Byval often blocks optimizations and results in bad code generation. Windows now uses the existing workaround that other x86_32 platforms use. I also expanded the workaround to handle C++ records with constructors on Windows. On non-Windows platforms, we have to keep generating the same LLVM IR prototypes if we want our bitcode to be ABI compatible. Otherwise we will encounter mismatch issues like PR21573. Essentially fixes PR27522 in Clang instead of LLVM. Reviewers: hans Differential Revision: http://reviews.llvm.org/D19756 llvm-svn: 268261 --- clang/lib/CodeGen/TargetInfo.cpp | 172 +++++++++++---------- clang/test/CodeGen/vectorcall.c | 4 +- clang/test/CodeGen/windows-struct-abi.c | 6 +- clang/test/CodeGen/x86_32-arguments-win32.c | 6 +- .../CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp | 4 +- .../CodeGenCXX/microsoft-abi-sret-and-byval.cpp | 31 +++- 6 files changed, 125 insertions(+), 98 deletions(-) diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 08d6741..db7c1f0 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -503,72 +503,6 @@ static const Type *isSingleElementStruct(QualType T, ASTContext &Context) { return Found; } -static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { - // Treat complex types as the element type. - if (const ComplexType *CTy = Ty->getAs()) - Ty = CTy->getElementType(); - - // Check for a type which we know has a simple scalar argument-passing - // convention without any padding. (We're specifically looking for 32 - // and 64-bit integer and integer-equivalents, float, and double.) - if (!Ty->getAs() && !Ty->hasPointerRepresentation() && - !Ty->isEnumeralType() && !Ty->isBlockPointerType()) - return false; - - uint64_t Size = Context.getTypeSize(Ty); - return Size == 32 || Size == 64; -} - -/// canExpandIndirectArgument - Test whether an argument type which is to be -/// passed indirectly (on the stack) would have the equivalent layout if it was -/// expanded into separate arguments. If so, we prefer to do the latter to avoid -/// inhibiting optimizations. -/// -// FIXME: This predicate is missing many cases, currently it just follows -// llvm-gcc (checks that all fields are 32-bit or 64-bit primitive types). We -// should probably make this smarter, or better yet make the LLVM backend -// capable of handling it. -static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) { - // We can only expand structure types. - const RecordType *RT = Ty->getAs(); - if (!RT) - return false; - - // We can only expand (C) structures. - // - // FIXME: This needs to be generalized to handle classes as well. - const RecordDecl *RD = RT->getDecl(); - if (!RD->isStruct()) - return false; - - // We try to expand CLike CXXRecordDecl. - if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { - if (!CXXRD->isCLike()) - return false; - } - - uint64_t Size = 0; - - for (const auto *FD : RD->fields()) { - if (!is32Or64BitBasicType(FD->getType(), Context)) - return false; - - // FIXME: Reject bit-fields wholesale; there are two problems, we don't know - // how to expand them yet, and the predicate for telling if a bitfield still - // counts as "basic" is more complicated than what we were doing previously. - if (FD->isBitField()) - return false; - - Size += Context.getTypeSize(FD->getType()); - } - - // Make sure there are not any holes in the struct. - if (Size != Context.getTypeSize(Ty)) - return false; - - return true; -} - namespace { Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, const ABIArgInfo &AI) { @@ -959,6 +893,8 @@ class X86_32ABIInfo : public SwiftABIInfo { bool &NeedsPadding) const; bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const; + bool canExpandIndirectArgument(QualType Ty) const; + /// \brief Rewrite the function info so that all memory arguments use /// inalloca. void rewriteWithInAlloca(CGFunctionInfo &FI) const; @@ -1179,6 +1115,72 @@ bool X86_32ABIInfo::shouldReturnTypeInRegister(QualType Ty, return true; } +static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { + // Treat complex types as the element type. + if (const ComplexType *CTy = Ty->getAs()) + Ty = CTy->getElementType(); + + // Check for a type which we know has a simple scalar argument-passing + // convention without any padding. (We're specifically looking for 32 + // and 64-bit integer and integer-equivalents, float, and double.) + if (!Ty->getAs() && !Ty->hasPointerRepresentation() && + !Ty->isEnumeralType() && !Ty->isBlockPointerType()) + return false; + + uint64_t Size = Context.getTypeSize(Ty); + return Size == 32 || Size == 64; +} + +/// Test whether an argument type which is to be passed indirectly (on the +/// stack) would have the equivalent layout if it was expanded into separate +/// arguments. If so, we prefer to do the latter to avoid inhibiting +/// optimizations. +bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { + // We can only expand structure types. + const RecordType *RT = Ty->getAs(); + if (!RT) + return false; + const RecordDecl *RD = RT->getDecl(); + if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { + if (!IsWin32StructABI ) { + // On non-Windows, we have to conservatively match our old bitcode + // prototypes in order to be ABI-compatible at the bitcode level. + if (!CXXRD->isCLike()) + return false; + } else { + // Don't do this for dynamic classes. + if (CXXRD->isDynamicClass()) + return false; + // Don't do this if there are any non-empty bases. + for (const CXXBaseSpecifier &Base : CXXRD->bases()) { + if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true)) + return false; + } + } + } + + uint64_t Size = 0; + + for (const auto *FD : RD->fields()) { + // Scalar arguments on the stack get 4 byte alignment on x86. If the + // argument is smaller than 32-bits, expanding the struct will create + // alignment padding. + if (!is32Or64BitBasicType(FD->getType(), getContext())) + return false; + + // FIXME: Reject bit-fields wholesale; there are two problems, we don't know + // how to expand them yet, and the predicate for telling if a bitfield still + // counts as "basic" is more complicated than what we were doing previously. + if (FD->isBitField()) + return false; + + Size += getContext().getTypeSize(FD->getType()); + } + + // We can do this if there was no alignment padding. + return Size == getContext().getTypeSize(Ty); +} + ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { // If the return value is indirect, then the hidden argument is consuming one // integer register. @@ -1395,6 +1397,12 @@ bool X86_32ABIInfo::updateFreeRegs(QualType Ty, CCState &State) const { bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, bool &NeedsPadding) const { + // On Windows, aggregates other than HFAs are never passed in registers, and + // they do not consume register slots. Homogenous floating-point aggregates + // (HFAs) have already been dealt with at this point. + if (IsWin32StructABI && isAggregateTypeForABI(Ty)) + return false; + NeedsPadding = false; InReg = !IsMCUABI; @@ -1468,23 +1476,19 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, } if (isAggregateTypeForABI(Ty)) { - if (RT) { - // Structs are always byval on win32, regardless of what they contain. - if (IsWin32StructABI) - return getIndirectResult(Ty, true, State); - - // Structures with flexible arrays are always indirect. - if (RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectResult(Ty, true, State); - } + // Structures with flexible arrays are always indirect. + // FIXME: This should not be byval! + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, true, State); - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) + // Ignore empty structs/unions on non-Windows. + if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); - bool NeedsPadding, InReg; + bool NeedsPadding = false; + bool InReg; if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; SmallVector Elements(SizeInRegs, Int32); @@ -1502,9 +1506,8 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, // optimizations. // Don't do this for the MCU if there are still free integer registers // (see X86_64 ABI for full explanation). - if (getContext().getTypeSize(Ty) <= 4*32 && - canExpandIndirectArgument(Ty, getContext()) && - (!IsMCUABI || State.FreeRegs == 0)) + if (getContext().getTypeSize(Ty) <= 4 * 32 && + (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( State.CC == llvm::CallingConv::X86_FastCall || State.CC == llvm::CallingConv::X86_VectorCall, @@ -1624,11 +1627,14 @@ static bool isArgInAlloca(const ABIArgInfo &Info) { return false; case ABIArgInfo::Direct: case ABIArgInfo::Extend: - case ABIArgInfo::Expand: - case ABIArgInfo::CoerceAndExpand: if (Info.getInReg()) return false; return true; + case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: + // These are aggregate types which are never passed in registers when + // inalloca is involved. + return true; } llvm_unreachable("invalid enum"); } diff --git a/clang/test/CodeGen/vectorcall.c b/clang/test/CodeGen/vectorcall.c index 9ee35b1..b38d5e5 100644 --- a/clang/test/CodeGen/vectorcall.c +++ b/clang/test/CodeGen/vectorcall.c @@ -9,9 +9,9 @@ void __vectorcall v2(char a, char b) {} // CHECK: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b) // X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b) -struct Small { int a; }; +struct Small { int x; }; void __vectorcall v3(int a, struct Small b, int c) {} -// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, %struct.Small* byval align 4 %b, i32 inreg %c) +// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c) // X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c) struct Large { int a[5]; }; diff --git a/clang/test/CodeGen/windows-struct-abi.c b/clang/test/CodeGen/windows-struct-abi.c index 4b4a6f1..1631f61 100644 --- a/clang/test/CodeGen/windows-struct-abi.c +++ b/clang/test/CodeGen/windows-struct-abi.c @@ -10,7 +10,7 @@ struct f1 return_f1(void) { while (1); } void receive_f1(struct f1 a0) { } -// CHECK: define void @receive_f1(%struct.f1* byval align 4 %a0) +// CHECK: define void @receive_f1(float %a0.0) struct f2 { float f; @@ -23,7 +23,7 @@ struct f2 return_f2(void) { while (1); } void receive_f2(struct f2 a0) { } -// CHECK: define void @receive_f2(%struct.f2* byval align 4 %a0) +// CHECK: define void @receive_f2(float %a0.0, float %a0.1) struct f4 { float f; @@ -38,5 +38,5 @@ struct f4 return_f4(void) { while (1); } void receive_f4(struct f4 a0) { } -// CHECK: define void @receive_f4(%struct.f4* byval align 4 %a0) +// CHECK: define void @receive_f4(float %a0.0, float %a0.1, float %a0.2, float %a0.3) diff --git a/clang/test/CodeGen/x86_32-arguments-win32.c b/clang/test/CodeGen/x86_32-arguments-win32.c index f8b0995..7b27fc7 100644 --- a/clang/test/CodeGen/x86_32-arguments-win32.c +++ b/clang/test/CodeGen/x86_32-arguments-win32.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -w -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: define i64 @f1_1() -// CHECK-LABEL: define void @f1_2(%struct.s1* byval align 4 %a0) +// CHECK-LABEL: define void @f1_2(i32 %a0.0, i32 %a0.1) struct s1 { int a; int b; @@ -31,7 +31,7 @@ struct s4 { struct s4 f4_1(void) { while (1) {} } // CHECK-LABEL: define i64 @f5_1() -// CHECK-LABEL: define void @f5_2(%struct.s5* byval align 4) +// CHECK-LABEL: define void @f5_2(double %a0.0) struct s5 { double a; }; @@ -39,7 +39,7 @@ struct s5 f5_1(void) { while (1) {} } void f5_2(struct s5 a0) {} // CHECK-LABEL: define i32 @f6_1() -// CHECK-LABEL: define void @f6_2(%struct.s6* byval align 4 %a0) +// CHECK-LABEL: define void @f6_2(float %a0.0) struct s6 { float a; }; diff --git a/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp b/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp index da58c46..6da7a50 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp @@ -2,10 +2,10 @@ // PR15768 -// A trivial 12 byte struct is returned indirectly. +// A trivial 20 byte struct is returned indirectly and taken as byval. struct S { S(); - int a, b, c; + int a, b, c, d, e; }; struct C { diff --git a/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp b/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp index 4c2d850..a5cb98d 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp @@ -22,6 +22,16 @@ struct SmallWithCtor { int x; }; +struct Multibyte { + char a, b, c, d; +}; + +struct Packed { + short a; + int b; + short c; +}; + struct SmallWithDtor { SmallWithDtor(); ~SmallWithDtor(); @@ -102,19 +112,30 @@ Big big_return() { return Big(); } void small_arg(Small s) {} // LINUX-LABEL: define void @_Z9small_arg5Small(i32 %s.0) -// WIN32: define void @"\01?small_arg@@YAXUSmall@@@Z"(%struct.Small* byval align 4 %s) +// WIN32: define void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.0) // WIN64: define void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.coerce) void medium_arg(Medium s) {} // LINUX-LABEL: define void @_Z10medium_arg6Medium(i32 %s.0, i32 %s.1) -// WIN32: define void @"\01?medium_arg@@YAXUMedium@@@Z"(%struct.Medium* byval align 4 %s) +// WIN32: define void @"\01?medium_arg@@YAXUMedium@@@Z"(i32 %s.0, i32 %s.1) // WIN64: define void @"\01?medium_arg@@YAXUMedium@@@Z"(i64 %s.coerce) void small_arg_with_ctor(SmallWithCtor s) {} // LINUX-LABEL: define void @_Z19small_arg_with_ctor13SmallWithCtor(%struct.SmallWithCtor* byval align 4 %s) -// WIN32: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(%struct.SmallWithCtor* byval align 4 %s) +// WIN32: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.0) // WIN64: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.coerce) +// FIXME: We could coerce to a series of i32s here if we wanted to. +void multibyte_arg(Multibyte s) {} +// LINUX-LABEL: define void @_Z13multibyte_arg9Multibyte(%struct.Multibyte* byval align 4 %s) +// WIN32: define void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(%struct.Multibyte* byval align 4 %s) +// WIN64: define void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(i32 %s.coerce) + +void packed_arg(Packed s) {} +// LINUX-LABEL: define void @_Z10packed_arg6Packed(%struct.Packed* byval align 4 %s) +// WIN32: define void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* byval align 4 %s) +// WIN64: define void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* %s) + // Test that dtors are invoked in the callee. void small_arg_with_dtor(SmallWithDtor s) {} // WIN32: define void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(<{ %struct.SmallWithDtor }>* inalloca) {{.*}} { @@ -230,12 +251,12 @@ class Class { void thiscall_method_arg(Small s) {} // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE5Small(%class.Class* %this, i32 %s.0) - // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmall@@@Z"(%class.Class* %this, %struct.Small* byval align 4 %s) + // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmall@@@Z"(%class.Class* %this, i32 %s.0) // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUSmall@@@Z"(%class.Class* %this, i32 %s.coerce) void thiscall_method_arg(SmallWithCtor s) {} // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE13SmallWithCtor(%class.Class* %this, %struct.SmallWithCtor* byval align 4 %s) - // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmallWithCtor@@@Z"(%class.Class* %this, %struct.SmallWithCtor* byval align 4 %s) + // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.0) // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.coerce) void thiscall_method_arg(Big s) {} -- 2.7.4