From 19a8adc9bd2785698c1d5429503fd406d95b6c53 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Thu, 25 Oct 2018 18:57:19 +0000 Subject: [PATCH] Implement Function Multiversioning for Non-ELF Systems. Similar to how ICC handles CPU-Dispatch on Windows, this patch uses the resolver function directly to forward the call to the proper function. This is not nearly as efficient as IFuncs of course, but is still quite useful for large functions specifically developed for certain processors. This is unfortunately still limited to x86, since it depends on __builtin_cpu_supports and __builtin_cpu_is, which are x86 builtins. The naming for the resolver/forwarding function for cpu-dispatch was taken from ICC's implementation, which uses the unmodified name for this (no mangling additions). This is possible, since cpu-dispatch uses '.A' for the 'default' version. In 'target' multiversioning, this function keeps the '.resolver' extension so that the default function keeps the default mangling. 
Change-Id: I4731555a39be26c7ad59a2d8fda6fa1a50f73284 Differential Revision: https://reviews.llvm.org/D53586 llvm-svn: 345298 --- clang/include/clang/AST/Decl.h | 4 + clang/include/clang/Basic/Attr.td | 4 +- clang/include/clang/Basic/TargetInfo.h | 10 +- clang/lib/AST/Decl.cpp | 4 + clang/lib/Basic/Targets/X86.h | 3 - clang/lib/CodeGen/CodeGenFunction.cpp | 34 ++- clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/CodeGen/CodeGenModule.cpp | 99 ++++++--- clang/lib/CodeGen/CodeGenModule.h | 6 +- clang/test/CodeGen/attr-cpuspecific.c | 239 +++++++++++++++------ clang/test/CodeGen/attr-target-mv-func-ptrs.c | 41 ++-- clang/test/CodeGen/attr-target-mv-va-args.c | 53 +++-- clang/test/CodeGen/attr-target-mv.c | 229 +++++++++++++------- clang/test/CodeGenCXX/attr-target-mv-diff-ns.cpp | 108 ++++++---- clang/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp | 25 ++- clang/test/CodeGenCXX/attr-target-mv-inalloca.cpp | 81 +++++++ .../CodeGenCXX/attr-target-mv-member-funcs.cpp | 233 +++++++++++++------- .../CodeGenCXX/attr-target-mv-out-of-line-defs.cpp | 56 +++-- clang/test/CodeGenCXX/attr-target-mv-overloads.cpp | 104 +++++---- clang/test/Sema/attr-target-mv-bad-target.c | 1 - 20 files changed, 938 insertions(+), 397 deletions(-) create mode 100644 clang/test/CodeGenCXX/attr-target-mv-inalloca.cpp diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 076bd6e..f0454bc 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -2233,6 +2233,10 @@ public: /// part of the cpu_specific/cpu_dispatch functionality. bool isCPUSpecificMultiVersion() const; + /// True if this function is a multiversioned dispatch function as a part of + /// the target functionality. 
+ bool isTargetMultiVersion() const; + void setPreviousDeclaration(FunctionDecl * PrevDecl); FunctionDecl *getCanonicalDecl() override; diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 83a95ce..b7a6ca2 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -858,7 +858,7 @@ def Constructor : InheritableAttr { } def CPUSpecific : InheritableAttr { - let Spellings = [Clang<"cpu_specific">]; + let Spellings = [Clang<"cpu_specific">, Declspec<"cpu_specific">]; let Args = [VariadicIdentifierArgument<"Cpus">]; let Subjects = SubjectList<[Function]>; let Documentation = [CPUSpecificCPUDispatchDocs]; @@ -872,7 +872,7 @@ def CPUSpecific : InheritableAttr { } def CPUDispatch : InheritableAttr { - let Spellings = [Clang<"cpu_dispatch">]; + let Spellings = [Clang<"cpu_dispatch">, Declspec<"cpu_dispatch">]; let Args = [VariadicIdentifierArgument<"Cpus">]; let Subjects = SubjectList<[Function]>; let Documentation = [CPUSpecificCPUDispatchDocs]; diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index c3ef561..4a7254e 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1082,9 +1082,15 @@ public: return false; } - /// Identify whether this taret supports multiversioning of functions, + /// Identify whether this target supports multiversioning of functions, /// which requires support for cpu_supports and cpu_is functionality. - virtual bool supportsMultiVersioning() const { return false; } + bool supportsMultiVersioning() const { + return getTriple().getArch() == llvm::Triple::x86 || + getTriple().getArch() == llvm::Triple::x86_64; + } + + /// Identify whether this target supports IFuncs. + bool supportsIFunc() const { return getTriple().isOSBinFormatELF(); } // Validate the contents of the __builtin_cpu_supports(const char*) // argument. 
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index bf5e7a7..5f82d13 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2947,6 +2947,10 @@ bool FunctionDecl::isCPUSpecificMultiVersion() const { return isMultiVersion() && hasAttr(); } +bool FunctionDecl::isTargetMultiVersion() const { + return isMultiVersion() && hasAttr(); +} + void FunctionDecl::setPreviousDeclaration(FunctionDecl *PrevDecl) { redeclarable_base::setPreviousDecl(PrevDecl); diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index a77757a..07151c7 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -290,9 +290,6 @@ public: return checkCPUKind(CPU = getCPUKind(Name)); } - bool supportsMultiVersioning() const override { - return getTriple().isOSBinFormatELF(); - } unsigned multiVersionSortPriority(StringRef Name) const override; bool setFPMath(StringRef Name) override; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 70072318..84d7b9e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2392,6 +2392,29 @@ CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) { return Condition; } +static void CreateMultiVersionResolverReturn(CodeGenModule &CGM, + llvm::Function *Resolver, + CGBuilderTy &Builder, + llvm::Function *FuncToReturn, + bool SupportsIFunc) { + if (SupportsIFunc) { + Builder.CreateRet(FuncToReturn); + return; + } + + llvm::SmallVector Args; + llvm::for_each(Resolver->args(), + [&](llvm::Argument &Arg) { Args.push_back(&Arg); }); + + llvm::CallInst *Result = Builder.CreateCall(FuncToReturn, Args); + Result->setTailCallKind(llvm::CallInst::TCK_MustTail); + + if (Resolver->getReturnType()->isVoidTy()) + Builder.CreateRetVoid(); + else + Builder.CreateRet(Result); +} + void CodeGenFunction::EmitMultiVersionResolver( llvm::Function *Resolver, ArrayRef Options) { 
assert((getContext().getTargetInfo().getTriple().getArch() == @@ -2399,6 +2422,9 @@ void CodeGenFunction::EmitMultiVersionResolver( getContext().getTargetInfo().getTriple().getArch() == llvm::Triple::x86_64) && "Only implemented for x86 targets"); + + bool SupportsIFunc = getContext().getTargetInfo().supportsIFunc(); + // Main function's basic block. llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); Builder.SetInsertPoint(CurBlock); @@ -2412,13 +2438,15 @@ void CodeGenFunction::EmitMultiVersionResolver( if (!Condition) { assert(&RO == Options.end() - 1 && "Default or Generic case must be last"); - Builder.CreateRet(RO.Function); + CreateMultiVersionResolverReturn(CGM, Resolver, Builder, RO.Function, + SupportsIFunc); return; } llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); - llvm::IRBuilder<> RetBuilder(RetBlock); - RetBuilder.CreateRet(RO.Function); + CGBuilderTy RetBuilder(*this, RetBlock); + CreateMultiVersionResolverReturn(CGM, Resolver, RetBuilder, RO.Function, + SupportsIFunc); CurBlock = createBasicBlock("resolver_else", Resolver); Builder.CreateCondBr(Condition, RetBlock, CurBlock); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 1f653d18..262b8dc 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4274,6 +4274,7 @@ public: struct MultiVersionResolverOption { llvm::Function *Function; + FunctionDecl *FD; struct Conds { StringRef Architecture; llvm::SmallVector Features; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 9d3187b..129a770 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -892,10 +892,11 @@ static std::string getCPUSpecificMangling(const CodeGenModule &CGM, static void AppendCPUSpecificCPUDispatchMangling(const CodeGenModule &CGM, const CPUSpecificAttr *Attr, raw_ostream &Out) { - // cpu_specific gets the current 
name, dispatch gets the resolver. + // cpu_specific gets the current name, dispatch gets the resolver if IFunc is + // supported. if (Attr) Out << getCPUSpecificMangling(CGM, Attr->getCurCPUName()->getName()); - else + else if (CGM.getTarget().supportsIFunc()) Out << ".resolver"; } @@ -2507,13 +2508,19 @@ void CodeGenModule::emitMultiVersionFunctions() { TA->getArchitecture(), Feats); }); - llvm::Function *ResolverFunc = cast( - GetGlobalValue((getMangledName(GD) + ".resolver").str())); + llvm::Function *ResolverFunc; + const TargetInfo &TI = getTarget(); + + if (TI.supportsIFunc() || FD->isTargetMultiVersion()) + ResolverFunc = cast( + GetGlobalValue((getMangledName(GD) + ".resolver").str())); + else + ResolverFunc = cast(GetGlobalValue(getMangledName(GD))); + if (supportsCOMDAT()) ResolverFunc->setComdat( getModule().getOrInsertComdat(ResolverFunc->getName())); - const TargetInfo &TI = getTarget(); std::stable_sort( Options.begin(), Options.end(), [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS, @@ -2533,13 +2540,21 @@ void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { llvm::Type *DeclTy = getTypes().ConvertTypeForMem(FD->getType()); StringRef ResolverName = getMangledName(GD); - llvm::Type *ResolverType = llvm::FunctionType::get( - llvm::PointerType::get(DeclTy, - Context.getTargetAddressSpace(FD->getType())), - false); - auto *ResolverFunc = cast( - GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, - /*ForVTable=*/false)); + + llvm::Type *ResolverType; + GlobalDecl ResolverGD; + if (getTarget().supportsIFunc()) + ResolverType = llvm::FunctionType::get( + llvm::PointerType::get(DeclTy, + Context.getTargetAddressSpace(FD->getType())), + false); + else { + ResolverType = DeclTy; + ResolverGD = GD; + } + + auto *ResolverFunc = cast(GetOrCreateLLVMFunction( + ResolverName, ResolverType, ResolverGD, /*ForVTable=*/false)); SmallVector Options; const TargetInfo &Target = getTarget(); @@ -2571,16 +2586,24 @@ void 
CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) { CGF.EmitMultiVersionResolver(ResolverFunc, Options); } -/// If an ifunc for the specified mangled name is not in the module, create and -/// return an llvm IFunc Function with the specified type. -llvm::Constant * -CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy, - const FunctionDecl *FD) { +/// If a dispatcher for the specified mangled name is not in the module, create +/// and return an llvm Function with the specified type. +llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver( + GlobalDecl GD, llvm::Type *DeclTy, const FunctionDecl *FD) { std::string MangledName = getMangledNameImpl(*this, GD, FD, /*OmitMultiVersionMangling=*/true); - std::string IFuncName = MangledName + ".ifunc"; - if (llvm::GlobalValue *IFuncGV = GetGlobalValue(IFuncName)) - return IFuncGV; + + // Holds the name of the resolver, in ifunc mode this is the ifunc (which has + // a separate resolver). + std::string ResolverName = MangledName; + if (getTarget().supportsIFunc()) + ResolverName += ".ifunc"; + else if (FD->isTargetMultiVersion()) + ResolverName += ".resolver"; + + // If this already exists, just return that one. 
+ if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName)) + return ResolverGV; // Since this is the first time we've created this IFunc, make sure // that we put this multiversioned function into the list to be @@ -2588,20 +2611,28 @@ CodeGenModule::GetOrCreateMultiVersionIFunc(GlobalDecl GD, llvm::Type *DeclTy, if (!FD->isCPUDispatchMultiVersion() && !FD->isCPUSpecificMultiVersion()) MultiVersionFuncs.push_back(GD); - std::string ResolverName = MangledName + ".resolver"; - llvm::Type *ResolverType = llvm::FunctionType::get( - llvm::PointerType::get(DeclTy, - Context.getTargetAddressSpace(FD->getType())), - false); - llvm::Constant *Resolver = - GetOrCreateLLVMFunction(ResolverName, ResolverType, GlobalDecl{}, - /*ForVTable=*/false); - llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( - DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); - GIF->setName(IFuncName); - SetCommonAttributes(FD, GIF); + if (getTarget().supportsIFunc()) { + llvm::Type *ResolverType = llvm::FunctionType::get( + llvm::PointerType::get( + DeclTy, getContext().getTargetAddressSpace(FD->getType())), + false); + llvm::Constant *Resolver = GetOrCreateLLVMFunction( + MangledName + ".resolver", ResolverType, GlobalDecl{}, + /*ForVTable=*/false); + llvm::GlobalIFunc *GIF = llvm::GlobalIFunc::create( + DeclTy, 0, llvm::Function::ExternalLinkage, "", Resolver, &getModule()); + GIF->setName(ResolverName); + SetCommonAttributes(FD, GIF); + + return GIF; + } - return GIF; + llvm::Constant *Resolver = GetOrCreateLLVMFunction( + ResolverName, DeclTy, GlobalDecl{}, /*ForVTable=*/false); + assert(isa(Resolver) && + "Resolver should be created for the first time"); + SetCommonAttributes(FD, cast(Resolver)); + return Resolver; } /// GetOrCreateLLVMFunction - If the specified mangled name is not in the @@ -2641,7 +2672,7 @@ llvm::Constant *CodeGenModule::GetOrCreateLLVMFunction( if (TA && TA->isDefaultVersion()) UpdateMultiVersionNames(GD, FD); if (!IsForDefinition) - return 
GetOrCreateMultiVersionIFunc(GD, Ty, FD); + return GetOrCreateMultiVersionResolver(GD, Ty, FD); } } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index baf3619..227e722 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -1293,9 +1293,9 @@ private: llvm::AttributeList ExtraAttrs = llvm::AttributeList(), ForDefinition_t IsForDefinition = NotForDefinition); - llvm::Constant *GetOrCreateMultiVersionIFunc(GlobalDecl GD, - llvm::Type *DeclTy, - const FunctionDecl *FD); + llvm::Constant *GetOrCreateMultiVersionResolver(GlobalDecl GD, + llvm::Type *DeclTy, + const FunctionDecl *FD); void UpdateMultiVersionNames(GlobalDecl GD, const FunctionDecl *FD); llvm::Constant *GetOrCreateLLVMGlobal(StringRef MangledName, diff --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c index 1b98b5d..a0a7b97 100644 --- a/clang/test/CodeGen/attr-cpuspecific.c +++ b/clang/test/CodeGen/attr-cpuspecific.c @@ -1,100 +1,209 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LINUX +// RUN: %clang_cc1 -triple x86_64-windows-pc -fms-compatibility -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WINDOWS +#ifdef _WIN64 +#define ATTR(X) __declspec(X) +#else +#define ATTR(X) __attribute__((X)) +#endif // _WIN64 // Each called version should have an IFunc. 
-// CHECK: @SingleVersion.ifunc = ifunc void (), void ()* ()* @SingleVersion.resolver -// CHECK: @TwoVersions.ifunc = ifunc void (), void ()* ()* @TwoVersions.resolver -// CHECK: @TwoVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @TwoVersionsSameAttr.resolver -// CHECK: @ThreeVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @ThreeVersionsSameAttr.resolver +// LINUX: @SingleVersion.ifunc = ifunc void (), void ()* ()* @SingleVersion.resolver +// LINUX: @TwoVersions.ifunc = ifunc void (), void ()* ()* @TwoVersions.resolver +// LINUX: @TwoVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @TwoVersionsSameAttr.resolver +// LINUX: @ThreeVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @ThreeVersionsSameAttr.resolver -__attribute__((cpu_specific(ivybridge))) +ATTR(cpu_specific(ivybridge)) void SingleVersion(void){} -// CHECK: define void @SingleVersion.S() #[[S:[0-9]+]] +// LINUX: define void @SingleVersion.S() #[[S:[0-9]+]] +// WINDOWS: define dso_local void @SingleVersion.S() #[[S:[0-9]+]] -__attribute__((cpu_specific(ivybridge))) +ATTR(cpu_specific(ivybridge)) void NotCalled(void){} -// CHECK: define void @NotCalled.S() #[[S]] +// LINUX: define void @NotCalled.S() #[[S]] +// WINDOWS: define dso_local void @NotCalled.S() #[[S:[0-9]+]] // Done before any of the implementations. 
-__attribute__((cpu_dispatch(ivybridge, knl))) +ATTR(cpu_dispatch(ivybridge, knl)) void TwoVersions(void); -// CHECK: define void ()* @TwoVersions.resolver() -// CHECK: call void @__cpu_indicator_init -// CHECK: ret void ()* @TwoVersions.Z -// CHECK: ret void ()* @TwoVersions.S -// CHECK: call void @llvm.trap -// CHECK: unreachable - -__attribute__((cpu_specific(ivybridge))) +// LINUX: define void ()* @TwoVersions.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret void ()* @TwoVersions.Z +// LINUX: ret void ()* @TwoVersions.S +// LINUX: call void @llvm.trap +// LINUX: unreachable + +// WINDOWS: define dso_local void @TwoVersions() +// WINDOWS: call void @__cpu_indicator_init() +// WINDOWS: call void @TwoVersions.Z() +// WINDOWS-NEXT: ret void +// WINDOWS: call void @TwoVersions.S() +// WINDOWS-NEXT: ret void +// WINDOWS: call void @llvm.trap +// WINDOWS: unreachable + +ATTR(cpu_specific(ivybridge)) void TwoVersions(void){} -// CHECK: define void @TwoVersions.S() #[[S]] +// CHECK: define {{.*}}void @TwoVersions.S() #[[S]] -__attribute__((cpu_specific(knl))) +ATTR(cpu_specific(knl)) void TwoVersions(void){} -// CHECK: define void @TwoVersions.Z() #[[K:[0-9]+]] +// CHECK: define {{.*}}void @TwoVersions.Z() #[[K:[0-9]+]] -__attribute__((cpu_specific(ivybridge, knl))) +ATTR(cpu_specific(ivybridge, knl)) void TwoVersionsSameAttr(void){} -// CHECK: define void @TwoVersionsSameAttr.S() #[[S]] -// CHECK: define void @TwoVersionsSameAttr.Z() #[[K]] +// CHECK: define {{.*}}void @TwoVersionsSameAttr.S() #[[S]] +// CHECK: define {{.*}}void @TwoVersionsSameAttr.Z() #[[K]] -__attribute__((cpu_specific(atom, ivybridge, knl))) +ATTR(cpu_specific(atom, ivybridge, knl)) void ThreeVersionsSameAttr(void){} -// CHECK: define void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]] -// CHECK: define void @ThreeVersionsSameAttr.S() #[[S]] -// CHECK: define void @ThreeVersionsSameAttr.Z() #[[K]] +// CHECK: define {{.*}}void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]] +// CHECK: define 
{{.*}}void @ThreeVersionsSameAttr.S() #[[S]] +// CHECK: define {{.*}}void @ThreeVersionsSameAttr.Z() #[[K]] void usages() { SingleVersion(); - // CHECK: @SingleVersion.ifunc() + // LINUX: @SingleVersion.ifunc() + // WINDOWS: @SingleVersion() TwoVersions(); - // CHECK: @TwoVersions.ifunc() + // LINUX: @TwoVersions.ifunc() + // WINDOWS: @TwoVersions() TwoVersionsSameAttr(); - // CHECK: @TwoVersionsSameAttr.ifunc() + // LINUX: @TwoVersionsSameAttr.ifunc() + // WINDOWS: @TwoVersionsSameAttr() ThreeVersionsSameAttr(); - // CHECK: @ThreeVersionsSameAttr.ifunc() + // LINUX: @ThreeVersionsSameAttr.ifunc() + // WINDOWS: @ThreeVersionsSameAttr() } // has an extra config to emit! -__attribute__((cpu_dispatch(ivybridge, knl, atom))) +ATTR(cpu_dispatch(ivybridge, knl, atom)) void TwoVersionsSameAttr(void); -// CHECK: define void ()* @TwoVersionsSameAttr.resolver() -// CHECK: ret void ()* @TwoVersionsSameAttr.Z -// CHECK: ret void ()* @TwoVersionsSameAttr.S -// CHECK: ret void ()* @TwoVersionsSameAttr.O -// CHECK: call void @llvm.trap -// CHECK: unreachable - -__attribute__((cpu_dispatch(atom, ivybridge, knl))) +// LINUX: define void ()* @TwoVersionsSameAttr.resolver() +// LINUX: ret void ()* @TwoVersionsSameAttr.Z +// LINUX: ret void ()* @TwoVersionsSameAttr.S +// LINUX: ret void ()* @TwoVersionsSameAttr.O +// LINUX: call void @llvm.trap +// LINUX: unreachable + +// WINDOWS: define dso_local void @TwoVersionsSameAttr() +// WINDOWS: call void @TwoVersionsSameAttr.Z +// WINDOWS-NEXT: ret void +// WINDOWS: call void @TwoVersionsSameAttr.S +// WINDOWS-NEXT: ret void +// WINDOWS: call void @TwoVersionsSameAttr.O +// WINDOWS-NEXT: ret void +// WINDOWS: call void @llvm.trap +// WINDOWS: unreachable + +ATTR(cpu_dispatch(atom, ivybridge, knl)) void ThreeVersionsSameAttr(void){} -// CHECK: define void ()* @ThreeVersionsSameAttr.resolver() -// CHECK: call void @__cpu_indicator_init -// CHECK: ret void ()* @ThreeVersionsSameAttr.Z -// CHECK: ret void ()* @ThreeVersionsSameAttr.S -// CHECK: 
ret void ()* @ThreeVersionsSameAttr.O -// CHECK: call void @llvm.trap -// CHECK: unreachable +// LINUX: define void ()* @ThreeVersionsSameAttr.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret void ()* @ThreeVersionsSameAttr.Z +// LINUX: ret void ()* @ThreeVersionsSameAttr.S +// LINUX: ret void ()* @ThreeVersionsSameAttr.O +// LINUX: call void @llvm.trap +// LINUX: unreachable + +// WINDOWS: define dso_local void @ThreeVersionsSameAttr() +// WINDOWS: call void @__cpu_indicator_init +// WINDOWS: call void @ThreeVersionsSameAttr.Z +// WINDOWS-NEXT: ret void +// WINDOWS: call void @ThreeVersionsSameAttr.S +// WINDOWS-NEXT: ret void +// WINDOWS: call void @ThreeVersionsSameAttr.O +// WINDOWS-NEXT: ret void +// WINDOWS: call void @llvm.trap +// WINDOWS: unreachable // No Cpu Specific options. -__attribute__((cpu_dispatch(atom, ivybridge, knl))) +ATTR(cpu_dispatch(atom, ivybridge, knl)) void NoSpecifics(void); -// CHECK: define void ()* @NoSpecifics.resolver() -// CHECK: call void @__cpu_indicator_init -// CHECK: ret void ()* @NoSpecifics.Z -// CHECK: ret void ()* @NoSpecifics.S -// CHECK: ret void ()* @NoSpecifics.O -// CHECK: call void @llvm.trap -// CHECK: unreachable - -__attribute__((cpu_dispatch(atom, generic, ivybridge, knl))) +// LINUX: define void ()* @NoSpecifics.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret void ()* @NoSpecifics.Z +// LINUX: ret void ()* @NoSpecifics.S +// LINUX: ret void ()* @NoSpecifics.O +// LINUX: call void @llvm.trap +// LINUX: unreachable + +// WINDOWS: define dso_local void @NoSpecifics() +// WINDOWS: call void @__cpu_indicator_init +// WINDOWS: call void @NoSpecifics.Z +// WINDOWS-NEXT: ret void +// WINDOWS: call void @NoSpecifics.S +// WINDOWS-NEXT: ret void +// WINDOWS: call void @NoSpecifics.O +// WINDOWS-NEXT: ret void +// WINDOWS: call void @llvm.trap +// WINDOWS: unreachable + +ATTR(cpu_dispatch(atom, generic, ivybridge, knl)) void HasGeneric(void); -// CHECK: define void ()* 
@HasGeneric.resolver() -// CHECK: call void @__cpu_indicator_init -// CHECK: ret void ()* @HasGeneric.Z -// CHECK: ret void ()* @HasGeneric.S -// CHECK: ret void ()* @HasGeneric.O -// CHECK: ret void ()* @HasGeneric.A -// CHECK-NOT: call void @llvm.trap +// LINUX: define void ()* @HasGeneric.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret void ()* @HasGeneric.Z +// LINUX: ret void ()* @HasGeneric.S +// LINUX: ret void ()* @HasGeneric.O +// LINUX: ret void ()* @HasGeneric.A +// LINUX-NOT: call void @llvm.trap + +// WINDOWS: define dso_local void @HasGeneric() +// WINDOWS: call void @__cpu_indicator_init +// WINDOWS: call void @HasGeneric.Z +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasGeneric.S +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasGeneric.O +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasGeneric.A +// WINDOWS-NEXT: ret void +// WINDOWS-NOT: call void @llvm.trap + +ATTR(cpu_dispatch(atom, generic, ivybridge, knl)) +void HasParams(int i, double d); +// LINUX: define void (i32, double)* @HasParams.resolver() +// LINUX: call void @__cpu_indicator_init +// LINUX: ret void (i32, double)* @HasParams.Z +// LINUX: ret void (i32, double)* @HasParams.S +// LINUX: ret void (i32, double)* @HasParams.O +// LINUX: ret void (i32, double)* @HasParams.A +// LINUX-NOT: call void @llvm.trap + +// WINDOWS: define dso_local void @HasParams(i32, double) +// WINDOWS: call void @__cpu_indicator_init +// WINDOWS: call void @HasParams.Z(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasParams.S(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasParams.O(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: call void @HasParams.A(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS-NOT: call void @llvm.trap + +ATTR(cpu_dispatch(atom, generic, ivybridge, knl)) +int HasParamsAndReturn(int i, double d); +// LINUX: define i32 (i32, double)* @HasParamsAndReturn.resolver() +// LINUX: call void 
@__cpu_indicator_init +// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.Z +// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.S +// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.O +// LINUX: ret i32 (i32, double)* @HasParamsAndReturn.A +// LINUX-NOT: call void @llvm.trap + +// WINDOWS: define dso_local i32 @HasParamsAndReturn(i32, double) +// WINDOWS: call void @__cpu_indicator_init +// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.Z(i32 %0, double %1) +// WINDOWS-NEXT: ret i32 %[[RET]] +// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.S(i32 %0, double %1) +// WINDOWS-NEXT: ret i32 %[[RET]] +// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.O(i32 %0, double %1) +// WINDOWS-NEXT: ret i32 %[[RET]] +// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.A(i32 %0, double %1) +// WINDOWS-NEXT: ret i32 %[[RET]] +// WINDOWS-NOT: call void @llvm.trap // CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" // CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" diff --git a/clang/test/CodeGen/attr-target-mv-func-ptrs.c b/clang/test/CodeGen/attr-target-mv-func-ptrs.c index 5df9a92..d1ff800 100644 --- a/clang/test/CodeGen/attr-target-mv-func-ptrs.c +++ b/clang/test/CodeGen/attr-target-mv-func-ptrs.c @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo(int i) { return 0; } int __attribute__((target("arch=sandybridge"))) foo(int); int __attribute__((target("arch=ivybridge"))) foo(int i) {return 1;} @@ 
-16,17 +17,31 @@ int bar() { return Free(1) + Free(2); } -// CHECK: @foo.ifunc = ifunc i32 (i32), i32 (i32)* ()* @foo.resolver -// CHECK: define i32 @foo.sse4.2( -// CHECK: ret i32 0 -// CHECK: define i32 @foo.arch_ivybridge( -// CHECK: ret i32 1 -// CHECK: define i32 @foo( -// CHECK: ret i32 2 +// LINUX: @foo.ifunc = ifunc i32 (i32), i32 (i32)* ()* @foo.resolver +// LINUX: define i32 @foo.sse4.2( +// LINUX: ret i32 0 +// LINUX: define i32 @foo.arch_ivybridge( +// LINUX: ret i32 1 +// LINUX: define i32 @foo( +// LINUX: ret i32 2 -// CHECK: define i32 @bar() -// CHECK: call void @func(i32 (i32)* @foo.ifunc) -// CHECK: store i32 (i32)* @foo.ifunc -// CHECK: store i32 (i32)* @foo.ifunc +// WINDOWS: define dso_local i32 @foo.sse4.2( +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @foo.arch_ivybridge( +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @foo( +// WINDOWS: ret i32 2 -// CHECK: declare i32 @foo.arch_sandybridge( +// LINUX: define i32 @bar() +// LINUX: call void @func(i32 (i32)* @foo.ifunc) +// LINUX: store i32 (i32)* @foo.ifunc +// LINUX: store i32 (i32)* @foo.ifunc + +// WINDOWS: define dso_local i32 @bar() +// WINDOWS: call void @func(i32 (i32)* @foo.resolver) +// WINDOWS: store i32 (i32)* @foo.resolver +// WINDOWS: store i32 (i32)* @foo.resolver + +// LINUX: declare i32 @foo.arch_sandybridge( + +// WINDOWS: declare dso_local i32 @foo.arch_sandybridge( diff --git a/clang/test/CodeGen/attr-target-mv-va-args.c b/clang/test/CodeGen/attr-target-mv-va-args.c index b33f841..356b769 100644 --- a/clang/test/CodeGen/attr-target-mv-va-args.c +++ b/clang/test/CodeGen/attr-target-mv-va-args.c @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo(int i, ...) 
{ return 0; } int __attribute__((target("arch=sandybridge"))) foo(int i, ...); int __attribute__((target("arch=ivybridge"))) foo(int i, ...) {return 1;} @@ -8,19 +9,37 @@ int bar() { return foo(1, 'a', 1.1) + foo(2, 2.2, "asdf"); } -// CHECK: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver -// CHECK: define i32 @foo.sse4.2(i32 %i, ...) -// CHECK: ret i32 0 -// CHECK: define i32 @foo.arch_ivybridge(i32 %i, ...) -// CHECK: ret i32 1 -// CHECK: define i32 @foo(i32 %i, ...) -// CHECK: ret i32 2 -// CHECK: define i32 @bar() -// CHECK: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double -// CHECK: call i32 (i32, ...) @foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds -// CHECK: define i32 (i32, ...)* @foo.resolver() comdat -// CHECK: ret i32 (i32, ...)* @foo.arch_sandybridge -// CHECK: ret i32 (i32, ...)* @foo.arch_ivybridge -// CHECK: ret i32 (i32, ...)* @foo.sse4.2 -// CHECK: ret i32 (i32, ...)* @foo -// CHECK: declare i32 @foo.arch_sandybridge(i32, ...) +// LINUX: @foo.ifunc = ifunc i32 (i32, ...), i32 (i32, ...)* ()* @foo.resolver +// LINUX: define i32 @foo.sse4.2(i32 %i, ...) +// LINUX: ret i32 0 +// LINUX: define i32 @foo.arch_ivybridge(i32 %i, ...) +// LINUX: ret i32 1 +// LINUX: define i32 @foo(i32 %i, ...) +// LINUX: ret i32 2 +// LINUX: define i32 @bar() +// LINUX: call i32 (i32, ...) @foo.ifunc(i32 1, i32 97, double +// LINUX: call i32 (i32, ...) @foo.ifunc(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds + +// LINUX: define i32 (i32, ...)* @foo.resolver() comdat +// LINUX: ret i32 (i32, ...)* @foo.arch_sandybridge +// LINUX: ret i32 (i32, ...)* @foo.arch_ivybridge +// LINUX: ret i32 (i32, ...)* @foo.sse4.2 +// LINUX: ret i32 (i32, ...)* @foo +// LINUX: declare i32 @foo.arch_sandybridge(i32, ...) + +// WINDOWS: define dso_local i32 @foo.sse4.2(i32 %i, ...) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @foo.arch_ivybridge(i32 %i, ...) 
+// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @foo(i32 %i, ...) +// WINDOWS: ret i32 2 +// WINDOWS: define dso_local i32 @bar() +// WINDOWS: call i32 (i32, ...) @foo.resolver(i32 1, i32 97, double +// WINDOWS: call i32 (i32, ...) @foo.resolver(i32 2, double 2.2{{[0-9Ee+]+}}, i8* getelementptr inbounds + +// WINDOWS: define dso_local i32 @foo.resolver(i32, ...) comdat +// WINDOWS: musttail call i32 (i32, ...) @foo.arch_sandybridge +// WINDOWS: musttail call i32 (i32, ...) @foo.arch_ivybridge +// WINDOWS: musttail call i32 (i32, ...) @foo.sse4.2 +// WINDOWS: musttail call i32 (i32, ...) @foo +// WINDOWS: declare dso_local i32 @foo.arch_sandybridge(i32, ...) diff --git a/clang/test/CodeGen/attr-target-mv.c b/clang/test/CodeGen/attr-target-mv.c index 3ccb5b3..a28f330 100644 --- a/clang/test/CodeGen/attr-target-mv.c +++ b/clang/test/CodeGen/attr-target-mv.c @@ -1,4 +1,6 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS + int __attribute__((target("sse4.2"))) foo(void) { return 0; } int __attribute__((target("arch=sandybridge"))) foo(void); int __attribute__((target("arch=ivybridge"))) foo(void) {return 1;} @@ -25,82 +27,155 @@ void bar3() { inline __attribute__((target("default"))) void foo_decls(void) {} inline __attribute__((target("sse4.2"))) void foo_decls(void) {} -inline __attribute__((target("default"))) void foo_multi(void) {} -inline __attribute__((target("avx,sse4.2"))) void foo_multi(void) {} -inline __attribute__((target("sse4.2,fma4"))) void foo_multi(void) {} -inline __attribute__((target("arch=ivybridge,fma4,sse4.2"))) void foo_multi(void) {} +inline __attribute__((target("default"))) void foo_multi(int i, double d) {} +inline __attribute__((target("avx,sse4.2"))) void foo_multi(int i, double d) {} +inline 
__attribute__((target("sse4.2,fma4"))) void foo_multi(int i, double d) {} +inline __attribute__((target("arch=ivybridge,fma4,sse4.2"))) void foo_multi(int i, double d) {} void bar4() { - foo_multi(); + foo_multi(1, 5.0); } -// CHECK: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver -// CHECK: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver -// CHECK: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver - -// CHECK: define i32 @foo.sse4.2() -// CHECK: ret i32 0 -// CHECK: define i32 @foo.arch_ivybridge() -// CHECK: ret i32 1 -// CHECK: define i32 @foo() -// CHECK: ret i32 2 -// CHECK: define i32 @bar() -// CHECK: call i32 @foo.ifunc() - -// CHECK: define i32 ()* @foo.resolver() comdat -// CHECK: call void @__cpu_indicator_init() -// CHECK: ret i32 ()* @foo.arch_sandybridge -// CHECK: ret i32 ()* @foo.arch_ivybridge -// CHECK: ret i32 ()* @foo.sse4.2 -// CHECK: ret i32 ()* @foo - -// CHECK: define i32 @bar2() -// CHECK: call i32 @foo_inline.ifunc() - -// CHECK: define i32 ()* @foo_inline.resolver() comdat -// CHECK: call void @__cpu_indicator_init() -// CHECK: ret i32 ()* @foo_inline.arch_sandybridge -// CHECK: ret i32 ()* @foo_inline.arch_ivybridge -// CHECK: ret i32 ()* @foo_inline.sse4.2 -// CHECK: ret i32 ()* @foo_inline - -// CHECK: define void @bar3() -// CHECK: call void @foo_decls.ifunc() - -// CHECK: define void ()* @foo_decls.resolver() comdat -// CHECK: ret void ()* @foo_decls.sse4.2 -// CHECK: ret void ()* @foo_decls - -// CHECK: define void @bar4() -// CHECK: call void @foo_multi.ifunc() - -// CHECK: define void ()* @foo_multi.resolver() comdat -// CHECK: and i32 %{{.*}}, 4352 -// CHECK: icmp eq i32 %{{.*}}, 4352 -// CHECK: ret void ()* @foo_multi.fma4_sse4.2 -// CHECK: icmp eq i32 %{{.*}}, 12 -// CHECK: and i32 %{{.*}}, 4352 -// CHECK: icmp eq i32 %{{.*}}, 4352 -// CHECK: ret void ()* @foo_multi.arch_ivybridge_fma4_sse4.2 -// CHECK: and i32 %{{.*}}, 768 -// CHECK: icmp eq i32 %{{.*}}, 768 -// CHECK: ret void ()* 
@foo_multi.avx_sse4.2 -// CHECK: ret void ()* @foo_multi - -// CHECK: declare i32 @foo.arch_sandybridge() - -// CHECK: define linkonce i32 @foo_inline.sse4.2() -// CHECK: ret i32 0 - -// CHECK: declare i32 @foo_inline.arch_sandybridge() -// -// CHECK: define linkonce i32 @foo_inline.arch_ivybridge() -// CHECK: ret i32 1 -// CHECK: define linkonce i32 @foo_inline() -// CHECK: ret i32 2 - -// CHECK: define linkonce void @foo_decls() -// CHECK: define linkonce void @foo_decls.sse4.2() - -// CHECK: define linkonce void @foo_multi.avx_sse4.2() -// CHECK: define linkonce void @foo_multi.fma4_sse4.2() -// CHECK: define linkonce void @foo_multi.arch_ivybridge_fma4_sse4.2() +// LINUX: @foo.ifunc = ifunc i32 (), i32 ()* ()* @foo.resolver +// LINUX: @foo_inline.ifunc = ifunc i32 (), i32 ()* ()* @foo_inline.resolver +// LINUX: @foo_decls.ifunc = ifunc void (), void ()* ()* @foo_decls.resolver +// LINUX: @foo_multi.ifunc = ifunc void (i32, double), void (i32, double)* ()* @foo_multi.resolver + +// LINUX: define i32 @foo.sse4.2() +// LINUX: ret i32 0 +// LINUX: define i32 @foo.arch_ivybridge() +// LINUX: ret i32 1 +// LINUX: define i32 @foo() +// LINUX: ret i32 2 +// LINUX: define i32 @bar() +// LINUX: call i32 @foo.ifunc() + +// WINDOWS: define dso_local i32 @foo.sse4.2() +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @foo.arch_ivybridge() +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @foo() +// WINDOWS: ret i32 2 +// WINDOWS: define dso_local i32 @bar() +// WINDOWS: call i32 @foo.resolver() + +// LINUX: define i32 ()* @foo.resolver() comdat +// LINUX: call void @__cpu_indicator_init() +// LINUX: ret i32 ()* @foo.arch_sandybridge +// LINUX: ret i32 ()* @foo.arch_ivybridge +// LINUX: ret i32 ()* @foo.sse4.2 +// LINUX: ret i32 ()* @foo + +// WINDOWS: define dso_local i32 @foo.resolver() comdat +// WINDOWS: call void @__cpu_indicator_init() +// WINDOWS: call i32 @foo.arch_sandybridge +// WINDOWS: call i32 @foo.arch_ivybridge +// WINDOWS: call i32 @foo.sse4.2 
+// WINDOWS: call i32 @foo + +// LINUX: define i32 @bar2() +// LINUX: call i32 @foo_inline.ifunc() + +// WINDOWS: define dso_local i32 @bar2() +// WINDOWS: call i32 @foo_inline.resolver() + +// LINUX: define i32 ()* @foo_inline.resolver() comdat +// LINUX: call void @__cpu_indicator_init() +// LINUX: ret i32 ()* @foo_inline.arch_sandybridge +// LINUX: ret i32 ()* @foo_inline.arch_ivybridge +// LINUX: ret i32 ()* @foo_inline.sse4.2 +// LINUX: ret i32 ()* @foo_inline + +// WINDOWS: define dso_local i32 @foo_inline.resolver() comdat +// WINDOWS: call void @__cpu_indicator_init() +// WINDOWS: call i32 @foo_inline.arch_sandybridge +// WINDOWS: call i32 @foo_inline.arch_ivybridge +// WINDOWS: call i32 @foo_inline.sse4.2 +// WINDOWS: call i32 @foo_inline + +// LINUX: define void @bar3() +// LINUX: call void @foo_decls.ifunc() + +// WINDOWS: define dso_local void @bar3() +// WINDOWS: call void @foo_decls.resolver() + +// LINUX: define void ()* @foo_decls.resolver() comdat +// LINUX: ret void ()* @foo_decls.sse4.2 +// LINUX: ret void ()* @foo_decls + +// WINDOWS: define dso_local void @foo_decls.resolver() comdat +// WINDOWS: call void @foo_decls.sse4.2 +// WINDOWS: call void @foo_decls + +// LINUX: define void @bar4() +// LINUX: call void @foo_multi.ifunc(i32 1, double 5.{{[0+e]*}}) + +// WINDOWS: define dso_local void @bar4() +// WINDOWS: call void @foo_multi.resolver(i32 1, double 5.{{[0+e]*}}) + +// LINUX: define void (i32, double)* @foo_multi.resolver() comdat +// LINUX: and i32 %{{.*}}, 4352 +// LINUX: icmp eq i32 %{{.*}}, 4352 +// LINUX: ret void (i32, double)* @foo_multi.fma4_sse4.2 +// LINUX: icmp eq i32 %{{.*}}, 12 +// LINUX: and i32 %{{.*}}, 4352 +// LINUX: icmp eq i32 %{{.*}}, 4352 +// LINUX: ret void (i32, double)* @foo_multi.arch_ivybridge_fma4_sse4.2 +// LINUX: and i32 %{{.*}}, 768 +// LINUX: icmp eq i32 %{{.*}}, 768 +// LINUX: ret void (i32, double)* @foo_multi.avx_sse4.2 +// LINUX: ret void (i32, double)* @foo_multi + +// WINDOWS: define dso_local void
@foo_multi.resolver(i32, double) comdat +// WINDOWS: and i32 %{{.*}}, 4352 +// WINDOWS: icmp eq i32 %{{.*}}, 4352 +// WINDOWS: call void @foo_multi.fma4_sse4.2(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: icmp eq i32 %{{.*}}, 12 +// WINDOWS: and i32 %{{.*}}, 4352 +// WINDOWS: icmp eq i32 %{{.*}}, 4352 +// WINDOWS: call void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: and i32 %{{.*}}, 768 +// WINDOWS: icmp eq i32 %{{.*}}, 768 +// WINDOWS: call void @foo_multi.avx_sse4.2(i32 %0, double %1) +// WINDOWS-NEXT: ret void +// WINDOWS: call void @foo_multi(i32 %0, double %1) +// WINDOWS-NEXT: ret void + +// LINUX: declare i32 @foo.arch_sandybridge() + +// WINDOWS: declare dso_local i32 @foo.arch_sandybridge() + +// LINUX: define linkonce i32 @foo_inline.sse4.2() +// LINUX: ret i32 0 + +// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.sse4.2() +// WINDOWS: ret i32 0 + +// LINUX: declare i32 @foo_inline.arch_sandybridge() + +// WINDOWS: declare dso_local i32 @foo_inline.arch_sandybridge() + +// LINUX: define linkonce i32 @foo_inline.arch_ivybridge() +// LINUX: ret i32 1 +// LINUX: define linkonce i32 @foo_inline() +// LINUX: ret i32 2 + +// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.arch_ivybridge() +// WINDOWS: ret i32 1 +// WINDOWS: define linkonce_odr dso_local i32 @foo_inline() +// WINDOWS: ret i32 2 + +// LINUX: define linkonce void @foo_decls() +// LINUX: define linkonce void @foo_decls.sse4.2() + +// WINDOWS: define linkonce_odr dso_local void @foo_decls() +// WINDOWS: define linkonce_odr dso_local void @foo_decls.sse4.2() + +// LINUX: define linkonce void @foo_multi.avx_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) +// LINUX: define linkonce void @foo_multi.fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) +// LINUX: define linkonce void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) + +// WINDOWS: define linkonce_odr dso_local void @foo_multi.avx_sse4.2(i32 
%{{[^,]+}}, double %{{[^\)]+}}) +// WINDOWS: define linkonce_odr dso_local void @foo_multi.fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) +// WINDOWS: define linkonce_odr dso_local void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}}) diff --git a/clang/test/CodeGenCXX/attr-target-mv-diff-ns.cpp b/clang/test/CodeGenCXX/attr-target-mv-diff-ns.cpp index 4dc2b67..77e1ad7 100644 --- a/clang/test/CodeGenCXX/attr-target-mv-diff-ns.cpp +++ b/clang/test/CodeGenCXX/attr-target-mv-diff-ns.cpp @@ -1,5 +1,6 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s -// Test ensures that this properly differentiates between types in different +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS +// Test ensures that this properly differentiates between types in different // namespaces. 
int __attribute__((target("sse4.2"))) foo(int) { return 0; } int __attribute__((target("arch=sandybridge"))) foo(int); @@ -17,38 +18,71 @@ int bar() { return foo(1) + ns::foo(2); } -// CHECK: @_Z3fooi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z3fooi.resolver -// CHECK: @_ZN2ns3fooEi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_ZN2ns3fooEi.resolver - -// CHECK: define i32 @_Z3fooi.sse4.2(i32) -// CHECK: ret i32 0 -// CHECK: define i32 @_Z3fooi.arch_ivybridge(i32) -// CHECK: ret i32 1 -// CHECK: define i32 @_Z3fooi(i32) -// CHECK: ret i32 2 - -// CHECK: define i32 @_ZN2ns3fooEi.sse4.2(i32) -// CHECK: ret i32 0 -// CHECK: define i32 @_ZN2ns3fooEi.arch_ivybridge(i32) -// CHECK: ret i32 1 -// CHECK: define i32 @_ZN2ns3fooEi(i32) -// CHECK: ret i32 2 - -// CHECK: define i32 @_Z3barv() -// CHECK: call i32 @_Z3fooi.ifunc(i32 1) -// CHECK: call i32 @_ZN2ns3fooEi.ifunc(i32 2) - -// CHECK: define i32 (i32)* @_Z3fooi.resolver() comdat -// CHECK: ret i32 (i32)* @_Z3fooi.arch_sandybridge -// CHECK: ret i32 (i32)* @_Z3fooi.arch_ivybridge -// CHECK: ret i32 (i32)* @_Z3fooi.sse4.2 -// CHECK: ret i32 (i32)* @_Z3fooi -// -// CHECK: define i32 (i32)* @_ZN2ns3fooEi.resolver() comdat -// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.arch_sandybridge -// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.arch_ivybridge -// CHECK: ret i32 (i32)* @_ZN2ns3fooEi.sse4.2 -// CHECK: ret i32 (i32)* @_ZN2ns3fooEi - -// CHECK: declare i32 @_Z3fooi.arch_sandybridge(i32) -// CHECK: declare i32 @_ZN2ns3fooEi.arch_sandybridge(i32) +// LINUX: @_Z3fooi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z3fooi.resolver +// LINUX: @_ZN2ns3fooEi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_ZN2ns3fooEi.resolver + +// LINUX: define i32 @_Z3fooi.sse4.2(i32) +// LINUX: ret i32 0 +// LINUX: define i32 @_Z3fooi.arch_ivybridge(i32) +// LINUX: ret i32 1 +// LINUX: define i32 @_Z3fooi(i32) +// LINUX: ret i32 2 + +// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.sse4.2"(i32) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 
@"?foo@@YAHH@Z.arch_ivybridge"(i32) +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z"(i32) +// WINDOWS: ret i32 2 + +// LINUX: define i32 @_ZN2ns3fooEi.sse4.2(i32) +// LINUX: ret i32 0 +// LINUX: define i32 @_ZN2ns3fooEi.arch_ivybridge(i32) +// LINUX: ret i32 1 +// LINUX: define i32 @_ZN2ns3fooEi(i32) +// LINUX: ret i32 2 + +// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.sse4.2"(i32) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.arch_ivybridge"(i32) +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z"(i32) +// WINDOWS: ret i32 2 + +// LINUX: define i32 @_Z3barv() +// LINUX: call i32 @_Z3fooi.ifunc(i32 1) +// LINUX: call i32 @_ZN2ns3fooEi.ifunc(i32 2) + +// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"() +// WINDOWS: call i32 @"?foo@@YAHH@Z.resolver"(i32 1) +// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.resolver"(i32 2) + +// LINUX: define i32 (i32)* @_Z3fooi.resolver() comdat +// LINUX: ret i32 (i32)* @_Z3fooi.arch_sandybridge +// LINUX: ret i32 (i32)* @_Z3fooi.arch_ivybridge +// LINUX: ret i32 (i32)* @_Z3fooi.sse4.2 +// LINUX: ret i32 (i32)* @_Z3fooi + +// WINDOWS: define dso_local i32 @"?foo@@YAHH@Z.resolver"(i32) comdat +// WINDOWS: call i32 @"?foo@@YAHH@Z.arch_sandybridge"(i32 %0) +// WINDOWS: call i32 @"?foo@@YAHH@Z.arch_ivybridge"(i32 %0) +// WINDOWS: call i32 @"?foo@@YAHH@Z.sse4.2"(i32 %0) +// WINDOWS: call i32 @"?foo@@YAHH@Z"(i32 %0) + +// LINUX: define i32 (i32)* @_ZN2ns3fooEi.resolver() comdat +// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.arch_sandybridge +// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.arch_ivybridge +// LINUX: ret i32 (i32)* @_ZN2ns3fooEi.sse4.2 +// LINUX: ret i32 (i32)* @_ZN2ns3fooEi + +// WINDOWS: define dso_local i32 @"?foo@ns@@YAHH@Z.resolver"(i32) comdat +// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.arch_sandybridge"(i32 %0) +// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.arch_ivybridge"(i32 %0) +// WINDOWS: call i32 @"?foo@ns@@YAHH@Z.sse4.2"(i32 %0) +// WINDOWS: call i32 
@"?foo@ns@@YAHH@Z"(i32 %0) + +// LINUX: declare i32 @_Z3fooi.arch_sandybridge(i32) +// LINUX: declare i32 @_ZN2ns3fooEi.arch_sandybridge(i32) + +// WINDOWS: declare dso_local i32 @"?foo@@YAHH@Z.arch_sandybridge"(i32) +// WINDOWS: declare dso_local i32 @"?foo@ns@@YAHH@Z.arch_sandybridge"(i32) diff --git a/clang/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp b/clang/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp index 290d6b5..6336e19 100644 --- a/clang/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp +++ b/clang/test/CodeGenCXX/attr-target-mv-func-ptrs.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS void temp(); void temp(int); using FP = void(*)(int); @@ -31,15 +32,23 @@ int bar() { return Free(1) + (s.*Member)(2); } +// LINUX: @_Z3fooi.ifunc +// LINUX: @_ZN1S3fooEi.ifunc -// CHECK: @_Z3fooi.ifunc -// CHECK: @_ZN1S3fooEi.ifunc - -// CHECK: define i32 @_Z3barv() +// LINUX: define i32 @_Z3barv() // Store to Free of ifunc -// CHECK: store i32 (i32)* @_Z3fooi.ifunc +// LINUX: store i32 (i32)* @_Z3fooi.ifunc // Store to Member of ifunc -// CHECK: store { i64, i64 } { i64 ptrtoint (i32 (%struct.S*, i32)* @_ZN1S3fooEi.ifunc to i64), i64 0 }, { i64, i64 }* [[MEMBER:%[a-z]+]] +// LINUX: store { i64, i64 } { i64 ptrtoint (i32 (%struct.S*, i32)* @_ZN1S3fooEi.ifunc to i64), i64 0 }, { i64, i64 }* [[MEMBER:%[a-z]+]] // Call to 'f' with the ifunc -// CHECK: call void @_Z1fPFiiEM1SFiiE(i32 (i32)* @_Z3fooi.ifunc +// LINUX: call void @_Z1fPFiiEM1SFiiE(i32 (i32)* @_Z3fooi.ifunc + +// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"() +// Store to Free +// WINDOWS: store i32 (i32)* @"?foo@@YAHH@Z.resolver", i32 (i32)** +// Store to Member +// WINDOWS: store i8* bitcast (i32 (%struct.S*, i32)* 
@"?foo@S@@QEAAHH@Z.resolver" to i8*), i8** + +// Call to 'f' +// WINDOWS: call void @"?f@@YAXP6AHH@ZP8S@@EAAHH@Z@Z"(i32 (i32)* @"?foo@@YAHH@Z.resolver", i8* bitcast (i32 (%struct.S*, i32)* @"?foo@S@@QEAAHH@Z.resolver" to i8*)) diff --git a/clang/test/CodeGenCXX/attr-target-mv-inalloca.cpp b/clang/test/CodeGenCXX/attr-target-mv-inalloca.cpp new file mode 100644 index 0000000..0b65622 --- /dev/null +++ b/clang/test/CodeGenCXX/attr-target-mv-inalloca.cpp @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 -std=c++11 -triple i686-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS64 + +struct Foo { + Foo(); + Foo(const Foo &o); + ~Foo(); + int x; +}; +int __attribute__((target("default"))) bar(Foo o) { return o.x; } +int __attribute__((target("sse4.2"))) bar(Foo o) { return o.x + 1; } +int __attribute__((target("arch=ivybridge"))) bar(Foo o) { return o.x + 2; } + +void usage() { + Foo f; + bar(f); +} + +// WINDOWS: define dso_local i32 @"?bar@@YAHUFoo@@@Z"(<{ %struct.Foo }>* inalloca) +// WINDOWS: %[[O:[0-9a-zA-Z]+]] = getelementptr inbounds <{ %struct.Foo }>, <{ %struct.Foo }>* %0, i32 0, i32 0 +// WINDOWS: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0 +// WINDOWS: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]] +// WINDOWS: ret i32 %[[LOAD]] + +// WINDOWS: define dso_local i32 @"?bar@@YAHUFoo@@@Z.sse4.2"(<{ %struct.Foo }>* inalloca) +// WINDOWS: %[[O:[0-9a-zA-Z]+]] = getelementptr inbounds <{ %struct.Foo }>, <{ %struct.Foo }>* %0, i32 0, i32 0 +// WINDOWS: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0 +// WINDOWS: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]] +// WINDOWS: %[[ADD:[0-9a-zA-Z]+]] = add nsw i32 %[[LOAD]], 1 +// WINDOWS: ret i32 %[[ADD]] + +// WINDOWS: define dso_local i32 @"?bar@@YAHUFoo@@@Z.arch_ivybridge"(<{ %struct.Foo }>* inalloca) +// 
WINDOWS: %[[O:[0-9a-zA-Z]+]] = getelementptr inbounds <{ %struct.Foo }>, <{ %struct.Foo }>* %0, i32 0, i32 0 +// WINDOWS: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0 +// WINDOWS: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]] +// WINDOWS: %[[ADD:[0-9a-zA-Z]+]] = add nsw i32 %[[LOAD]], 2 +// WINDOWS: ret i32 %[[ADD]] + +// WINDOWS: define dso_local void @"?usage@@YAXXZ"() +// WINDOWS: %[[F:[0-9a-zA-Z]+]] = alloca %struct.Foo +// WINDOWS: %[[ARGMEM:[0-9a-zA-Z]+]] = alloca inalloca <{ %struct.Foo }> +// WINDOWS: %[[CALL:[0-9a-zA-Z]+]] = call i32 @"?bar@@YAHUFoo@@@Z.resolver"(<{ %struct.Foo }>* inalloca %[[ARGMEM]]) + +// WINDOWS: define dso_local i32 @"?bar@@YAHUFoo@@@Z.resolver"(<{ %struct.Foo }>*) +// WINDOWS: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z.arch_ivybridge"(<{ %struct.Foo }>* %0) +// WINDOWS-NEXT: ret i32 %[[RET]] +// WINDOWS: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z.sse4.2"(<{ %struct.Foo }>* %0) +// WINDOWS-NEXT: ret i32 %[[RET]] +// WINDOWS: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z"(<{ %struct.Foo }>* %0) +// WINDOWS-NEXT: ret i32 %[[RET]] + + +// WINDOWS64: define dso_local i32 @"?bar@@YAHUFoo@@@Z"(%struct.Foo* %[[O:[0-9a-zA-Z]+]]) +// WINDOWS64: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0 +// WINDOWS64: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]] +// WINDOWS64: ret i32 %[[LOAD]] + +// WINDOWS64: define dso_local i32 @"?bar@@YAHUFoo@@@Z.sse4.2"(%struct.Foo* %[[O:[0-9a-zA-Z]+]]) +// WINDOWS64: %[[X:[0-9a-zA-Z]+]] = getelementptr inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0 +// WINDOWS64: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]] +// WINDOWS64: %[[ADD:[0-9a-zA-Z]+]] = add nsw i32 %[[LOAD]], 1 +// WINDOWS64: ret i32 %[[ADD]] + +// WINDOWS64: define dso_local i32 @"?bar@@YAHUFoo@@@Z.arch_ivybridge"(%struct.Foo* %[[O:[0-9a-zA-Z]+]]) +// WINDOWS64: %[[X:[0-9a-zA-Z]+]] = getelementptr 
inbounds %struct.Foo, %struct.Foo* %[[O]], i32 0, i32 0 +// WINDOWS64: %[[LOAD:[0-9a-zA-Z]+]] = load i32, i32* %[[X]] +// WINDOWS64: %[[ADD:[0-9a-zA-Z]+]] = add nsw i32 %[[LOAD]], 2 +// WINDOWS64: ret i32 %[[ADD]] + +// WINDOWS64: define dso_local void @"?usage@@YAXXZ"() +// WINDOWS64: %[[F:[0-9a-zA-Z]+]] = alloca %struct.Foo +// WINDOWS64: %[[ARG:[0-9a-zA-Z.]+]] = alloca %struct.Foo +// WINDOWS64: %[[CALL:[0-9a-zA-Z]+]] = call i32 @"?bar@@YAHUFoo@@@Z.resolver"(%struct.Foo* %[[ARG]]) + +// WINDOWS64: define dso_local i32 @"?bar@@YAHUFoo@@@Z.resolver"(%struct.Foo*) +// WINDOWS64: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z.arch_ivybridge"(%struct.Foo* %0) +// WINDOWS64-NEXT: ret i32 %[[RET]] +// WINDOWS64: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z.sse4.2"(%struct.Foo* %0) +// WINDOWS64-NEXT: ret i32 %[[RET]] +// WINDOWS64: %[[RET:[0-9a-zA-Z]+]] = musttail call i32 @"?bar@@YAHUFoo@@@Z"(%struct.Foo* %0) +// WINDOWS64-NEXT: ret i32 %[[RET]] diff --git a/clang/test/CodeGenCXX/attr-target-mv-member-funcs.cpp b/clang/test/CodeGenCXX/attr-target-mv-member-funcs.cpp index 622b738..a63737e 100644 --- a/clang/test/CodeGenCXX/attr-target-mv-member-funcs.cpp +++ b/clang/test/CodeGenCXX/attr-target-mv-member-funcs.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS struct S { int __attribute__((target("sse4.2"))) foo(int) { return 0; } @@ -64,82 +65,156 @@ int templ_use() { return a.foo(1) + b.foo(2); } -// CHECK: @_ZN1SaSERKS_.ifunc = ifunc %struct.S* (%struct.S*, %struct.S*), %struct.S* (%struct.S*, %struct.S*)* ()* @_ZN1SaSERKS_.resolver -// CHECK: @_ZNK9ConvertTocv1SEv.ifunc = ifunc void (%struct.ConvertTo*), void (%struct.ConvertTo*)* ()* 
@_ZNK9ConvertTocv1SEv.resolver -// CHECK: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver -// CHECK: @_ZN2S23fooEi.ifunc = ifunc i32 (%struct.S2*, i32), i32 (%struct.S2*, i32)* ()* @_ZN2S23fooEi.resolver +// LINUX: @_ZN1SaSERKS_.ifunc = ifunc %struct.S* (%struct.S*, %struct.S*), %struct.S* (%struct.S*, %struct.S*)* ()* @_ZN1SaSERKS_.resolver +// LINUX: @_ZNK9ConvertTocv1SEv.ifunc = ifunc void (%struct.ConvertTo*), void (%struct.ConvertTo*)* ()* @_ZNK9ConvertTocv1SEv.resolver +// LINUX: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver +// LINUX: @_ZN2S23fooEi.ifunc = ifunc i32 (%struct.S2*, i32), i32 (%struct.S2*, i32)* ()* @_ZN2S23fooEi.resolver // Templates: -// CHECK: @_ZN5templIiE3fooEi.ifunc = ifunc i32 (%struct.templ*, i32), i32 (%struct.templ*, i32)* ()* @_ZN5templIiE3fooEi.resolver -// CHECK: @_ZN5templIdE3fooEi.ifunc = ifunc i32 (%struct.templ.0*, i32), i32 (%struct.templ.0*, i32)* ()* @_ZN5templIdE3fooEi.resolver - -// CHECK: define i32 @_Z3barv() -// CHECK: %s = alloca %struct.S, align 1 -// CHECK: %s2 = alloca %struct.S, align 1 -// CHECK: %C = alloca %struct.ConvertTo, align 1 -// CHECK: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2 -// CHECK: call void @_ZNK9ConvertTocv1SEv.ifunc(%struct.ConvertTo* %C) -// CHECK: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2 -// CHECK: call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0) - -// CHECK: define %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.resolver() comdat -// CHECK: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.arch_ivybridge -// CHECK: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_ - -// CHECK: define void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.resolver() comdat -// CHECK: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.arch_ivybridge -// CHECK: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv - -// CHECK: define i32 
(%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2 -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi - -// CHECK: define i32 @_Z4bar2v() -// CHECK:call i32 @_ZN2S23fooEi.ifunc -// define i32 (%struct.S2*, i32)* @_ZN2S23fooEi.resolver() comdat -// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_sandybridge -// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_ivybridge -// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.sse4.2 -// CHECK: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi - -// CHECK: define i32 @_ZN2S23fooEi.sse4.2(%struct.S2* %this, i32) -// CHECK: define i32 @_ZN2S23fooEi.arch_ivybridge(%struct.S2* %this, i32) -// CHECK: define i32 @_ZN2S23fooEi(%struct.S2* %this, i32) - -// CHECK: define i32 @_Z9templ_usev() -// CHECK: call i32 @_ZN5templIiE3fooEi.ifunc -// CHECK: call i32 @_ZN5templIdE3fooEi.ifunc - -// CHECK: define i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.resolver() comdat -// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_sandybridge -// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_ivybridge -// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.sse4.2 -// CHECK: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi - -// CHECK: define i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.resolver() comdat -// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_sandybridge -// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_ivybridge -// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.sse4.2 -// CHECK: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi - -// CHECK: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32) -// CHECK: ret i32 0 - -// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32) - -// CHECK: define linkonce_odr i32 
@_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32) -// CHECK: ret i32 1 - -// CHECK: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32) -// CHECK: ret i32 2 - -// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi.sse4.2 -// CHECK: declare i32 @_ZN5templIiE3fooEi.arch_sandybridge -// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi.arch_ivybridge -// CHECK: define linkonce_odr i32 @_ZN5templIiE3fooEi - -// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi.sse4.2 -// CHECK: declare i32 @_ZN5templIdE3fooEi.arch_sandybridge -// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi.arch_ivybridge -// CHECK: define linkonce_odr i32 @_ZN5templIdE3fooEi +// LINUX: @_ZN5templIiE3fooEi.ifunc = ifunc i32 (%struct.templ*, i32), i32 (%struct.templ*, i32)* ()* @_ZN5templIiE3fooEi.resolver +// LINUX: @_ZN5templIdE3fooEi.ifunc = ifunc i32 (%struct.templ.0*, i32), i32 (%struct.templ.0*, i32)* ()* @_ZN5templIdE3fooEi.resolver + +// LINUX: define i32 @_Z3barv() +// LINUX: %s = alloca %struct.S, align 1 +// LINUX: %s2 = alloca %struct.S, align 1 +// LINUX: %C = alloca %struct.ConvertTo, align 1 +// LINUX: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2 +// LINUX: call void @_ZNK9ConvertTocv1SEv.ifunc(%struct.ConvertTo* %C) +// LINUX: call dereferenceable(1) %struct.S* @_ZN1SaSERKS_.ifunc(%struct.S* %s2 +// LINUX: call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0) + +// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"() +// WINDOWS: %s = alloca %struct.S, align 1 +// WINDOWS: %s2 = alloca %struct.S, align 1 +// WINDOWS: %C = alloca %struct.ConvertTo, align 1 +// WINDOWS: call dereferenceable(1) %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S* %s2 +// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ.resolver"(%struct.ConvertTo* %C +// WINDOWS: call dereferenceable(1) %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S* %s2 +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S* %s, i32 0) + +// LINUX: define %struct.S* (%struct.S*, 
%struct.S*)* @_ZN1SaSERKS_.resolver() comdat +// LINUX: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_.arch_ivybridge +// LINUX: ret %struct.S* (%struct.S*, %struct.S*)* @_ZN1SaSERKS_ + +// WINDOWS: define dso_local %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.resolver"(%struct.S*, %struct.S*) +// WINDOWS: call %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z.arch_ivybridge" +// WINDOWS: call %struct.S* @"??4S@@QEAAAEAU0@AEBU0@@Z" + +// LINUX: define void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.resolver() comdat +// LINUX: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv.arch_ivybridge +// LINUX: ret void (%struct.ConvertTo*)* @_ZNK9ConvertTocv1SEv + +// WINDOWS: define dso_local void @"??BConvertTo@@QEBA?AUS@@XZ.resolver"(%struct.ConvertTo*, %struct.S*) +// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ.arch_ivybridge" +// WINDOWS: call void @"??BConvertTo@@QEBA?AUS@@XZ" + +// LINUX: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2 +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi + +// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S*, i32) +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.sse4.2" +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z" + +// LINUX: define i32 @_Z4bar2v() +// LINUX: call i32 @_ZN2S23fooEi.ifunc + +// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"() +// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.resolver" + +// LINUX: define i32 (%struct.S2*, i32)* @_ZN2S23fooEi.resolver() comdat +// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_sandybridge +// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.arch_ivybridge +// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi.sse4.2 +// LINUX: ret i32 (%struct.S2*, i32)* @_ZN2S23fooEi 
+ +// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.resolver"(%struct.S2*, i32) +// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z.sse4.2" +// WINDOWS: call i32 @"?foo@S2@@QEAAHH@Z" + +// LINUX: define i32 @_ZN2S23fooEi.sse4.2(%struct.S2* %this, i32) +// LINUX: define i32 @_ZN2S23fooEi.arch_ivybridge(%struct.S2* %this, i32) +// LINUX: define i32 @_ZN2S23fooEi(%struct.S2* %this, i32) + +// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.sse4.2"(%struct.S2* %this, i32) +// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z.arch_ivybridge"(%struct.S2* %this, i32) +// WINDOWS: define dso_local i32 @"?foo@S2@@QEAAHH@Z"(%struct.S2* %this, i32) + +// LINUX: define i32 @_Z9templ_usev() +// LINUX: call i32 @_ZN5templIiE3fooEi.ifunc +// LINUX: call i32 @_ZN5templIdE3fooEi.ifunc + +// WINDOWS: define dso_local i32 @"?templ_use@@YAHXZ"() +// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.resolver" +// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.resolver" + +// LINUX: define i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.resolver() comdat +// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_sandybridge +// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.arch_ivybridge +// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi.sse4.2 +// LINUX: ret i32 (%struct.templ*, i32)* @_ZN5templIiE3fooEi + +// WINDOWS: define dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.resolver"(%struct.templ*, i32) +// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z.sse4.2" +// WINDOWS: call i32 @"?foo@?$templ@H@@QEAAHH@Z" + +// LINUX: define i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.resolver() comdat +// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.arch_sandybridge +// LINUX: ret i32 (%struct.templ.0*, i32)* 
@_ZN5templIdE3fooEi.arch_ivybridge +// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi.sse4.2 +// LINUX: ret i32 (%struct.templ.0*, i32)* @_ZN5templIdE3fooEi + +// WINDOWS: define dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.resolver"(%struct.templ.0*, i32) comdat +// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z.sse4.2" +// WINDOWS: call i32 @"?foo@?$templ@N@@QEAAHH@Z" + +// LINUX: define linkonce_odr i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32) +// LINUX: ret i32 0 + +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %this, i32) +// WINDOWS: ret i32 0 + +// LINUX: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32) + +// WINDOWS: declare dso_local i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S*, i32) + +// LINUX: define linkonce_odr i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32) +// LINUX: ret i32 1 + +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %this, i32) +// WINDOWS: ret i32 1 + +// LINUX: define linkonce_odr i32 @_ZN1S3fooEi(%struct.S* %this, i32) +// LINUX: ret i32 2 + +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %this, i32) +// WINDOWS: ret i32 2 + +// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi.sse4.2 +// LINUX: declare i32 @_ZN5templIiE3fooEi.arch_sandybridge +// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi.arch_ivybridge +// LINUX: define linkonce_odr i32 @_ZN5templIiE3fooEi + +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.sse4.2" +// WINDOWS: declare dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@H@@QEAAHH@Z" + +// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi.sse4.2 +// LINUX: 
declare i32 @_ZN5templIdE3fooEi.arch_sandybridge +// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi.arch_ivybridge +// LINUX: define linkonce_odr i32 @_ZN5templIdE3fooEi + +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.sse4.2" +// WINDOWS: declare dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_sandybridge" +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z.arch_ivybridge" +// WINDOWS: define linkonce_odr dso_local i32 @"?foo@?$templ@N@@QEAAHH@Z" diff --git a/clang/test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp b/clang/test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp index 63353c1..1c051b3 100644 --- a/clang/test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp +++ b/clang/test/CodeGenCXX/attr-target-mv-out-of-line-defs.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS struct S { int __attribute__((target("sse4.2"))) foo(int); int __attribute__((target("arch=sandybridge"))) foo(int); @@ -15,25 +16,46 @@ int bar() { return s.foo(0); } -// CHECK: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver +// LINUX: @_ZN1S3fooEi.ifunc = ifunc i32 (%struct.S*, i32), i32 (%struct.S*, i32)* ()* @_ZN1S3fooEi.resolver -// CHECK: define i32 @_ZN1S3fooEi(%struct.S* %this, i32) -// CHECK: ret i32 2 +// LINUX: define i32 @_ZN1S3fooEi(%struct.S* %this, i32) +// LINUX: ret i32 2 -// CHECK: define i32 @_ZN1S3fooEi.sse4.2(%struct.S* %this, i32) -// CHECK: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %this, i32) +// WINDOWS: ret i32 2 -// CHECK: define i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32) -// CHECK: ret i32 1 +// LINUX: define i32 
@_ZN1S3fooEi.sse4.2(%struct.S* %this, i32) +// LINUX: ret i32 0 -// CHECK: define i32 @_Z3barv() -// CHECK: %s = alloca %struct.S, align 1 -// CHECK: %call = call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0) +// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %this, i32) +// WINDOWS: ret i32 0 -// CHECK: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2 -// CHECK: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi +// LINUX: define i32 @_ZN1S3fooEi.arch_ivybridge(%struct.S* %this, i32) +// LINUX: ret i32 1 -// CHECK: declare i32 @_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32) +// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %this, i32) +// WINDOWS: ret i32 1 + +// LINUX: define i32 @_Z3barv() +// LINUX: %s = alloca %struct.S, align 1 +// LINUX: %call = call i32 @_ZN1S3fooEi.ifunc(%struct.S* %s, i32 0) + +// WINDOWS: define dso_local i32 @"?bar@@YAHXZ"() +// WINDOWS: %s = alloca %struct.S, align 1 +// WINDOWS: %call = call i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S* %s, i32 0) + +// LINUX: define i32 (%struct.S*, i32)* @_ZN1S3fooEi.resolver() comdat +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_sandybridge +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.arch_ivybridge +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi.sse4.2 +// LINUX: ret i32 (%struct.S*, i32)* @_ZN1S3fooEi + +// WINDOWS: define dso_local i32 @"?foo@S@@QEAAHH@Z.resolver"(%struct.S*, i32) comdat +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S* %0, i32 %1) +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.arch_ivybridge"(%struct.S* %0, i32 %1) +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z.sse4.2"(%struct.S* %0, i32 %1) +// WINDOWS: call i32 @"?foo@S@@QEAAHH@Z"(%struct.S* %0, i32 %1) + +// LINUX: declare i32 
@_ZN1S3fooEi.arch_sandybridge(%struct.S*, i32) + +// WINDOWS: declare dso_local i32 @"?foo@S@@QEAAHH@Z.arch_sandybridge"(%struct.S*, i32) diff --git a/clang/test/CodeGenCXX/attr-target-mv-overloads.cpp b/clang/test/CodeGenCXX/attr-target-mv-overloads.cpp index c72ea77..a213d24 100644 --- a/clang/test/CodeGenCXX/attr-target-mv-overloads.cpp +++ b/clang/test/CodeGenCXX/attr-target-mv-overloads.cpp @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS int __attribute__((target("sse4.2"))) foo_overload(int) { return 0; } int __attribute__((target("arch=sandybridge"))) foo_overload(int); @@ -13,38 +14,69 @@ int bar2() { return foo_overload() + foo_overload(1); } -// CHECK: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver -// CHECK: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver - - -// CHECK: define i32 @_Z12foo_overloadi.sse4.2(i32) -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadi(i32) -// CHECK: ret i32 2 -// CHECK: define i32 @_Z12foo_overloadv.sse4.2() -// CHECK: ret i32 0 -// CHECK: define i32 @_Z12foo_overloadv.arch_ivybridge() -// CHECK: ret i32 1 -// CHECK: define i32 @_Z12foo_overloadv() -// CHECK: ret i32 2 - -// CHECK: define i32 @_Z4bar2v() -// CHECK: call i32 @_Z12foo_overloadv.ifunc() -// CHECK: call i32 @_Z12foo_overloadi.ifunc(i32 1) - -// CHECK: define i32 ()* @_Z12foo_overloadv.resolver() comdat -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_sandybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge -// CHECK: ret i32 ()* @_Z12foo_overloadv.sse4.2 -// CHECK: ret i32 ()* @_Z12foo_overloadv - -// CHECK: 
define i32 (i32)* @_Z12foo_overloadi.resolver() comdat -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge -// CHECK: ret i32 (i32)* @_Z12foo_overloadi.sse4.2 -// CHECK: ret i32 (i32)* @_Z12foo_overloadi - -// CHECK: declare i32 @_Z12foo_overloadv.arch_sandybridge() -// CHECK: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32) +// LINUX: @_Z12foo_overloadv.ifunc = ifunc i32 (), i32 ()* ()* @_Z12foo_overloadv.resolver +// LINUX: @_Z12foo_overloadi.ifunc = ifunc i32 (i32), i32 (i32)* ()* @_Z12foo_overloadi.resolver + +// LINUX: define i32 @_Z12foo_overloadi.sse4.2(i32) +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadi.arch_ivybridge(i32) +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadi(i32) +// LINUX: ret i32 2 +// LINUX: define i32 @_Z12foo_overloadv.sse4.2() +// LINUX: ret i32 0 +// LINUX: define i32 @_Z12foo_overloadv.arch_ivybridge() +// LINUX: ret i32 1 +// LINUX: define i32 @_Z12foo_overloadv() +// LINUX: ret i32 2 + +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.sse4.2"(i32) +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.arch_ivybridge"(i32) +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z"(i32) +// WINDOWS: ret i32 2 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.sse4.2"() +// WINDOWS: ret i32 0 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.arch_ivybridge"() +// WINDOWS: ret i32 1 +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ"() +// WINDOWS: ret i32 2 + +// LINUX: define i32 @_Z4bar2v() +// LINUX: call i32 @_Z12foo_overloadv.ifunc() +// LINUX: call i32 @_Z12foo_overloadi.ifunc(i32 1) + +// WINDOWS: define dso_local i32 @"?bar2@@YAHXZ"() +// WINDOWS: call i32 @"?foo_overload@@YAHXZ.resolver"() +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.resolver"(i32 1) + +// LINUX: define i32 ()* @_Z12foo_overloadv.resolver() comdat +// LINUX: ret i32 ()* 
@_Z12foo_overloadv.arch_sandybridge +// LINUX: ret i32 ()* @_Z12foo_overloadv.arch_ivybridge +// LINUX: ret i32 ()* @_Z12foo_overloadv.sse4.2 +// LINUX: ret i32 ()* @_Z12foo_overloadv + +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHXZ.resolver"() comdat +// WINDOWS: call i32 @"?foo_overload@@YAHXZ.arch_sandybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHXZ.arch_ivybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHXZ.sse4.2" +// WINDOWS: call i32 @"?foo_overload@@YAHXZ" + +// LINUX: define i32 (i32)* @_Z12foo_overloadi.resolver() comdat +// LINUX: ret i32 (i32)* @_Z12foo_overloadi.arch_sandybridge +// LINUX: ret i32 (i32)* @_Z12foo_overloadi.arch_ivybridge +// LINUX: ret i32 (i32)* @_Z12foo_overloadi.sse4.2 +// LINUX: ret i32 (i32)* @_Z12foo_overloadi + +// WINDOWS: define dso_local i32 @"?foo_overload@@YAHH@Z.resolver"(i32) comdat +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_sandybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.arch_ivybridge" +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z.sse4.2" +// WINDOWS: call i32 @"?foo_overload@@YAHH@Z" + +// LINUX: declare i32 @_Z12foo_overloadv.arch_sandybridge() +// LINUX: declare i32 @_Z12foo_overloadi.arch_sandybridge(i32) + +// WINDOWS: declare dso_local i32 @"?foo_overload@@YAHXZ.arch_sandybridge"() +// WINDOWS: declare dso_local i32 @"?foo_overload@@YAHH@Z.arch_sandybridge"(i32) diff --git a/clang/test/Sema/attr-target-mv-bad-target.c b/clang/test/Sema/attr-target-mv-bad-target.c index 9cf3c5e..4d61872 100644 --- a/clang/test/Sema/attr-target-mv-bad-target.c +++ b/clang/test/Sema/attr-target-mv-bad-target.c @@ -1,4 +1,3 @@ -// RUN: %clang_cc1 -triple x86_64-windows-pc -fsyntax-only -verify %s // RUN: %clang_cc1 -triple arm-none-eabi -fsyntax-only -verify %s int __attribute__((target("sse4.2"))) redecl1(void) { return 1; } -- 2.7.4