From: Samuel Antao Date: Thu, 3 Mar 2016 16:20:23 +0000 (+0000) Subject: [OpenMP] Code generation for teams - kernel launching X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b68e2db8f9d03e804e0dc59e1aefc9796d7199dd;p=platform%2Fupstream%2Fllvm.git [OpenMP] Code generation for teams - kernel launching Summary: This patch implements the launching of a target region in the presence of a nested teams region, i.e calls tgt_target_teams with the required arguments gathered from the enclosed teams directive. The actual codegen of the region enclosed by the teams construct will be contributed in a separate patch. Reviewers: hfinkel, arpith-jacob, kkwli0, carlo.bertolli, ABataev Subscribers: cfe-commits, caomhin, fraggamuffin Differential Revision: http://reviews.llvm.org/D17019 llvm-svn: 262625 --- diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c4eaabe..0433115 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -252,6 +252,70 @@ private: StringRef HelperName; }; +static void EmptyCodeGen(CodeGenFunction &) { + llvm_unreachable("No codegen for expressions"); +} +/// \brief API for generation of expressions captured in a innermost OpenMP +/// region. +class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo { +public: + CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) + : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, + OMPD_unknown, + /*HasCancel=*/false), + PrivScope(CGF) { + // Make sure the globals captured in the provided statement are local by + // using the privatization logic. We assume the same variable is not + // captured more than once. + for (auto &C : CS.captures()) { + if (!C.capturesVariable() && !C.capturesVariableByCopy()) + continue; + + const VarDecl *VD = C.getCapturedVar(); + if (VD->isLocalVarDeclOrParm()) + continue; + + DeclRefExpr DRE(const_cast(VD), + /*RefersToEnclosingVariableOrCapture=*/false, + VD->getType().getNonReferenceType(), VK_LValue, + SourceLocation()); + PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } + (void)PrivScope.Privatize(); + } + + /// \brief Lookup the captured field decl for a variable. + const FieldDecl *lookup(const VarDecl *VD) const override { + if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) + return FD; + return nullptr; + } + + /// \brief Emit the captured statement body. + void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { + llvm_unreachable("No body for expressions"); + } + + /// \brief Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getThreadIDVariable() const override { + llvm_unreachable("No thread id for expressions"); + } + + /// \brief Get the name of the capture helper. + StringRef getHelperName() const override { + llvm_unreachable("No helper name for expressions"); + } + + static bool classof(const CGCapturedStmtInfo *Info) { return false; } + +private: + /// Private scope to capture global variables. + CodeGenFunction::OMPPrivateScope PrivScope; +}; + /// \brief RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; @@ -481,6 +545,10 @@ enum OpenMPRTLFunction { // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t // *arg_types); OMPRTL__tgt_target, + // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); @@ -1153,6 +1221,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_target_teams: { + // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -3972,6 +4058,102 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction( DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID); } +/// \brief Emit the num_teams clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no num_teams clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any num_teams clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the number of teams to pass to the runtime function call. This is done + // by generating the expression in a inlined region. This is required because + // the expression is captured in the enclosing target environment when the + // teams directive is not combined with target. + + const CapturedStmt &CS = *cast(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (auto *TeamsDir = dyn_cast(CS.getCapturedStmt())) { + if (auto *NTE = TeamsDir->getSingleClause()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); + return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // If we have an enclosed teams directive but no num_teams clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + +/// \brief Emit the thread_limit clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no thread_limit clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any thread_limit clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the thread limit to pass to the runtime function call. This is done + // by generating the expression in a inlined region. This is required because + // the expression is captured in the enclosing target environment when the + // teams directive is not combined with target. + + const CapturedStmt &CS = *cast(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (auto *TeamsDir = dyn_cast(CS.getCapturedStmt())) { + if (auto *TLE = TeamsDir->getSingleClause()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); + return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // If we have an enclosed teams directive but no thread_limit clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, @@ -4100,7 +4282,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, hasVLACaptures, Device, OutlinedFnID, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { + OffloadErrorQType, &D](CodeGenFunction &CGF) { unsigned PointerNumVal = BasePointers.size(); llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); llvm::Value *BasePointersArray; @@ -4240,11 +4422,34 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, PointerNum, BasePointersArray, - PointersArray, SizesArray, MapTypesArray}; - auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), - OffloadingArgs); + // Return value of the runtime offloading call. + llvm::Value *Return; + + auto *NumTeams = emitNumTeamsClauseForTargetDirective(*this, CGF, D); + auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(*this, CGF, D); + + // If we have NumTeams defined this means that we have an enclosed teams + // region. Therefore we also expect to have ThreadLimit defined. These two + // values should be defined in the presence of a teams directive, regardless + // of having any clauses associated. If the user is using teams but no + // clauses, these two values will be the default that should be passed to + // the runtime library - a 32-bit integer with the value zero. + if (NumTeams) { + assert(ThreadLimit && "Thread limit expression should be available along " + "with number of teams."); + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumTeams, ThreadLimit}; + Return = CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + } else { + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, BasePointersArray, + PointersArray, SizesArray, MapTypesArray}; + Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), + OffloadingArgs); + } CGF.EmitStoreOfScalar(Return, OffloadError); }; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index ddceb1b..7f4e195 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2716,8 +2716,12 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { CapturedVars); } -void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { - llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); +void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { + OMPLexicalScope Scope(*this, S); + const CapturedStmt &CS = *cast(S.getAssociatedStmt()); + + // FIXME: We should fork teams here instead of just emit the statement. + EmitStmt(CS.getCapturedStmt()); } void CodeGenFunction::EmitOMPCancellationPointDirective( diff --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp new file mode 100644 index 0000000..4f244a9 --- /dev/null +++ b/clang/test/OpenMP/teams_codegen.cpp @@ -0,0 +1,210 @@ +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// Test host codegen. +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK1 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +#ifdef CK1 + +int Gbla; +long long Gblb; +int &Gblc = Gbla; + +// CK1-LABEL: teams_argument_global_local +int teams_argument_global_local(int a){ + int comp = 1; + + int la = 23; + float lc = 25.0; + + // CK1: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 0) + // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams num_teams(la) + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 [[NT:%[^,]+]]) + // CK1-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + + // CK1: call void @{{.+}}(i{{64|32}} %{{.+}}) + #pragma omp target + #pragma omp teams thread_limit(la) + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], [[NTB:%[^,]+]] + // CK1-DAG: [[NTA]] = load i32, i32* @Gbla, + // CK1-DAG: [[NTB]] = load i32, i32* %{{.+}}, + + // CK1-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32 + // CK1-DAG: [[TLA]] = add nsw i64 [[TLB:%[^,]+]], [[TLC:%[^,]+]] + // CK1-DAG: [[TLC]] = fptosi float [[TLD:%[^,]+]] to i64 + // CK1-DAG: [[TLD]] = load float, float* %{{.+}}, + // CK1-DAG: [[TLB]] = load i64, i64* @Gblb, + + // CK1: call void @{{.+}}(i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}, i{{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Gbla+a) thread_limit(Gblb+(long long)lc) + { + ++comp; + } + + // CK1-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 {{.+}}, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK1-DAG: [[NT]] = add nsw i32 [[NTA:%[^,]+]], 1 + // CK1-DAG: [[NTA]] = load i32, i32* @Gbla, + + // CK1-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 2 + // CK1-DAG: [[TLA]] = load i32, i32* @Gbla, + + // CK1: call void @{{.+}}(i{{.+}} {{.+}} + #pragma omp target + #pragma omp teams num_teams(Gblc+1) thread_limit(Gblc+2) + { + comp += Gblc; + } + + return comp; +} + +#endif // CK1 + +// Test host codegen. +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK2 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32 +#ifdef CK2 + +// CK2-DAG: [[SSI:%.+]] = type { i32, float } +// CK2-DAG: [[SSL:%.+]] = type { i64, float } +template +struct SS{ + T a; + float b; +}; + +SS Gbla; +SS Gblb; + +// CK2-LABEL: teams_template_arg +int teams_template_arg(void) { + int comp = 1; + + SS la; + SS lb; + + // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK2-DAG: [[NT]] = load i32, i32* getelementptr inbounds ([[SSI]], [[SSI]]* @Gbla, i32 0, i32 0) + + // CK2-DAG: [[TL]] = trunc i64 [[TLA:%[^,]+]] to i32 + // CK2-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i64 + // CK2-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]], + // CK2-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* %{{.+}}, i32 0, i32 1 + + // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Gbla.a) thread_limit((long long)la.b) + { + ++comp; + } + + // CK2-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 [[TL:%[^,]+]]) + + // CK2-DAG: [[TL]] = trunc i64 [[TLD:%[^,]+]] to i32 + // CK2-DAG: [[TLD]] = load i64, i64* getelementptr inbounds ([[SSL]], [[SSL]]* @Gblb, i32 0, i32 0), + + // CK2-DAG: [[NT]] = trunc i64 [[NTA:%[^,]+]] to i32 + // CK2-DAG: [[NTA]] = fptosi float [[NTB:%[^,]+]] to i64 + // CK2-DAG: [[NTB]] = load float, float* [[NTC:%[^,]+]], + // CK2-DAG: [[NTC]] = getelementptr inbounds [[SSL]], [[SSL]]* %{{.+}}, i32 0, i32 1 + + // CK2: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams((long long)lb.b) thread_limit(Gblb.a) + { + ++comp; + } + return comp; +} +#endif // CK2 + +// Test host codegen. +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -omptargets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64 +// RUN: %clang_cc1 -DCK3 -verify -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -DCK3 -fopenmp -x c++ -triple i386-unknown-unknown -omptargets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32 +#ifdef CK3 + +// CK3: [[SSI:%.+]] = type { i32, float } +// CK3-LABEL: teams_template_struct + +template +struct SS{ + T a; + float b; + + int foo(void) { + int comp = 1; + + // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 [[NT:%[^,]+]], i32 123) + + // CK3-DAG: [[NT]] = load i32, i32* [[NTA:%[^,]+]], + // CK3-DAG: [[NTA]] = getelementptr inbounds [[SSI]], [[SSI]]* [[NTB:%[^,]+]], i32 0, i32 0 + // CK3-DAG: [[NTB]] = load [[SSI]]*, [[SSI]]** %{{.+}}, + + // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(a) thread_limit(X) + { + ++comp; + } + + // CK3-DAG: call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 456, i32 [[TL:%[^,]+]]) + + // CK3-DAG: [[TL]] = add nsw i32 [[TLA:%[^,]+]], 123 + // CK3-DAG: [[TLA]] = fptosi float [[TLB:%[^,]+]] to i32 + // CK3-DAG: [[TLB]] = load float, float* [[TLC:%[^,]+]], + // CK3-DAG: [[TLC]] = getelementptr inbounds [[SSI]], [[SSI]]* [[THIS:%[^,]+]], i32 0, i32 1 + + // CK3: call void @{{.+}}({{.+}} {{.+}}, {{.+}} {{.+}}) + #pragma omp target + #pragma omp teams num_teams(Y) thread_limit((int)b+X) + { + ++comp; + } + return comp; + } +}; + +int teams_template_struct(void) { + SS V; + return V.foo(); + +} +#endif // CK3 +#endif