From 852525de25aa24c93a8193e56096d44230c25e7a Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 2 Mar 2018 17:17:12 +0000 Subject: [PATCH] [OPENMP] Treat local variables in CUDA mode as thread local. In CUDA mode all local variables are actually thread local|threadprivate, not private, and, thus, they cannot be shared between threads|lanes. llvm-svn: 326590 --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Sema/SemaOpenMP.cpp | 74 +++++++++++--- .../OpenMP/nvptx_target_cuda_mode_messages.cpp | 108 +++++++++++++++++++++ 3 files changed, 168 insertions(+), 16 deletions(-) create mode 100644 clang/test/OpenMP/nvptx_target_cuda_mode_messages.cpp diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d813bab..3dddb02 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1427,7 +1427,7 @@ def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group, Flags<[CC1Option HelpText<"Emit OpenMP code only for SIMD-based constructs.">; def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group, Flags<[CC1Option, NoArgumentUnused]>; def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group, Flags<[CC1Option, NoArgumentUnused]>; -def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group, Flags<[CC1Option, NoArgumentUnused]>; +def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group, Flags<[NoArgumentUnused]>; def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group; def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group; def fno_escaping_block_tail_calls : Flag<["-"], "fno-escaping-block-tail-calls">, Group, Flags<[CC1Option]>; diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b8f4f2b..be51787 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -936,10 +936,11 @@ DSAStackTy::getTopMostTaskgroupReductionData(ValueDecl *D, SourceRange &SR, bool DSAStackTy::isOpenMPLocal(VarDecl *D, StackTy::reverse_iterator Iter) { D = D->getCanonicalDecl(); - if (!isStackEmpty() && Stack.back().first.size() > 1) { + if (!isStackEmpty()) { reverse_iterator I = Iter, E = Stack.back().first.rend(); Scope *TopScope = nullptr; - while (I != E && !isParallelOrTaskRegion(I->Directive)) + while (I != E && !isParallelOrTaskRegion(I->Directive) && + !isOpenMPTargetExecutionDirective(I->Directive)) ++I; if (I == E) return false; @@ -956,20 +957,7 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) { D = getCanonicalDecl(D); DSAVarData DVar; - // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced - // in a Construct, C/C++, predetermined, p.1] - // Variables appearing in threadprivate directives are threadprivate. auto *VD = dyn_cast(D); - if ((VD && VD->getTLSKind() != VarDecl::TLS_None && - !(VD->hasAttr() && - SemaRef.getLangOpts().OpenMPUseTLS && - SemaRef.getASTContext().getTargetInfo().isTLSSupported())) || - (VD && VD->getStorageClass() == SC_Register && - VD->hasAttr() && !VD->isLocalVarDecl())) { - addDSA(D, buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(), - D->getLocation()), - OMPC_threadprivate); - } auto TI = Threadprivates.find(D); if (TI != Threadprivates.end()) { DVar.RefExpr = TI->getSecond().RefExpr.getPointer(); @@ -981,6 +969,62 @@ DSAStackTy::DSAVarData DSAStackTy::getTopDSA(ValueDecl *D, bool FromParent) { VD->getAttr()->getLocation()); DVar.CKind = OMPC_threadprivate; addDSA(D, DVar.RefExpr, OMPC_threadprivate); + return DVar; + } + // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced + // in a Construct, C/C++, predetermined, p.1] + // Variables appearing in threadprivate directives are threadprivate. + if ((VD && VD->getTLSKind() != VarDecl::TLS_None && + !(VD->hasAttr() && + SemaRef.getLangOpts().OpenMPUseTLS && + SemaRef.getASTContext().getTargetInfo().isTLSSupported())) || + (VD && VD->getStorageClass() == SC_Register && + VD->hasAttr() && !VD->isLocalVarDecl())) { + DVar.RefExpr = buildDeclRefExpr( + SemaRef, VD, D->getType().getNonReferenceType(), D->getLocation()); + DVar.CKind = OMPC_threadprivate; + addDSA(D, DVar.RefExpr, OMPC_threadprivate); + return DVar; + } + if (SemaRef.getLangOpts().OpenMPCUDAMode && VD && + VD->isLocalVarDeclOrParm() && !isStackEmpty() && + !isLoopControlVariable(D).first) { + auto IterTarget = + std::find_if(Stack.back().first.rbegin(), Stack.back().first.rend(), + [](const SharingMapTy &Data) { + return isOpenMPTargetExecutionDirective(Data.Directive); + }); + if (IterTarget != Stack.back().first.rend()) { + auto ParentIterTarget = std::next(IterTarget, 1); + auto Iter = Stack.back().first.rbegin(); + while (Iter != ParentIterTarget) { + if (isOpenMPLocal(VD, Iter)) { + DVar.RefExpr = + buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(), + D->getLocation()); + DVar.CKind = OMPC_threadprivate; + return DVar; + } + std::advance(Iter, 1); + } + if (!isClauseParsingMode() || IterTarget != Stack.back().first.rbegin()) { + auto DSAIter = IterTarget->SharingMap.find(D); + if (DSAIter != IterTarget->SharingMap.end() && + isOpenMPPrivate(DSAIter->getSecond().Attributes)) { + DVar.RefExpr = DSAIter->getSecond().RefExpr.getPointer(); + DVar.CKind = OMPC_threadprivate; + return DVar; + } else if (!SemaRef.IsOpenMPCapturedByRef( + D, std::distance(ParentIterTarget, + Stack.back().first.rend()))) { + DVar.RefExpr = + buildDeclRefExpr(SemaRef, VD, D->getType().getNonReferenceType(), + IterTarget->ConstructLoc); + DVar.CKind = OMPC_threadprivate; + return DVar; + } + } + } } if (isStackEmpty()) diff --git a/clang/test/OpenMP/nvptx_target_cuda_mode_messages.cpp b/clang/test/OpenMP/nvptx_target_cuda_mode_messages.cpp new file mode 100644 index 0000000..eecc26c --- /dev/null +++ b/clang/test/OpenMP/nvptx_target_cuda_mode_messages.cpp @@ -0,0 +1,108 @@ +// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-cuda-mode -fopenmp-host-ir-file-path %t-ppc-host.bc -o - + +template +struct TT { + tx X; + ty Y; +}; + +int foo(int n, double *ptr) { + int a = 0; + short aa = 0; + float b[10]; + double c[5][10]; + TT d; + +#pragma omp target firstprivate(a) map(tofrom: b) // expected-note 2 {{defined as threadprivate or thread local}} + { + int c; // expected-note {{defined as threadprivate or thread local}} +#pragma omp parallel shared(a, b, c, aa) // expected-error 3 {{threadprivate or thread local variable cannot be shared}} + b[a] = a; +#pragma omp parallel for + for (int i = 0; i < 10; ++i) // expected-note {{defined as threadprivate or thread local}} +#pragma omp parallel shared(i) // expected-error {{threadprivate or thread local variable cannot be shared}} + ++i; + } + +#pragma omp target map(aa, b, c, d) + { + int e; // expected-note {{defined as threadprivate or thread local}} +#pragma omp parallel private(b, e) // expected-error {{threadprivate or thread local variable cannot be private}} + { + aa += 1; + b[2] = 1.0; + c[1][2] = 1.0; + d.X = 1; + d.Y = 1; + } + } + +#pragma omp target private(ptr) + { + ptr[0]++; + } + + return a; +} + +template +tx ftemplate(int n) { + tx a = 0; + tx b[10]; + +#pragma omp target reduction(+ \ + : a, b) // expected-note {{defined as threadprivate or thread local}} + { + int e; // expected-note {{defined as threadprivate or thread local}} +#pragma omp parallel shared(a, e) // expected-error 2 {{threadprivate or thread local variable cannot be shared}} + a += 1; + b[2] += 1; + } + + return a; +} + +static int fstatic(int n) { + int a = 0; + char aaa = 0; + int b[10]; + +#pragma omp target firstprivate(a, aaa, b) + { + a += 1; + aaa += 1; + b[2] += 1; + } + + return a; +} + +struct S1 { + double a; + + int r1(int n) { + int b = n + 1; + +#pragma omp target firstprivate(b) // expected-note {{defined as threadprivate or thread local}} + { + int c; // expected-note {{defined as threadprivate or thread local}} +#pragma omp parallel shared(b, c) // expected-error 2 {{threadprivate or thread local variable cannot be shared}} + this->a = (double)b + 1.5; + } + + return (int)b; + } +}; + +int bar(int n, double *ptr) { + int a = 0; + a += foo(n, ptr); + S1 S; + a += S.r1(n); + a += fstatic(n); + a += ftemplate(n); // expected-note {{in instantiation of function template specialization 'ftemplate' requested here}} + + return a; +} + -- 2.7.4