From 8be5a0fe12bb9114bb82986b1dcb9205699aa085 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 31 Dec 2019 12:41:57 -0500 Subject: [PATCH] [OPENMP]Emit artificial threadprivate vars as threadlocal, if possible. It may improve performance for declare reduction constructs. --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 9 +++++++-- .../OpenMP/master_taskloop_reduction_codegen.cpp | 4 ++-- .../master_taskloop_simd_reduction_codegen.cpp | 4 ++-- .../parallel_master_taskloop_reduction_codegen.cpp | 4 ++-- ...llel_master_taskloop_simd_reduction_codegen.cpp | 4 ++-- clang/test/OpenMP/taskloop_reduction_codegen.cpp | 23 +++++++++------------- .../OpenMP/taskloop_simd_reduction_codegen.cpp | 4 ++-- 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 00f8fb5..59f352d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3043,10 +3043,15 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) { std::string Suffix = getName({"artificial", ""}); - std::string CacheSuffix = getName({"cache", ""}); llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); + if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && + CGM.getTarget().isTLSSupported()) { + cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); + return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); + } + std::string CacheSuffix = getName({"cache", ""}); llvm::Value *Args[] = { emitUpdateLocation(CGF, SourceLocation()), getThreadID(CGF, SourceLocation()), @@ -3060,7 +3065,7 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, CGF.EmitRuntimeCall( createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args), VarLVType->getPointerTo(/*AddrSpace=*/0)), - CGM.getPointerAlign()); + 
CGM.getContext().getTypeAlignInChars(VarType)); } void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, diff --git a/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp index 59ac23f..70b8334 100644 --- a/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -fnoopenmp-use-tls -std=c++98 | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -fnoopenmp-use-tls -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp index ca0c0fe..1b88310 100644 --- a/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 -fnoopenmp-use-tls | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck 
--check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 -fnoopenmp-use-tls | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp index 7a0baa0..dc157ab 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 -fnoopenmp-use-tls | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 -fnoopenmp-use-tls | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp index a2c85fd..9c57e11 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple 
powerpc64le-unknown-linux-gnu -std=c++98 -fnoopenmp-use-tls | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 -fnoopenmp-use-tls | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics diff --git a/clang/test/OpenMP/taskloop_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_reduction_codegen.cpp index 16bafb3..fec3e62 100644 --- a/clang/test/OpenMP/taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_reduction_codegen.cpp @@ -4,6 +4,10 @@ // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics +// CHECK: [[RED_SIZE1:@reduction_size[.].+]] = common thread_local global i64 0 +// CHECK: [[RED1:@reduction[.].+]] = common thread_local global i8* null +// CHECK: [[RED_SIZE2:@reduction_size[.].+]] = common thread_local global i64 0 + struct S { float a; S() : a(0.0f) {} @@ -163,10 +167,7 @@ sum = 0.0; // CHECK: ret void // CHECK: define internal void @[[RED_INIT2]](i8* %0) -// CHECK: call i8* @__kmpc_threadprivate_cached( -// CHECK: [[ORIG_PTR_ADDR:%.+]] = call i8* @__kmpc_threadprivate_cached( -// CHECK: [[ORIG_PTR_REF:%.+]] = bitcast i8* [[ORIG_PTR_ADDR]] to i8** -// CHECK: load i8*, i8** [[ORIG_PTR_REF]], +// CHECK: load i8*, i8** [[RED1]], // CHECK: call void [[OMP_INIT1:@.+]]( // CHECK: ret void @@ -177,12 +178,12 @@ sum = 0.0; // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( // CHECK: define internal void @[[RED_FINI2]](i8* %0) -// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: load i64, i64* [[RED_SIZE1]] // CHECK: call void @ // CHECK: ret void // CHECK: define internal void @[[RED_COMB2]](i8* %0, i8* %1) -// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: load i64, i64* [[RED_SIZE1]] // CHECK: 
call void [[OMP_COMB1]]( // CHECK: ret void @@ -196,26 +197,20 @@ sum = 0.0; // CHECK: ret void // CHECK: define internal void @[[RED_INIT4]](i8* %0) -// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: load i64, i64* [[RED_SIZE2]] // CHECK: store float 0.000000e+00, float* % // CHECK: ret void // CHECK: define internal void @[[RED_COMB4]](i8* %0, i8* %1) -// CHECK: call i8* @__kmpc_threadprivate_cached( +// CHECK: load i64, i64* [[RED_SIZE2]] // CHECK: fadd float % // CHECK: store float %{{.+}}, float* % // CHECK: ret void -// CHECK-NOT: call i8* @__kmpc_threadprivate_cached( // CHECK: call i8* @__kmpc_task_reduction_get_th_data( -// CHECK: call i8* @__kmpc_threadprivate_cached( -// CHECK: call i8* @__kmpc_threadprivate_cached( // CHECK: call i8* @__kmpc_task_reduction_get_th_data( -// CHECK-NOT: call i8* @__kmpc_threadprivate_cached( // CHECK: call i8* @__kmpc_task_reduction_get_th_data( -// CHECK: call i8* @__kmpc_threadprivate_cached( // CHECK: call i8* @__kmpc_task_reduction_get_th_data( -// CHECK-NOT: call i8* @__kmpc_threadprivate_cached( // CHECK-DAG: distinct !DISubprogram(linkageName: "[[TASK]]", scope: ! 
// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT1]]" diff --git a/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp index ba87c36..4cb8d5f 100644 --- a/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp @@ -1,6 +1,6 @@ -// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -fnoopenmp-use-tls -std=c++98 | FileCheck %s -// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -fnoopenmp-use-tls -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s // SIMD-ONLY0-NOT: {{__kmpc|__tgt}} // expected-no-diagnostics -- 2.7.4