From 6bc2732f71e48c2d7a1d6182b6fc2437f533616d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 5 Oct 2018 15:27:47 +0000 Subject: [PATCH] [OPENMP][NVPTX] Fix emission of __kmpc_global_thread_num() for non-SPMD mode. __kmpc_global_thread_num() should be called before initialization of the runtime. llvm-svn: 343857 --- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 11 +++++++---- clang/test/OpenMP/nvptx_teams_codegen.cpp | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index f0f0a73..b75891e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1083,12 +1083,15 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKernel(const OMPExecutableDirective &D, CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) : EST(EST), WST(WST) {} void Enter(CodeGenFunction &CGF) override { - static_cast(CGF.CGM.getOpenMPRuntime()) - .emitNonSPMDEntryHeader(CGF, EST, WST); + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); + RT.emitNonSPMDEntryHeader(CGF, EST, WST); + // Skip target region initialization. + RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true); } void Exit(CodeGenFunction &CGF) override { - static_cast(CGF.CGM.getOpenMPRuntime()) - .emitNonSPMDEntryFooter(CGF, EST); + auto &RT = static_cast(CGF.CGM.getOpenMPRuntime()); + RT.clearLocThreadIdInsertPt(CGF); + RT.emitNonSPMDEntryFooter(CGF, EST); } } Action(EST, WST); CodeGen.setAction(Action); diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp index 2c50f03..4e3f267 100644 --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -105,7 +105,6 @@ int main (int argc, char **argv) { // CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}}, // CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}}, // CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}}, -// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num( // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]], // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]], // CK2: store i{{[0-9]+}} [[ARGC_IN]], i{{[0-9]+}}* [[ARGCADDR]], @@ -117,6 +116,7 @@ int main (int argc, char **argv) { // CK2-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]] // CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CK2: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]], +// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num( // CK2: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]], // CK2: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]], // CK2: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]], @@ -129,7 +129,6 @@ int main (int argc, char **argv) { // CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}}, // CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}}, // CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}}**, -// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num( // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]], // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]], // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]], @@ -137,6 +136,7 @@ int main (int argc, char **argv) { // CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]] // CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 // CK2: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]], +// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num( // CK2: store i{{[0-9]+}}*** [[ARGCADDR]], i{{[0-9]+}}**** [[ARGCADDR_PTR]], // CK2: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}***, i{{[0-9]+}}**** [[ARGCADDR_PTR]], // CK2: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]], -- 2.7.4