From 79712097c779f399fccd7fe739d5595829fd19e8 Mon Sep 17 00:00:00 2001 From: Carlo Bertolli Date: Wed, 28 Feb 2018 20:48:35 +0000 Subject: [PATCH] [OpenMP] Extend NVPTX SPMD implementation of combined constructs Differential Revision: https://reviews.llvm.org/D43852 This patch extends the SPMD implementation to all target constructs and guards this implementation under a new flag. llvm-svn: 326368 --- clang/include/clang/Basic/LangOptions.def | 1 + clang/include/clang/Driver/Options.td | 2 + clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 33 ++---- clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 + clang/lib/Driver/ToolChains/Clang.cpp | 5 + clang/lib/Frontend/CompilerInvocation.cpp | 4 + .../test/OpenMP/nvptx_target_parallel_codegen.cpp | 14 +-- .../nvptx_target_parallel_num_threads_codegen.cpp | 14 +-- .../nvptx_target_parallel_proc_bind_codegen.cpp | 16 +-- .../nvptx_target_parallel_reduction_codegen.cpp | 10 +- clang/test/OpenMP/nvptx_target_simd_codegen.cpp | 74 +++++++++++++ ...arget_teams_distribute_parallel_for_codegen.cpp | 123 +++++++++++++++++++++ ..._teams_distribute_parallel_for_simd_codegen.cpp | 123 +++++++++++++++++++++ .../nvptx_target_teams_distribute_simd_codegen.cpp | 99 +++++++++++++++++ .../test/OpenMP/target_parallel_debug_codegen.cpp | 4 +- .../OpenMP/target_parallel_for_debug_codegen.cpp | 4 +- 16 files changed, 476 insertions(+), 52 deletions(-) create mode 100644 clang/test/OpenMP/nvptx_target_simd_codegen.cpp create mode 100644 clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp create mode 100644 clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp create mode 100644 clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index f2a0920..68edee2 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -197,6 +197,7 @@ LANGOPT(OpenMP , 32, 0, "OpenMP support and version of OpenMP (31, 40 LANGOPT(OpenMPSimd , 1, 0, "Use SIMD only OpenMP support.") LANGOPT(OpenMPUseTLS , 1, 0, "Use TLS for threadprivates or runtime calls") LANGOPT(OpenMPIsDevice , 1, 0, "Generate code only for OpenMP target device") +LANGOPT(OpenMPCUDAMode , 1, 0, "Generate code for OpenMP pragmas in SIMT/SPMD mode") LANGOPT(RenderScript , 1, 0, "RenderScript") LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 464956f..3aadfcb 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1424,6 +1424,8 @@ def fnoopenmp_relocatable_target : Flag<["-"], "fnoopenmp-relocatable-target">, def fopenmp_simd : Flag<["-"], "fopenmp-simd">, Group, Flags<[CC1Option, NoArgumentUnused]>, HelpText<"Emit OpenMP code only for SIMD-based constructs.">; def fno_openmp_simd : Flag<["-"], "fno-openmp-simd">, Group, Flags<[CC1Option, NoArgumentUnused]>; +def fopenmp_cuda_mode : Flag<["-"], "fopenmp-cuda-mode">, Group, Flags<[CC1Option, NoArgumentUnused]>; +def fno_openmp_cuda_mode : Flag<["-"], "fno-openmp-cuda-mode">, Group, Flags<[CC1Option, NoArgumentUnused]>; def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group; def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group; def force__cpusubtype__ALL : Flag<["-"], "force_cpusubtype_ALL">; diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 674e843..964dc5e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -271,21 +271,10 @@ bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const { } static CGOpenMPRuntimeNVPTX::ExecutionMode -getExecutionModeForDirective(CodeGenModule &CGM, - const OMPExecutableDirective &D) { - OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); - switch (DirectiveKind) { - case OMPD_target: - case OMPD_target_teams: - return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic; - case OMPD_target_parallel: - case OMPD_target_parallel_for: - case OMPD_target_parallel_for_simd: - return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd; - default: - llvm_unreachable("Unsupported directive on NVPTX device."); - } - llvm_unreachable("Unsupported directive on NVPTX device."); +getExecutionMode(CodeGenModule &CGM) { + return CGM.getLangOpts().OpenMPCUDAMode + ? CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd + : CGOpenMPRuntimeNVPTX::ExecutionMode::Generic; } void CGOpenMPRuntimeNVPTX::emitGenericKernel(const OMPExecutableDirective &D, @@ -819,8 +808,7 @@ void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( assert(!ParentName.empty() && "Invalid target region parent name!"); - CGOpenMPRuntimeNVPTX::ExecutionMode Mode = - getExecutionModeForDirective(CGM, D); + CGOpenMPRuntimeNVPTX::ExecutionMode Mode = getExecutionMode(CGM); switch (Mode) { case CGOpenMPRuntimeNVPTX::ExecutionMode::Generic: emitGenericKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, @@ -1051,10 +1039,13 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall( // TODO: Do something with IfCond when support for the 'if' clause // is added on Spmd target directives. llvm::SmallVector OutlinedFnArgs; - OutlinedFnArgs.push_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); - OutlinedFnArgs.push_back( - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo())); + + Address ZeroAddr = CGF.CreateMemTemp( + CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), + ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index bf9a257..dec3720 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4260,6 +4260,7 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( static void emitTargetTeamsDistributeParallelForRegion( CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, PrePostActionTy &Action) { + Action.Enter(CGF); auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, S.getDistInc()); @@ -4310,6 +4311,7 @@ static void emitTargetTeamsDistributeParallelForSimdRegion( CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForSimdDirective &S, PrePostActionTy &Action) { + Action.Enter(CGF); auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined, S.getDistInc()); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3683121..4888c8a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3970,6 +3970,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fnoopenmp_use_tls, /*Default=*/true)) CmdArgs.push_back("-fnoopenmp-use-tls"); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_version_EQ); + + // When in OpenMP offloading mode with NVPTX target, forward + // cuda-mode flag + Args.AddLastArg(CmdArgs, options::OPT_fopenmp_cuda_mode, + options::OPT_fno_openmp_cuda_mode); break; default: // By default, if Clang doesn't know how to generate useful OpenMP code diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index a217d35..7e060bb 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2526,6 +2526,10 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK, << Opts.OMPHostIRFile; } + // set CUDA mode for OpenMP target NVPTX if specified in options + Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && T.isNVPTX() && + Args.hasArg(options::OPT_fopenmp_cuda_mode); + // Record whether the __DEPRECATED define was requested. Opts.Deprecated = Args.hasFlag(OPT_fdeprecated_macro, OPT_fno_deprecated_macro, diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp index 7d16624..64d195c 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -1,9 +1,9 @@ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -62,7 +62,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[EXEC:.+]] // // CHECK: [[EXEC]] - // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null, i16* [[AA]]) + // CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]]) // CHECK: br label {{%?}}[[DONE:.+]] // // CHECK: [[DONE]] @@ -104,7 +104,7 @@ int bar(int n){ // CHECK: br label {{%?}}[[EXEC:.+]] // // CHECK: [[EXEC]] - // CHECK: {{call|invoke}} void [[OP2:@.+]](i32* null, i32* null, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]]) + // CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]]) // CHECK: br label {{%?}}[[DONE:.+]] // // CHECK: [[DONE]] diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp index bc423c1..73d3bf8 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -1,9 +1,9 @@ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -51,7 +51,7 @@ int bar(int n){ // // CHECK: [[EXEC]] // CHECK-NOT: call void @__kmpc_push_num_threads - // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null, i16* [[AA]]) + // CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]]) // CHECK: br label {{%?}}[[DONE:.+]] // // CHECK: [[DONE]] @@ -94,7 +94,7 @@ int bar(int n){ // // CHECK: [[EXEC]] // CHECK-NOT: call void @__kmpc_push_num_threads - // CHECK: {{call|invoke}} void [[OP2:@.+]](i32* null, i32* null, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]]) + // CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]]) // CHECK: br label {{%?}}[[DONE:.+]] // // CHECK: [[DONE]] diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp index 91c6de1..eb166b7 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp @@ -1,9 +1,9 @@ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -52,7 +52,7 @@ int bar(int n){ // // CHECK: [[EXEC]] // CHECK-NOT: call void @__kmpc_push_proc_bind - // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null + // CHECK: {{call|invoke}} void [[OP1:@.+]]( // CHECK: br label {{%?}}[[DONE:.+]] // // CHECK: [[DONE]] @@ -73,7 +73,7 @@ int bar(int n){ // // CHECK: [[EXEC]] // CHECK-NOT: call void @__kmpc_push_proc_bind - // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null + // CHECK: {{call|invoke}} void [[OP1:@.+]]( // CHECK: br label {{%?}}[[DONE:.+]] // // CHECK: [[DONE]] @@ -93,7 +93,7 @@ int bar(int n){ // // CHECK: [[EXEC]] // CHECK-NOT: call void @__kmpc_push_proc_bind - // CHECK: {{call|invoke}} void [[OP1:@.+]](i32* null, i32* null + // CHECK: {{call|invoke}} void [[OP1:@.+]]( // CHECK: br label {{%?}}[[DONE:.+]] // // CHECK: [[DONE]] diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index d636240..b12801e 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -1,9 +1,9 @@ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 // expected-no-diagnostics #ifndef HEADER #define HEADER diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp new file mode 100644 index 0000000..9bb7617 --- /dev/null +++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp @@ -0,0 +1,74 @@ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l24}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l29}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l34}}_exec_mode = weak constant i8 0 + +#define N 1000 + +template +tx ftemplate(int n) { + tx a[N]; + short aa[N]; + tx b[10]; + + #pragma omp target simd + for(int i = 0; i < n; i++) { + a[i] = 1; + } + + #pragma omp target simd + for(int i = 0; i < n; i++) { + aa[i] += 1; + } + + #pragma omp target simd + for(int i = 0; i < 10; i++) { + b[i] += 1; + } + + return a[0]; +} + +int bar(int n){ + int a = 0; + + a += ftemplate(n); + + return a; +} + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l24}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK-NOT: call void @__kmpc_for_static_init +// CHECK-NOT: call void @__kmpc_for_static_fini +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l29}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK-NOT: call void @__kmpc_for_static_init +// CHECK-NOT: call void @__kmpc_for_static_fini +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+l34}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK-NOT: call void @__kmpc_for_static_init +// CHECK-NOT: call void @__kmpc_for_static_fini +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp new file mode 100644 index 0000000..fc3f253 --- /dev/null +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -0,0 +1,123 @@ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0 + +#define N 1000 +#define M 10 + +template +tx ftemplate(int n) { + tx a[N]; + short aa[N]; + tx b[10]; + tx c[M][M]; + tx f = n; + tx l; + int k; + +#pragma omp target teams distribute parallel for lastprivate(l) dist_schedule(static,128) schedule(static,32) + for(int i = 0; i < n; i++) { + a[i] = 1; + l = i; + } + + #pragma omp target teams distribute parallel for map(tofrom: aa) num_teams(M) thread_limit(64) + for(int i = 0; i < n; i++) { + aa[i] += 1; + } + +#pragma omp target teams distribute parallel for map(tofrom:a, aa, b) if(target: n>40) proc_bind(spread) + for(int i = 0; i < 10; i++) { + b[i] += 1; + } + +#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k) num_threads(M) + for(int i = 0; i < M; i++) { + for(int j = 0; j < M; j++) { + k = M; + c[i][j] = i+j*f+k; + } + } + + return a[0]; +} + +int bar(int n){ + int a = 0; + + a += ftemplate(n); + + return a; +} + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, +// CHECK: {{call|invoke}} void [[OUTL1:@.+]]( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK: define internal void [[OUTL1]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, +// CHECK: {{call|invoke}} void [[OUTL2:@.+]]( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK: define internal void [[OUTL2]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, +// CHECK: {{call|invoke}} void [[OUTL3:@.+]]( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK: define internal void [[OUTL3]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]]) +// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], +// CHECK: {{call|invoke}} void [[OUTL4:@.+]]( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK: define internal void [[OUTL4]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp new file mode 100644 index 0000000..c508bc9 --- /dev/null +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -0,0 +1,123 @@ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0 + +#define N 1000 +#define M 10 + +template +tx ftemplate(int n) { + tx a[N]; + short aa[N]; + tx b[10]; + tx c[M][M]; + tx f = n; + tx l; + int k; + +#pragma omp target teams distribute parallel for simd lastprivate(l) dist_schedule(static,128) schedule(static,32) + for(int i = 0; i < n; i++) { + a[i] = 1; + l = i; + } + + #pragma omp target teams distribute parallel for simd map(tofrom: aa) num_teams(M) thread_limit(64) + for(int i = 0; i < n; i++) { + aa[i] += 1; + } + +#pragma omp target teams distribute parallel for simd map(tofrom:a, aa, b) if(target: n>40) proc_bind(spread) + for(int i = 0; i < 10; i++) { + b[i] += 1; + } + +#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k) num_threads(M) + for(int i = 0; i < M; i++) { + for(int j = 0; j < M; j++) { + k = M; + c[i][j] = i+j*f+k; + } + } + + return a[0]; +} + +int bar(int n){ + int a = 0; + + a += ftemplate(n); + + return a; +} + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, +// CHECK: {{call|invoke}} void [[OUTL1:@.+]]( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK: define internal void [[OUTL1]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 33, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, +// CHECK: {{call|invoke}} void [[OUTL2:@.+]]( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK: define internal void [[OUTL2]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, +// CHECK: {{call|invoke}} void [[OUTL3:@.+]]( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK: define internal void [[OUTL3]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]]) +// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], +// CHECK: {{call|invoke}} void [[OUTL4:@.+]]( +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK: define internal void [[OUTL4]]( +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 34, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp new file mode 100644 index 0000000..a78a01a --- /dev/null +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp @@ -0,0 +1,99 @@ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l30}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l36}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l41}}_exec_mode = weak constant i8 0 +// CHECK-DAG: {{@__omp_offloading_.+l46}}_exec_mode = weak constant i8 0 + +#define N 1000 +#define M 10 + +template +tx ftemplate(int n) { + tx a[N]; + short aa[N]; + tx b[10]; + tx c[M][M]; + tx f = n; + tx l; + int k; + +#pragma omp target teams distribute simd lastprivate(l) dist_schedule(static,128) + for(int i = 0; i < n; i++) { + a[i] = 1; + l = i; + } + + #pragma omp target teams distribute simd map(tofrom: aa) num_teams(M) thread_limit(64) + for(int i = 0; i < n; i++) { + aa[i] += 1; + } + +#pragma omp target teams distribute simd map(tofrom:a, aa, b) if(target: n>40) + for(int i = 0; i < 10; i++) { + b[i] += 1; + } + +#pragma omp target teams distribute simd collapse(2) firstprivate(f) private(k) + for(int i = 0; i < M; i++) { + for(int j = 0; j < M; j++) { + k = M; + c[i][j] = i+j*f+k; + } + } + + return a[0]; +} + +int bar(int n){ + int a = 0; + + a += ftemplate(n); + + return a; +} + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+}}( +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +// CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]]) +// CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}}, +// CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], +// CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align +// CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 92, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]], +// CHECK: call void @__kmpc_for_static_fini( +// CHECK: call void @__kmpc_spmd_kernel_deinit() +// CHECK: ret void + +#endif diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp index 4355abf..0ecd6a9 100644 --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s // expected-no-diagnostics int main() { diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp index 9b119f2..0102179 100644 --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s // expected-no-diagnostics int main() { -- 2.7.4