From 4860f6cb25bd20b0fc2f20403602526df650d7ed Mon Sep 17 00:00:00 2001 From: Igor Kirillov Date: Wed, 27 Oct 2021 16:21:54 +0100 Subject: [PATCH] [OpenMP] Fix: opposite attributes could be set by -fno-inline After the changes introduced by D106799 it is possible to tag outlined function with both AlwaysInline and NoInline attributes using -fno-inline command line options. This issue is similiar to D107649. Differential Revision: https://reviews.llvm.org/D112645 --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 6 +-- clang/test/OpenMP/parallel_if_codegen_PR51349.cpp | 53 +++++++++++++++++++++++ 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 3dbebc5..2cf0193 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -518,8 +518,10 @@ static llvm::Function *emitOutlinedFunctionPrologue( F->setDoesNotRecurse(); // Always inline the outlined function if optimizations are enabled. - if (CGM.getCodeGenOpts().OptimizationLevel != 0) + if (CGM.getCodeGenOpts().OptimizationLevel != 0) { + F->removeFnAttr(llvm::Attribute::NoInline); F->addFnAttr(llvm::Attribute::AlwaysInline); + } // Generate the function. CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, @@ -5364,8 +5366,6 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, CGF.CapturedStmtInfo = &CapStmtInfo; llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); Fn->setDoesNotRecurse(); - if (CGM.getCodeGenOpts().OptimizationLevel != 0) - Fn->addFnAttr(llvm::Attribute::AlwaysInline); return Fn; } diff --git a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp index a00b5ec..9821560 100644 --- a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp +++ b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp @@ -1,5 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs // RUN: %clang_cc1 -x c++ -O1 -fopenmp-version=45 -disable-llvm-optzns -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -x c++ -O1 -fopenmp-version=45 -disable-llvm-optzns -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm -fno-inline %s -o - | FileCheck %s --check-prefix=CHECK-NOINLINE // expected-no-diagnostics #ifndef HEADER @@ -8,6 +9,8 @@ void foo() { #pragma omp parallel if(0) ; +#pragma omp parallel + ; } #endif @@ -23,6 +26,7 @@ void foo() { // CHECK-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK-NEXT: ret void // // @@ -36,3 +40,52 @@ void foo() { // CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] // CHECK-NEXT: ret void // +// +// CHECK: Function Attrs: alwaysinline norecurse nounwind +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NEXT: ret void +// +// +// CHECK-NOINLINE: Function Attrs: mustprogress noinline nounwind +// CHECK-NOINLINE-LABEL: define {{[^@]+}}@_Z3foov +// CHECK-NOINLINE-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NOINLINE-NEXT: entry: +// CHECK-NOINLINE-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK-NOINLINE-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NOINLINE-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NOINLINE-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK-NOINLINE-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] +// CHECK-NOINLINE-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK-NOINLINE-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] +// CHECK-NOINLINE-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +// CHECK-NOINLINE-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) +// CHECK-NOINLINE-NEXT: ret void +// +// +// CHECK-NOINLINE: Function Attrs: noinline norecurse nounwind +// CHECK-NOINLINE-LABEL: define {{[^@]+}}@.omp_outlined. +// CHECK-NOINLINE-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NOINLINE-NEXT: entry: +// CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NOINLINE-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]] +// CHECK-NOINLINE-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: ret void +// +// +// CHECK-NOINLINE: Function Attrs: alwaysinline norecurse nounwind +// CHECK-NOINLINE-LABEL: define {{[^@]+}}@.omp_outlined..1 +// CHECK-NOINLINE-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NOINLINE-NEXT: entry: +// CHECK-NOINLINE-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NOINLINE-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NOINLINE-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA7]] +// CHECK-NOINLINE-NEXT: ret void +// -- 2.7.4