From c3d3c22a5834ac8afe9618ef9eed5dac370c080d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 2 Apr 2020 11:52:23 -0400 Subject: [PATCH] AMDGPU: Hack out noinline on functions using LDS globals This is a workaround for clang adding noinline to all functions at -O0. Previously, we would just add alwaysinline, and the verifier would complain about having both noinline and alwaysinline. We currently can't truly codegen this case as a freestanding function, so override the user forcing noinline. --- llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | 7 +++++++ .../AMDGPU/force-alwaysinline-lds-global-address.ll | 17 +++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp index ff2bda6..2294754 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp @@ -71,6 +71,13 @@ void AMDGPUAlwaysInline::recursivelyVisitUsers( if (Instruction *I = dyn_cast<Instruction>(U)) { Function *F = I->getParent()->getParent(); if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) { + // FIXME: This is a horrible hack. We should always respect noinline, + // and just let us hit the error when we can't handle this. + // + // Unfortunately, clang adds noinline to all functions at -O0. We have + // to override this here until that's fixed. 
+ F->removeFnAttr(Attribute::NoInline); + FuncsToAlwaysInline.insert(F); Stack.push_back(F); } diff --git a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll index f525ca5..8ab59dc 100644 --- a/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll +++ b/llvm/test/CodeGen/AMDGPU/force-alwaysinline-lds-global-address.ll @@ -74,4 +74,21 @@ define i32 @recursive_call_lds(i32 %arg0) { ret i32 %call } +; Test we don't break the IR and have both alwaysinline and noinline +; FIXME: We should really not override noinline. + +; ALL-LABEL: define i32 @load_lds_simple_noinline() #0 { +define i32 @load_lds_simple_noinline() noinline { + %load = load i32, i32 addrspace(3)* @lds0, align 4 + ret i32 %load +} + +; ALL-LABEL: define i32 @recursive_call_lds_noinline(i32 %arg0) #0 { +define i32 @recursive_call_lds_noinline(i32 %arg0) noinline { + %load = load i32, i32 addrspace(3)* @lds0, align 4 + %add = add i32 %arg0, %load + %call = call i32 @recursive_call_lds(i32 %add) + ret i32 %call +} + ; ALL: attributes #0 = { alwaysinline } -- 2.7.4