From 215cfa01f2ba6f11d0ca0a2da87038aa81244b73 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Juan=20Manuel=20MARTINEZ=20CAAMA=C3=91O?= Date: Fri, 24 Mar 2023 09:28:52 +0100 Subject: [PATCH] [AMDGPU][printf] Run AMDGPUPrintfRuntimeBindingPass in -O0 AMDGPUPrintfRuntimeBindingPass is not run in the IR optimization pipeline with -O0. This means that with OpenCL the printf definition coming from device_libs gets linked with the user's code, which blocks AMDGPUPrintfRuntimeBindingPass from working after the linkage is done. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D146720 --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 ++- llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll | 30 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index ecb31df..7255b14 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -687,11 +687,12 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PB.registerPipelineEarlySimplificationEPCallback( [this](ModulePassManager &PM, OptimizationLevel Level) { + PM.addPass(AMDGPUPrintfRuntimeBindingPass()); + if (Level == OptimizationLevel::O0) return; PM.addPass(AMDGPUUnifyMetadataPass()); - PM.addPass(AMDGPUPrintfRuntimeBindingPass()); if (InternalizeSymbols) { PM.addPass(InternalizePass(mustPreserveGV)); diff --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll new file mode 100644 index 0000000..b54976c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -mtriple=amdgcn--amdhsa -passes=amdgpu-printf-runtime-binding -S < %s | FileCheck %s +; RUN: opt -mtriple=amdgcn--amdhsa -O0 -S < %s | FileCheck %s + +; Check that the call to printf is removed when AMDGPUPrintfRuntimeBindingPass is executed +; And that this pass is executed in the -O0 and -OX pipelines + +@.str = private unnamed_addr addrspace(4) constant [7 x i8] c"hello\0A\00", align 1 + +define void @foo() { +; CHECK-LABEL: define void @foo() { +; CHECK-NEXT: [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 4) +; CHECK-NEXT: br label [[DOTSPLIT:%.*]] +; CHECK: .split: +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null +; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] +; CHECK: 2: +; CHECK-NEXT: [[PRINTBUFFID:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 0 +; CHECK-NEXT: [[PRINTBUFFIDCAST:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFID]] to ptr addrspace(1) +; CHECK-NEXT: store i32 1, ptr addrspace(1) [[PRINTBUFFIDCAST]], align 4 +; CHECK-NEXT: [[PRINTBUFFGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 4 +; CHECK-NEXT: br label [[TMP3]] +; CHECK: 3: +; CHECK-NEXT: ret void +; + %call = call i32 @printf(ptr addrspace(4) @.str) + ret void +} + +declare hidden i32 @printf(ptr addrspace(4), ...) -- 2.7.4