[AMDGPU][printf] Run AMDGPUPrintfRuntimeBindingPass in -O0
author: Juan Manuel MARTINEZ CAAMAÑO <juamarti@amd.com>
Fri, 24 Mar 2023 08:28:52 +0000 (09:28 +0100)
committer: Juan Manuel MARTINEZ CAAMAÑO <juamarti@amd.com>
Mon, 27 Mar 2023 07:43:36 +0000 (09:43 +0200)
AMDGPUPrintfRuntimeBindingPass is not run in the IR optimization
pipeline with -O0.

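This means that, with OpenCL, the printf definition coming from
device_libs gets linked with the user's code, which prevents
AMDGPUPrintfRuntimeBindingPass from working once linking is done.

Fix this by adding the pass before the -O0 early return in the
pipeline-early-simplification callback, so that it runs at every
optimization level.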

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D146720

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll [new file with mode: 0644]

index ecb31df..7255b14 100644 (file)
@@ -687,11 +687,12 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
 
   PB.registerPipelineEarlySimplificationEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
+        PM.addPass(AMDGPUPrintfRuntimeBindingPass());
+
         if (Level == OptimizationLevel::O0)
           return;
 
         PM.addPass(AMDGPUUnifyMetadataPass());
-        PM.addPass(AMDGPUPrintfRuntimeBindingPass());
 
         if (InternalizeSymbols) {
           PM.addPass(InternalizePass(mustPreserveGV));
diff --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll
new file mode 100644 (file)
index 0000000..b54976c
--- /dev/null
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=amdgcn--amdhsa -passes=amdgpu-printf-runtime-binding -S < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn--amdhsa -O0 -S < %s | FileCheck %s
+
+; Check that the call to printf is removed when AMDGPUPrintfRuntimeBindingPass is executed
+;   And that this pass is executed in the -O0 and -OX pipelines
+
+@.str = private unnamed_addr addrspace(4) constant [7 x i8] c"hello\0A\00", align 1
+
+define void @foo() {
+; CHECK-LABEL: define void @foo() {
+; CHECK-NEXT:    [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 4)
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[PRINTBUFFID:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 0
+; CHECK-NEXT:    [[PRINTBUFFIDCAST:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFID]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 1, ptr addrspace(1) [[PRINTBUFFIDCAST]], align 4
+; CHECK-NEXT:    [[PRINTBUFFGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 4
+; CHECK-NEXT:    br label [[TMP3]]
+; CHECK:       3:
+; CHECK-NEXT:    ret void
+;
+  %call = call i32 @printf(ptr addrspace(4) @.str)
+  ret void
+}
+
+declare hidden i32 @printf(ptr addrspace(4), ...)