From a0c8f5fefa11d42cf3efb0a6ae9aabaf3a289f9e Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 15 Sep 2022 15:12:14 -0700 Subject: [PATCH] [SDAG] Print divergence in SDNode::dump If the target does not support divergence, the field is set to false and is not printed. Differential Revision: https://reviews.llvm.org/D133984 --- .../CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 3 ++ llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll | 33 ++++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 6ba0166..74092d9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -1059,6 +1059,9 @@ LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const { void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { printr(OS, G); + // Under VerboseDAGDumping divergence will be printed always. 
+ if (isDivergent() && !VerboseDAGDumping) + OS << " # D:1"; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (i) OS << ", "; else OS << " "; printOperand(OS, G, getOperand(i)); diff --git a/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll b/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll new file mode 100644 index 0000000..c4b7957 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll @@ -0,0 +1,33 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -O0 -verify-machineinstrs < %s -debug-only=isel -o /dev/null |& FileCheck --check-prefixes=GCN,GCN-DEFAULT %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -O0 -verify-machineinstrs < %s -debug-only=isel -dag-dump-verbose -o /dev/null |& FileCheck --check-prefixes=GCN,GCN-VERBOSE %s + +; REQUIRES: asserts + +; GCN-LABEL: === test_sdag_dump +; GCN: Initial selection DAG: %bb.0 'test_sdag_dump:entry' +; GCN: SelectionDAG has 10 nodes: + +; GCN-DEFAULT: t0: ch = EntryToken +; GCN-DEFAULT: t2: f32,ch = CopyFromReg t0, Register:f32 %0 +; GCN-DEFAULT: t5: f32 = fadd t2, t2 +; GCN-DEFAULT: t4: f32,ch = CopyFromReg # D:1 t0, Register:f32 %1 +; GCN-DEFAULT: t6: f32 = fadd # D:1 t5, t4 +; GCN-DEFAULT: t8: ch,glue = CopyToReg # D:1 t0, Register:f32 $vgpr0, t6 +; GCN-DEFAULT: t9: ch = RETURN_TO_EPILOG # D:1 t8, Register:f32 $vgpr0, t8:1 + +; GCN-VERBOSE: t0: ch = EntryToken # D:0 +; GCN-VERBOSE: t2: f32,ch = CopyFromReg [ORD=1] # D:0 t0, Register:f32 %0 # D:0 +; GCN-VERBOSE: t5: f32 = fadd [ORD=2] # D:0 t2, t2 +; GCN-VERBOSE: t4: f32,ch = CopyFromReg [ORD=1] # D:1 t0, Register:f32 %1 # D:0 +; GCN-VERBOSE: t6: f32 = fadd [ORD=3] # D:1 t5, t4 +; GCN-VERBOSE: t8: ch,glue = CopyToReg [ORD=4] # D:1 t0, Register:f32 $vgpr0 # D:0, t6 +; GCN-VERBOSE: t9: ch = RETURN_TO_EPILOG [ORD=4] # D:1 t8, Register:f32 $vgpr0 # D:0, t8:1 + +define amdgpu_ps float @test_sdag_dump(float inreg %scalar, float %vector) { +entry: + %sadd = fadd float %scalar, %scalar + %ret = fadd float %sadd, %vector + ret float %ret +} + +declare i32 
@llvm.amdgcn.workitem.id.x() -- 2.7.4