From 5832950adbfcd7a5b16922a87a2cde257b7fac43 Mon Sep 17 00:00:00 2001
From: hsmahesha
Date: Tue, 23 Jun 2020 16:19:34 +0530
Subject: [PATCH] [AMDGPU/MemOpsCluster] Compute `width` for `MIMG` instruction
 class.

Summary: `width` computation is missing for the newly added `MIMG`
instruction class. Add it.

Reviewers: foad, rampitec, arsenm

Reviewed By: foad

Subscribers: MatzeB, javed.absar, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D81649
---
 llvm/lib/CodeGen/MachineScheduler.cpp      | 11 ++++++++++-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp     |  3 +++
 llvm/test/CodeGen/AMDGPU/cluster_stores.ll | 22 ++++++++++++++++++++++
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 0f21c97..cf75d53 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1573,8 +1573,13 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
     bool OffsetIsScalable;
     unsigned Width;
     if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
-                                           OffsetIsScalable, Width, TRI))
+                                           OffsetIsScalable, Width, TRI)) {
       MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
+
+      LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
+                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable
+                        << ", Width: " << Width << "\n");
+    }
 #ifndef NDEBUG
     for (auto *Op : BaseOps)
       assert(Op);
@@ -1630,6 +1635,10 @@
                         << ")\n");
       DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
     }
+
+    LLVM_DEBUG(dbgs() << " Curr cluster length: " << ClusterLength
+                      << ", Curr cluster bytes: " << CurrentClusterBytes
+                      << "\n");
   }
 }
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0dcaecd..77bfd88 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -396,6 +396,9 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
       BaseOps.push_back(getNamedOperand(LdSt, AMDGPU::OpName::vaddr));
     }
     Offset = 0;
+    // Get the appropriate operand, and compute the width accordingly.
+    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
+    Width = getOpSize(LdSt, DataOpIdx);
     return true;
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
index 67d6f68..bc3bcfe 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
@@ -5,6 +5,14 @@
 ; CHECK-LABEL: {{^}}cluster_load_cluster_store:
 define amdgpu_kernel void @cluster_load_cluster_store(i32* noalias %lb, i32* noalias %sb) {
 bb:
+
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+
 ; DBG: Cluster ld/st SU(1) - SU(2)
 ; DBG: Cluster ld/st SU([[L1:[0-9]+]]) - SU([[L2:[0-9]+]])
 
@@ -45,6 +53,13 @@ bb:
 
 ; CHECK-LABEL: {{^}}cluster_load_valu_cluster_store:
 define amdgpu_kernel void @cluster_load_valu_cluster_store(i32* noalias %lb, i32* noalias %sb) {
 bb:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+
 ; DBG: Cluster ld/st SU(1) - SU(2)
 ; DBG: Cluster ld/st SU([[L1:[0-9]+]]) - SU([[L2:[0-9]+]])
@@ -86,6 +101,8 @@ bb:
 
 ; Cluster loads from the same texture with different coordinates
 ; CHECK-LABEL: {{^}}cluster_image_load:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
 ; DBG: {{^}}Cluster ld/st [[SU1:SU\([0-9]+\)]] - [[SU2:SU\([0-9]+\)]]
 ; DBG: {{^}}[[SU1]]: {{.*}} IMAGE_LOAD
 ; DBG: {{^}}[[SU2]]: {{.*}} IMAGE_LOAD
@@ -106,6 +123,9 @@ entry:
 
 ; Don't cluster loads from different textures
 ; CHECK-LABEL: {{^}}no_cluster_image_load:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
 ; DBG-NOT: {{^}}Cluster ld/st
 define amdgpu_ps void @no_cluster_image_load(<8 x i32> inreg %src1, <8 x i32> inreg %src2, <8 x i32> inreg %dst, i32 %x, i32 %y) {
 entry:
@@ -118,6 +138,8 @@ entry:
 
 ; Cluster loads from the same texture and sampler with different coordinates
 ; CHECK-LABEL: {{^}}cluster_image_sample:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
 ; DBG: {{^}}Cluster ld/st [[SU1:SU\([0-9]+\)]] - [[SU2:SU\([0-9]+\)]]
 ; DBG: {{^}}[[SU1]]: {{.*}} IMAGE_SAMPLE
 ; DBG: {{^}}[[SU2]]: {{.*}} IMAGE_SAMPLE
-- 
2.7.4
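
For context on what the new `Width` value feeds into: the cluster mutation
accumulates the width of each memory op into a running byte count, and that
count gates how far a cluster may grow, which is what the new
"Curr cluster length"/"Curr cluster bytes" debug output reports. Below is a
minimal standalone sketch of that accounting; `MemOp`, `shouldCluster`, and
the 4-op/32-byte caps are illustrative assumptions for the example, not
LLVM's actual API or target limits.

#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative model only (not LLVM code): how each access's Width feeds
// the running cluster byte count.
struct MemOp {
  int64_t Offset;  // byte offset from the shared base operand
  unsigned Width;  // access size in bytes; for MIMG the patch derives this
                   // from the vdata operand via getOpSize()
};

// Hypothetical policy: a cluster may keep growing while it stays within
// both an op-count cap and a total-byte cap.
static bool shouldCluster(unsigned Length, unsigned Bytes,
                          unsigned MaxLength = 4, unsigned MaxBytes = 32) {
  return Length <= MaxLength && Bytes <= MaxBytes;
}

int main() {
  // Two 16-byte image loads, as in the cluster_image_load test. Before the
  // fix, Width for MIMG ops was left uninitialized, so this accounting was
  // meaningless for them.
  std::vector<MemOp> Ops = {{0, 16}, {16, 16}};
  unsigned Length = 1;
  unsigned Bytes = Ops[0].Width;
  for (size_t I = 1; I < Ops.size(); ++I) {
    if (!shouldCluster(Length + 1, Bytes + Ops[I].Width))
      break;
    ++Length;
    Bytes += Ops[I].Width;
    std::cout << "Curr cluster length: " << Length
              << ", Curr cluster bytes: " << Bytes << '\n';
  }
}

With both widths computed as 16, the sketch clusters the two loads and
prints a 32-byte running total, mirroring the Width: 16 lines the updated
test now expects from the scheduler's debug output.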