[AMDGPU/MemOpsCluster] Compute `width` for `MIMG` instruction class.

author hsmahesha <mahesha.comp@gmail.com>

Tue, 23 Jun 2020 10:49:34 +0000 (16:19 +0530)

committer Your Name <mahesha.comp@gmail.com>

Tue, 23 Jun 2020 12:02:17 +0000 (17:32 +0530)
author hsmahesha <mahesha.comp@gmail.com>
Tue, 23 Jun 2020 10:49:34 +0000 (16:19 +0530)
committer Your Name <mahesha.comp@gmail.com>
Tue, 23 Jun 2020 12:02:17 +0000 (17:32 +0530)
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp

index 0f21c97..cf75d53 100644 (file)
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1573,8 +1573,13 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
      bool OffsetIsScalable;
      unsigned Width;
      if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
-                                           OffsetIsScalable, Width, TRI))
+                                           OffsetIsScalable, Width, TRI)) {
        MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
+
+      LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
+                        << Offset << ", OffsetIsScalable: " << OffsetIsScalable
+                        << ", Width: " << Width << "\n");
+    }
  #ifndef NDEBUG
      for (auto *Op : BaseOps)
        assert(Op);
@@ -1630,6 +1635,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
                          << ")\n");
        DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
      }
+
+    LLVM_DEBUG(dbgs() << "  Curr cluster length: " << ClusterLength
+                      << ", Curr cluster bytes: " << CurrentClusterBytes
+                      << "\n");
    }
  }
  
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

index 0dcaecd..77bfd88 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -396,6 +396,9 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
        BaseOps.push_back(getNamedOperand(LdSt, AMDGPU::OpName::vaddr));
      }
      Offset = 0;
+    // Get appropriate operand, and compute width accordingly.
+    DataOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
+    Width = getOpSize(LdSt, DataOpIdx);
      return true;
    }
  
diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll

index 67d6f68..bc3bcfe 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
@@ -5,6 +5,14 @@
  ; CHECK-LABEL: {{^}}cluster_load_cluster_store:
  define amdgpu_kernel void @cluster_load_cluster_store(i32* noalias %lb, i32* noalias %sb) {
  bb:
+
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+
  ; DBG: Cluster ld/st SU(1) - SU(2)
  
  ; DBG: Cluster ld/st SU([[L1:[0-9]+]]) - SU([[L2:[0-9]+]])
@@ -45,6 +53,13 @@ bb:
  ; CHECK-LABEL: {{^}}cluster_load_valu_cluster_store:
  define amdgpu_kernel void @cluster_load_valu_cluster_store(i32* noalias %lb, i32* noalias %sb) {
  bb:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 8
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 4
+
  ; DBG: Cluster ld/st SU(1) - SU(2)
  
  ; DBG: Cluster ld/st SU([[L1:[0-9]+]]) - SU([[L2:[0-9]+]])
@@ -86,6 +101,8 @@ bb:
  
  ; Cluster loads from the same texture with different coordinates
  ; CHECK-LABEL: {{^}}cluster_image_load:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
  ; DBG: {{^}}Cluster ld/st [[SU1:SU\([0-9]+\)]] - [[SU2:SU\([0-9]+\)]]
  ; DBG: {{^}}[[SU1]]: {{.*}} IMAGE_LOAD
  ; DBG: {{^}}[[SU2]]: {{.*}} IMAGE_LOAD
@@ -106,6 +123,9 @@ entry:
  
  ; Don't cluster loads from different textures
  ; CHECK-LABEL: {{^}}no_cluster_image_load:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
  ; DBG-NOT: {{^}}Cluster ld/st
  define amdgpu_ps void @no_cluster_image_load(<8 x i32> inreg %src1, <8 x i32> inreg %src2, <8 x i32> inreg %dst, i32 %x, i32 %y) {
  entry:
@@ -118,6 +138,8 @@ entry:
  
  ; Cluster loads from the same texture and sampler with different coordinates
  ; CHECK-LABEL: {{^}}cluster_image_sample:
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
+; DBG: Num BaseOps: {{[1-9]+}}, Offset: {{[0-9]+}}, OffsetIsScalable: {{[01]}}, Width: 16
  ; DBG: {{^}}Cluster ld/st [[SU1:SU\([0-9]+\)]] - [[SU2:SU\([0-9]+\)]]
  ; DBG: {{^}}[[SU1]]: {{.*}} IMAGE_SAMPLE
  ; DBG: {{^}}[[SU2]]: {{.*}} IMAGE_SAMPLE
author	hsmahesha <mahesha.comp@gmail.com>
	Tue, 23 Jun 2020 10:49:34 +0000 (16:19 +0530)
committer	Your Name <mahesha.comp@gmail.com>
	Tue, 23 Jun 2020 12:02:17 +0000 (17:32 +0530)
llvm/lib/CodeGen/MachineScheduler.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/cluster_stores.ll		patch \| blob \| history