[AMDGPU] Fix high occupancy calculation and print it

author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>

Wed, 31 Jul 2019 01:07:10 +0000 (01:07 +0000)

committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>

Wed, 31 Jul 2019 01:07:10 +0000 (01:07 +0000)
author Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Wed, 31 Jul 2019 01:07:10 +0000 (01:07 +0000)
committer Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Wed, 31 Jul 2019 01:07:10 +0000 (01:07 +0000)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

index 743ac64..a429c7c 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -507,6 +507,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
        Twine(CurrentProgramInfo.NumVGPRsForWavesPerEU), false);
  
      OutStreamer->emitRawComment(
+      " Occupancy: " +
+      Twine(CurrentProgramInfo.Occupancy), false);
+
+    OutStreamer->emitRawComment(
        " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
  
      OutStreamer->emitRawComment(
@@ -1057,6 +1061,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
        // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
        S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
        S_00B84C_EXCP_EN(0);
+
+  ProgInfo.Occupancy = STM.computeOccupancy(MF, ProgInfo.LDSSize,
+                                            ProgInfo.NumSGPRsForWavesPerEU,
+                                            ProgInfo.NumVGPRsForWavesPerEU);
  }
  
  static unsigned getRsrcReg(CallingConv::ID CallConv) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

index 716c387..f9a9679 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -175,6 +175,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
    HasFminFmaxLegacy(true),
    EnablePromoteAlloca(false),
    HasTrigReducedRange(false),
+  MaxWavesPerEU(10),
    LocalMemorySize(0),
    WavefrontSize(0)
    { }
@@ -278,6 +279,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
      InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
      TLInfo(TM, *this),
      FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0) {
+  MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
    CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
    Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
    RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
@@ -566,7 +568,7 @@ bool GCNSubtarget::hasMadF16() const {
  
  unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
    if (getGeneration() >= AMDGPUSubtarget::GFX10)
-    return 10;
+    return getMaxWavesPerEU();
  
    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
      if (SGPRs <= 80)
@@ -616,6 +618,20 @@ unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
    return 2; // VCC.
  }
  
+unsigned GCNSubtarget::computeOccupancy(const MachineFunction &MF,
+                                        unsigned LDSSize,
+                                        unsigned NumSGPRs,
+                                        unsigned NumVGPRs) const {
+  unsigned Occupancy =
+    std::min(getMaxWavesPerEU(),
+             getOccupancyWithLocalMemSize(LDSSize, MF.getFunction()));
+  if (NumSGPRs)
+    Occupancy = std::min(Occupancy, getOccupancyWithNumSGPRs(NumSGPRs));
+  if (NumVGPRs)
+    Occupancy = std::min(Occupancy, getOccupancyWithNumVGPRs(NumVGPRs));
+  return Occupancy;
+}
+
  unsigned GCNSubtarget::getMaxNumSGPRs(const MachineFunction &MF) const {
    const Function &F = MF.getFunction();
    const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

index 1f30d76..bc10091 100644 (file)
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -75,6 +75,7 @@ protected:
    bool HasFminFmaxLegacy;
    bool EnablePromoteAlloca;
    bool HasTrigReducedRange;
+  unsigned MaxWavesPerEU;
    int LocalMemorySize;
    unsigned WavefrontSize;
  
@@ -223,7 +224,9 @@ public:
    /// subtarget.
    virtual unsigned getMinWavesPerEU() const = 0;
  
-  unsigned getMaxWavesPerEU() const { return 10; }
+  /// \returns Maximum number of waves per execution unit supported by the
+  /// subtarget without any kind of limitation.
+  unsigned getMaxWavesPerEU() const { return MaxWavesPerEU; }
  
    /// Creates value range metadata on an workitemid.* inrinsic call or load.
    bool makeLIDRangeMetadata(Instruction *I) const;
@@ -245,6 +248,9 @@ public:
  
  class GCNSubtarget : public AMDGPUGenSubtargetInfo,
                       public AMDGPUSubtarget {
+
+  using AMDGPUSubtarget::getMaxWavesPerEU;
+
  public:
    enum TrapHandlerAbi {
      TrapHandlerAbiNone = 0,
@@ -881,12 +887,6 @@ public:
      return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
    }
  
-  /// \returns Maximum number of waves per execution unit supported by the
-  /// subtarget without any kind of limitation.
-  unsigned getMaxWavesPerEU() const {
-    return AMDGPU::IsaInfo::getMaxWavesPerEU(this);
-  }
-
    /// \returns Number of waves per work group supported by the subtarget and
    /// limited by given \p FlatWorkGroupSize.
    unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
@@ -1036,6 +1036,13 @@ public:
    /// VGPRs
    unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
  
+  /// Return occupancy for the given function, limited by the LDS size and,
+  /// when nonzero, by the given numbers of SGPRs and VGPRs.
+  /// Note, occupancy can be affected by the scratch allocation as well, but
+  /// we do not have enough information to compute it.
+  unsigned computeOccupancy(const MachineFunction &MF, unsigned LDSSize = 0,
+                            unsigned NumSGPRs = 0, unsigned NumVGPRs = 0) const;
+
    /// \returns true if the flat_scratch register should be initialized with the
    /// pointer to the wave's scratch memory rather than a size and offset.
    bool flatScratchIsPointer() const {
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

index 46da974..d9068d6 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -53,8 +53,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
    FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
    WavesPerEU = ST.getWavesPerEU(F);
  
-  Occupancy = getMaxWavesPerEU();
-  limitOccupancy(MF);
+  Occupancy = ST.computeOccupancy(MF, getLDSSize());
    CallingConv::ID CC = F.getCallingConv();
  
    if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h

index 168f05f..94ebe69 100644 (file)
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -51,6 +51,9 @@ struct SIProgramInfo {
      // Number of VGPRs that meets number of waves per execution unit request.
      uint32_t NumVGPRsForWavesPerEU = 0;
  
+    // Final occupancy.
+    uint32_t Occupancy = 0;
+
      // Whether there is recursion, dynamic allocas, indirect calls or some other
      // reason there may be statically unknown stack usage.
      bool DynamicCallStack = false;
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll

index 2d2f9ce..0eed325 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
@@ -74,10 +74,7 @@ entry:
  
  ; CHECK:   .name:       num_spilled_vgprs
  ; CHECK:   .symbol:     num_spilled_vgprs.kd
-; GFX700:   .vgpr_spill_count: 14
-; GFX803:   .vgpr_spill_count: 14
-; GFX900:   .vgpr_spill_count: 14
-; GFX1010:  .vgpr_spill_count: 0
+; CHECK:   .vgpr_spill_count: 14
  define amdgpu_kernel void @num_spilled_vgprs() #1 {
    %val0 = load volatile float, float addrspace(1)* @var
    %val1 = load volatile float, float addrspace(1)* @var
diff --git a/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll b/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll

index 011aef8..a668a19 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll
+++ b/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll
@@ -21,8 +21,8 @@ main_body:
  }
  
  ; GCN-LABEL: {{^}}sample_contig_nsa_10vgprs:
-; GCN-DAG: image_sample_c_l v{{[0-9]+}}, v[{{[0-9:]+}}],
-; GCN-DAG: image_sample v{{[0-9]+}}, v[{{[0-9:]+}}],
+; GCN-DAG: image_sample_c_l v{{[0-9]+}}, [{{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}],
+; GCN-DAG: image_sample v{{[0-9]+}}, [{{v[0-9]+, v[0-9]+, v[0-9]+}}],
  define amdgpu_ps <2 x float> @sample_contig_nsa_10vgprs(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s1, float %t1, float %r1, float %lod, float %r2, float %s2, float %t2) #0 {
  main_body:
    %zcompare.1 = fadd float %zcompare, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll

new file mode 100644 (file)

index 0000000..4f509c0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/occupancy-levels.ll
@@ -0,0 +1,288 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GCN,GFX1010,GFX1010W32 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefixes=GCN,GFX1010,GFX1010W64 %s
+
+; GCN-LABEL: {{^}}max_occupancy:
+; GFX9:       ; Occupancy: 10
+; GFX1010:    ; Occupancy: 20
+define amdgpu_kernel void @max_occupancy() {
+  ret void
+}
+
+; GCN-LABEL: {{^}}limited_occupancy_3:
+; GFX9:       ; Occupancy: 3
+; GFX1010W64: ; Occupancy: 3
+; GFX1010W32: ; Occupancy: 4
+define amdgpu_kernel void @limited_occupancy_3() #0 {
+  ret void
+}
+
+; GCN-LABEL: {{^}}limited_occupancy_18:
+; GFX9:       ; Occupancy: 10
+; GFX1010:    ; Occupancy: 18
+define amdgpu_kernel void @limited_occupancy_18() #1 {
+  ret void
+}
+
+; GCN-LABEL: {{^}}limited_occupancy_19:
+; GFX9:       ; Occupancy: 10
+; GFX1010:    ; Occupancy: 18
+define amdgpu_kernel void @limited_occupancy_19() #2 {
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_24_vgprs:
+; GFX9:       ; Occupancy: 10
+; GFX1010:    ; Occupancy: 20
+define amdgpu_kernel void @used_24_vgprs() {
+  call void asm sideeffect "", "~{v23}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_28_vgprs:
+; GFX9:       ; Occupancy: 9
+; GFX1010W64: ; Occupancy: 18
+; GFX1010W32: ; Occupancy: 20
+define amdgpu_kernel void @used_28_vgprs() {
+  call void asm sideeffect "", "~{v27}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_32_vgprs:
+; GFX9:       ; Occupancy: 8
+; GFX1010W64: ; Occupancy: 16
+; GFX1010W32: ; Occupancy: 20
+define amdgpu_kernel void @used_32_vgprs() {
+  call void asm sideeffect "", "~{v31}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_36_vgprs:
+; GFX9:       ; Occupancy: 7
+; GFX1010W64: ; Occupancy: 14
+; GFX1010W32: ; Occupancy: 20
+define amdgpu_kernel void @used_36_vgprs() {
+  call void asm sideeffect "", "~{v35}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_40_vgprs:
+; GFX9:       ; Occupancy: 6
+; GFX1010W64: ; Occupancy: 12
+; GFX1010W32: ; Occupancy: 20
+define amdgpu_kernel void @used_40_vgprs() {
+  call void asm sideeffect "", "~{v39}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_44_vgprs:
+; GFX9:       ; Occupancy: 5
+; GFX1010W64: ; Occupancy: 11
+; GFX1010W32: ; Occupancy: 20
+define amdgpu_kernel void @used_44_vgprs() {
+  call void asm sideeffect "", "~{v43}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_48_vgprs:
+; GFX9:       ; Occupancy: 5
+; GFX1010W64: ; Occupancy: 10
+; GFX1010W32: ; Occupancy: 20
+define amdgpu_kernel void @used_48_vgprs() {
+  call void asm sideeffect "", "~{v47}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_56_vgprs:
+; GFX9:       ; Occupancy: 4
+; GFX1010W64: ; Occupancy: 9
+; GFX1010W32: ; Occupancy: 18
+define amdgpu_kernel void @used_56_vgprs() {
+  call void asm sideeffect "", "~{v55}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_64_vgprs:
+; GFX9:       ; Occupancy: 4
+; GFX1010W64: ; Occupancy: 8
+; GFX1010W32: ; Occupancy: 16
+define amdgpu_kernel void @used_64_vgprs() {
+  call void asm sideeffect "", "~{v63}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_72_vgprs:
+; GFX9:       ; Occupancy: 3
+; GFX1010W64: ; Occupancy: 7
+; GFX1010W32: ; Occupancy: 14
+define amdgpu_kernel void @used_72_vgprs() {
+  call void asm sideeffect "", "~{v71}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_80_vgprs:
+; GFX9:       ; Occupancy: 3
+; GFX1010W64: ; Occupancy: 6
+; GFX1010W32: ; Occupancy: 12
+define amdgpu_kernel void @used_80_vgprs() {
+  call void asm sideeffect "", "~{v79}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_84_vgprs:
+; GFX9:       ; Occupancy: 3
+; GFX1010W64: ; Occupancy: 6
+; GFX1010W32: ; Occupancy: 11
+define amdgpu_kernel void @used_84_vgprs() {
+  call void asm sideeffect "", "~{v83}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_88_vgprs:
+; GFX9:       ; Occupancy: 2
+; GFX1010W64: ; Occupancy: 5
+; GFX1010W32: ; Occupancy: 11
+define amdgpu_kernel void @used_88_vgprs() {
+  call void asm sideeffect "", "~{v87}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_96_vgprs:
+; GFX9:       ; Occupancy: 2
+; GFX1010W64: ; Occupancy: 5
+; GFX1010W32: ; Occupancy: 10
+define amdgpu_kernel void @used_96_vgprs() {
+  call void asm sideeffect "", "~{v95}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_100_vgprs:
+; GFX9:       ; Occupancy: 2
+; GFX1010W64: ; Occupancy: 5
+; GFX1010W32: ; Occupancy: 9
+define amdgpu_kernel void @used_100_vgprs() {
+  call void asm sideeffect "", "~{v99}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_112_vgprs:
+; GFX9:       ; Occupancy: 2
+; GFX1010W64: ; Occupancy: 4
+; GFX1010W32: ; Occupancy: 9
+define amdgpu_kernel void @used_112_vgprs() {
+  call void asm sideeffect "", "~{v111}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_128_vgprs:
+; GFX9:       ; Occupancy: 2
+; GFX1010W64: ; Occupancy: 4
+; GFX1010W32: ; Occupancy: 8
+define amdgpu_kernel void @used_128_vgprs() {
+  call void asm sideeffect "", "~{v127}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_144_vgprs:
+; GFX9:       ; Occupancy: 1
+; GFX1010W64: ; Occupancy: 3
+; GFX1010W32: ; Occupancy: 7
+define amdgpu_kernel void @used_144_vgprs() {
+  call void asm sideeffect "", "~{v143}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_168_vgprs:
+; GFX9:       ; Occupancy: 1
+; GFX1010W64: ; Occupancy: 3
+; GFX1010W32: ; Occupancy: 6
+define amdgpu_kernel void @used_168_vgprs() {
+  call void asm sideeffect "", "~{v167}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_200_vgprs:
+; GFX9:       ; Occupancy: 1
+; GFX1010W64: ; Occupancy: 2
+; GFX1010W32: ; Occupancy: 5
+define amdgpu_kernel void @used_200_vgprs() {
+  call void asm sideeffect "", "~{v199}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_256_vgprs:
+; GFX9:       ; Occupancy: 1
+; GFX1010W64: ; Occupancy: 2
+; GFX1010W32: ; Occupancy: 4
+define amdgpu_kernel void @used_256_vgprs() {
+  call void asm sideeffect "", "~{v255}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_80_sgprs:
+; GFX9:       ; Occupancy: 10
+; GFX1010:    ; Occupancy: 20
+define amdgpu_kernel void @used_80_sgprs() {
+  call void asm sideeffect "", "~{s79}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_88_sgprs:
+; GFX9:       ; Occupancy: 9
+; GFX1010:    ; Occupancy: 20
+define amdgpu_kernel void @used_88_sgprs() {
+  call void asm sideeffect "", "~{s87}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_100_sgprs:
+; GFX9:       ; Occupancy: 8
+; GFX1010:    ; Occupancy: 20
+define amdgpu_kernel void @used_100_sgprs() {
+  call void asm sideeffect "", "~{s99}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_101_sgprs:
+; GFX9:       ; Occupancy: 7
+; GFX1010:    ; Occupancy: 20
+define amdgpu_kernel void @used_101_sgprs() {
+  call void asm sideeffect "", "~{s100}" ()
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_lds_6552:
+; GFX9:       ; Occupancy: 10
+; GFX1010:    ; Occupancy: 20
+@lds6552 = internal addrspace(3) global [6552 x i8] undef, align 4
+define amdgpu_kernel void @used_lds_6552() {
+  %p = bitcast [6552 x i8] addrspace(3)* @lds6552 to i8 addrspace(3)*
+  store volatile i8 1, i8 addrspace(3)* %p
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_lds_6556:
+; GFX9:       ; Occupancy: 9
+; GFX1010W64: ; Occupancy: 19
+; GFX1010W32: ; Occupancy: 20
+@lds6556 = internal addrspace(3) global [6556 x i8] undef, align 4
+define amdgpu_kernel void @used_lds_6556() {
+  %p = bitcast [6556 x i8] addrspace(3)* @lds6556 to i8 addrspace(3)*
+  store volatile i8 1, i8 addrspace(3)* %p
+  ret void
+}
+
+; GCN-LABEL: {{^}}used_lds_13112:
+; GFX9:       ; Occupancy: 4
+; GFX1010W64: ; Occupancy: 9
+; GFX1010W32: ; Occupancy: 19
+@lds13112 = internal addrspace(3) global [13112 x i8] undef, align 4
+define amdgpu_kernel void @used_lds_13112() {
+  %p = bitcast [13112 x i8] addrspace(3)* @lds13112 to i8 addrspace(3)*
+  store volatile i8 1, i8 addrspace(3)* %p
+  ret void
+}
+
+attributes #0 = { "amdgpu-waves-per-eu"="2,3" }
+attributes #1 = { "amdgpu-waves-per-eu"="18,18" }
+attributes #2 = { "amdgpu-waves-per-eu"="19,19" }
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll

index f4715a2..a71ca5d 100644 (file)
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -920,7 +920,7 @@ main_body:
  
  ; GCN-LABEL: {{^}}test_vgprblocks_w64_attr:
  ; Test that the wave size can be overridden in function attributes and that the block size is correct as a result
-; GFX10DEFWAVE: ; VGPRBlocks: 11
+; GFX10DEFWAVE: ; VGPRBlocks: 2
  define amdgpu_gs float @test_vgprblocks_w64_attr(float %a, float %b, float %c, float %d, float %e,
                                          float %f, float %g, float %h, float %i, float %j, float %k, float %l) #4 {
  main_body:
author	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
	Wed, 31 Jul 2019 01:07:10 +0000 (01:07 +0000)
committer	Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
	Wed, 31 Jul 2019 01:07:10 +0000 (01:07 +0000)
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp		patch \| blob \| history
llvm/lib/Target/AMDGPU/SIProgramInfo.h		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/nsa-reassign.ll		patch \| blob \| history
llvm/test/CodeGen/AMDGPU/occupancy-levels.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/AMDGPU/wave32.ll		patch \| blob \| history