From 70cb57d7da3108f4ea9cd5bc0d3b08accd109f0e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 4 Mar 2021 17:19:49 -0500 Subject: [PATCH] AMDGPU/GlobalISel: Improve private addressing mode matching This enables the look-through-copy to hack around not correctly regbankselecting constants to match the use bank. --- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 30 +- .../GlobalISel/extractelement-stack-lower.ll | 1628 +++++++++----------- .../AMDGPU/GlobalISel/insertelement-stack-lower.ll | 507 +++--- .../AMDGPU/GlobalISel/inst-select-load-private.mir | 31 +- .../CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll | 32 +- 5 files changed, 954 insertions(+), 1274 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 8e83100..8c587b5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3712,23 +3712,19 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const { Optional FI; Register VAddr = Root.getReg(); if (const MachineInstr *RootDef = MRI->getVRegDef(Root.getReg())) { - if (isBaseWithConstantOffset(Root, *MRI)) { - const MachineOperand &LHS = RootDef->getOperand(1); - const MachineOperand &RHS = RootDef->getOperand(2); - const MachineInstr *LHSDef = MRI->getVRegDef(LHS.getReg()); - const MachineInstr *RHSDef = MRI->getVRegDef(RHS.getReg()); - if (LHSDef && RHSDef) { - int64_t PossibleOffset = - RHSDef->getOperand(1).getCImm()->getSExtValue(); - if (SIInstrInfo::isLegalMUBUFImmOffset(PossibleOffset) && - (!STI.privateMemoryResourceIsRangeChecked() || - KnownBits->signBitIsZero(LHS.getReg()))) { - if (LHSDef->getOpcode() == AMDGPU::G_FRAME_INDEX) - FI = LHSDef->getOperand(1).getIndex(); - else - VAddr = LHS.getReg(); - Offset = PossibleOffset; - } + Register PtrBase; + int64_t ConstOffset; + std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(VAddr, *MRI); + if (ConstOffset != 0) { + if (SIInstrInfo::isLegalMUBUFImmOffset(ConstOffset) && + (!STI.privateMemoryResourceIsRangeChecked() || + KnownBits->signBitIsZero(PtrBase))) { + const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase); + if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX) + FI = PtrBaseDef->getOperand(1).getIndex(); + else + VAddr = PtrBase; + Offset = ConstOffset; } } else if (RootDef->getOpcode() == AMDGPU::G_FRAME_INDEX) { FI = RootDef->getOperand(1).getIndex(); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll index 3ab8af2..abb71f3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -11,58 +11,31 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_mov_b32 s6, s33 ; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000 -; GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc -; GCN-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:48 -; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v1, vcc ; GCN-NEXT: s_movk_i32 s4, 0x80 -; GCN-NEXT: v_mov_b32_e32 v17, s5 -; GCN-NEXT: v_mov_b32_e32 v16, s4 -; GCN-NEXT: s_movk_i32 s4, 0xc0 -; GCN-NEXT: s_add_u32 s32, s32, 0x10000 -; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill -; GCN-NEXT: v_lshrrev_b32_e64 v3, 6, s33 -; GCN-NEXT: v_add_u32_e32 v3, 0x100, v3 -; GCN-NEXT: v_add_u32_e32 v60, 16, v3 -; GCN-NEXT: v_add_co_u32_e32 v52, vcc, v0, v16 -; GCN-NEXT: v_addc_co_u32_e32 v53, vcc, v1, v17, vcc -; GCN-NEXT: v_mov_b32_e32 v17, s5 -; GCN-NEXT: v_mov_b32_e32 v16, s4 -; GCN-NEXT: v_add_co_u32_e32 v56, vcc, v0, v16 -; GCN-NEXT: v_addc_co_u32_e32 v57, vcc, v1, v17, vcc +; GCN-NEXT: global_load_dwordx4 v[8:11], v[3:4], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[3:4], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[56:59], v[3:4], off offset:48 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v4, s5 +; GCN-NEXT: v_add_co_u32_e32 v3, vcc, v0, v3 +; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v4, vcc ; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off ; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 ; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:32 @@ -70,254 +43,220 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:64 ; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:128 ; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:192 -; GCN-NEXT: global_load_dwordx4 v[44:47], v[52:53], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[48:51], v[52:53], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[52:55], v[52:53], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:16 -; GCN-NEXT: v_add_u32_e32 v0, 20, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:32 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[56:59], v[56:57], off offset:48 -; GCN-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 24, v3 -; GCN-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 28, v3 -; GCN-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 32, v3 -; GCN-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 36, v3 -; GCN-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 40, v3 -; GCN-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 44, v3 -; GCN-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 48, v3 -; GCN-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 52, v3 -; GCN-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 56, v3 -; GCN-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 60, v3 -; GCN-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 64, v3 -; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x44, v3 -; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x48, v3 -; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v3 -; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x50, v3 -; GCN-NEXT: buffer_store_dword v20, v60, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x54, v3 -; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x58, v3 -; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v3 -; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x60, v3 -; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x64, v3 -; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x68, v3 -; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v3 -; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v35, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0x70, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v12, v32 -; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v13, v33 -; GCN-NEXT: v_add_u32_e32 v0, 0x74, v3 -; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v14, v34 -; GCN-NEXT: v_add_u32_e32 v0, 0x78, v3 -; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v15, v35 -; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v3 -; GCN-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x80, v3 -; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x84, v3 -; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x88, v3 -; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v3 -; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x90, v3 -; GCN-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x94, v3 -; GCN-NEXT: buffer_store_dword v45, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x98, v3 -; GCN-NEXT: buffer_store_dword v46, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v3 -; GCN-NEXT: buffer_store_dword v47, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v3 -; GCN-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v3 -; GCN-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v3 -; GCN-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xac, v3 -; GCN-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v3 -; GCN-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v3 -; GCN-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v3 -; GCN-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v3 -; GCN-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v3 -; GCN-NEXT: buffer_store_dword v40, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v3 -; GCN-NEXT: buffer_store_dword v41, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v3 -; GCN-NEXT: buffer_store_dword v42, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v3 -; GCN-NEXT: buffer_store_dword v43, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 4, v3 -; GCN-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 8, v3 -; GCN-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 12, v3 -; GCN-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:256 -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0xd0, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, v8 -; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v5, v9 -; GCN-NEXT: v_add_u32_e32 v0, 0xd4, v3 -; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v6, v10 -; GCN-NEXT: v_add_u32_e32 v0, 0xd8, v3 -; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v7, v11 -; GCN-NEXT: v_add_u32_e32 v0, 0xdc, v3 -; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0xe0, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v8, v12 -; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v9, v13 -; GCN-NEXT: v_add_u32_e32 v0, 0xe4, v3 -; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v10, v14 -; GCN-NEXT: v_add_u32_e32 v0, 0xe8, v3 -; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v11, v15 -; GCN-NEXT: v_add_u32_e32 v0, 0xec, v3 -; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xf0, v3 -; GCN-NEXT: buffer_store_dword v56, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xf4, v3 -; GCN-NEXT: buffer_store_dword v57, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xf8, v3 -; GCN-NEXT: buffer_store_dword v58, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xfc, v3 -; GCN-NEXT: buffer_store_dword v59, v0, s[0:3], 0 offen +; GCN-NEXT: global_load_dwordx4 v[44:47], v[3:4], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[48:51], v[3:4], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[52:55], v[3:4], off offset:48 +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v6, s5 +; GCN-NEXT: v_mov_b32_e32 v5, s4 +; GCN-NEXT: v_add_co_u32_e32 v60, vcc, v0, v5 +; GCN-NEXT: v_addc_co_u32_e32 v61, vcc, v1, v6, vcc ; GCN-NEXT: v_and_b32_e32 v0, 63, v2 +; GCN-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GCN-NEXT: v_add_u32_e32 v0, v3, v0 +; GCN-NEXT: v_add_u32_e32 v1, 0x100, v1 +; GCN-NEXT: v_add_u32_e32 v0, v1, v0 +; GCN-NEXT: s_add_u32 s32, s32, 0x10000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[4:7], v[60:61], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[52:55], v[60:61], off offset:32 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[52:55], v[60:61], off offset:48 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:256 +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:260 +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:264 +; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:268 +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:272 +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:276 +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:280 +; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:284 +; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:288 +; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:292 +; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:296 +; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:300 +; GCN-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:304 +; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:308 +; GCN-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:312 +; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:316 +; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:320 +; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:324 +; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s33 offset:328 +; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:332 +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:384 +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:388 +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:392 +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:396 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:336 +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:340 +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:344 +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:348 +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:352 +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:356 +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:360 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:364 +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:368 +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:372 +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:376 +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:380 +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:400 +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:404 +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:408 +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:412 +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:416 +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:420 +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:424 +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:428 +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v12, v20 +; GCN-NEXT: v_mov_b32_e32 v13, v21 +; GCN-NEXT: v_mov_b32_e32 v14, v22 +; GCN-NEXT: v_mov_b32_e32 v15, v23 +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:432 +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:436 +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:440 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:444 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:448 +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:452 +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:456 +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:460 +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:464 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:468 +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:472 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:476 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v11 +; GCN-NEXT: v_mov_b32_e32 v9, v12 +; GCN-NEXT: v_mov_b32_e32 v10, v13 +; GCN-NEXT: v_mov_b32_e32 v11, v14 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:480 +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:484 +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:488 +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:492 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v12, v15 +; GCN-NEXT: v_mov_b32_e32 v13, v16 +; GCN-NEXT: v_mov_b32_e32 v14, v17 +; GCN-NEXT: v_mov_b32_e32 v15, v18 +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:496 +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:500 +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:504 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:508 ; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b32 s33, s6 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -333,58 +272,31 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_mov_b32 s6, s33 ; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000 -; GCN-NEXT: v_add_co_u32_e32 v12, vcc, 64, v0 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc -; GCN-NEXT: global_load_dwordx4 v[4:7], v[12:13], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[8:11], v[12:13], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[12:15], v[12:13], off offset:48 -; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v1, vcc ; GCN-NEXT: s_movk_i32 s4, 0x80 -; GCN-NEXT: v_mov_b32_e32 v17, s5 -; GCN-NEXT: v_mov_b32_e32 v16, s4 -; GCN-NEXT: s_movk_i32 s4, 0xc0 -; GCN-NEXT: s_add_u32 s32, s32, 0x10000 -; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill -; GCN-NEXT: v_lshrrev_b32_e64 v3, 6, s33 -; GCN-NEXT: v_add_u32_e32 v3, 0x100, v3 -; GCN-NEXT: v_add_u32_e32 v60, 16, v3 -; GCN-NEXT: v_add_co_u32_e32 v52, vcc, v0, v16 -; GCN-NEXT: v_addc_co_u32_e32 v53, vcc, v1, v17, vcc -; GCN-NEXT: v_mov_b32_e32 v17, s5 -; GCN-NEXT: v_mov_b32_e32 v16, s4 -; GCN-NEXT: v_add_co_u32_e32 v56, vcc, v0, v16 -; GCN-NEXT: v_addc_co_u32_e32 v57, vcc, v1, v17, vcc +; GCN-NEXT: global_load_dwordx4 v[8:11], v[3:4], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[3:4], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[56:59], v[3:4], off offset:48 +; GCN-NEXT: s_mov_b32 s5, 0 +; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v4, s5 +; GCN-NEXT: v_add_co_u32_e32 v3, vcc, v0, v3 +; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v4, vcc ; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off ; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 ; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:32 @@ -392,259 +304,225 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:64 ; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:128 ; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:192 -; GCN-NEXT: global_load_dwordx4 v[44:47], v[52:53], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[48:51], v[52:53], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[52:55], v[52:53], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:16 -; GCN-NEXT: v_add_u32_e32 v0, 20, v3 -; GCN-NEXT: v_add_u32_e32 v1, 0xd0, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[12:15], v[56:57], off offset:32 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[56:59], v[56:57], off offset:48 -; GCN-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 24, v3 -; GCN-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 28, v3 -; GCN-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 32, v3 -; GCN-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 36, v3 -; GCN-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 40, v3 -; GCN-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 44, v3 -; GCN-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 48, v3 -; GCN-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 52, v3 -; GCN-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 56, v3 -; GCN-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 60, v3 -; GCN-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 64, v3 -; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x44, v3 -; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x48, v3 -; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v3 -; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x50, v3 -; GCN-NEXT: buffer_store_dword v20, v60, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x54, v3 -; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x58, v3 -; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v3 -; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x60, v3 -; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x64, v3 -; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x68, v3 -; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v3 -; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v24, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v25, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v26, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v27, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v28, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v29, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v35, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0x70, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v12, v32 -; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v13, v33 -; GCN-NEXT: v_add_u32_e32 v0, 0x74, v3 -; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v14, v34 -; GCN-NEXT: v_add_u32_e32 v0, 0x78, v3 -; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v15, v35 -; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v3 -; GCN-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x80, v3 -; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x84, v3 -; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x88, v3 -; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v3 -; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x90, v3 -; GCN-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x94, v3 -; GCN-NEXT: buffer_store_dword v45, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x98, v3 -; GCN-NEXT: buffer_store_dword v46, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v3 -; GCN-NEXT: buffer_store_dword v47, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v3 -; GCN-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v3 -; GCN-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v3 -; GCN-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xac, v3 -; GCN-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v3 -; GCN-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v3 -; GCN-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v3 -; GCN-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v3 -; GCN-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v3 -; GCN-NEXT: buffer_store_dword v40, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v3 -; GCN-NEXT: buffer_store_dword v41, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v3 -; GCN-NEXT: buffer_store_dword v42, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v3 -; GCN-NEXT: buffer_store_dword v43, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 4, v3 -; GCN-NEXT: buffer_store_dword v17, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 8, v3 -; GCN-NEXT: buffer_store_dword v18, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 12, v3 -; GCN-NEXT: buffer_store_dword v19, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:256 -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: global_load_dwordx4 v[44:47], v[3:4], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[48:51], v[3:4], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[52:55], v[3:4], off offset:48 +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v6, s5 +; GCN-NEXT: v_mov_b32_e32 v5, s4 +; GCN-NEXT: v_add_co_u32_e32 v60, vcc, v0, v5 +; GCN-NEXT: v_addc_co_u32_e32 v61, vcc, v1, v6, vcc ; GCN-NEXT: v_lshrrev_b32_e32 v0, 1, v2 ; GCN-NEXT: v_and_b32_e32 v0, 63, v0 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GCN-NEXT: v_add_u32_e32 v0, v3, v0 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, v8 -; GCN-NEXT: buffer_store_dword v4, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v5, v9 -; GCN-NEXT: v_add_u32_e32 v1, 0xd4, v3 -; GCN-NEXT: buffer_store_dword v5, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v6, v10 -; GCN-NEXT: v_add_u32_e32 v1, 0xd8, v3 -; GCN-NEXT: buffer_store_dword v6, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v7, v11 -; GCN-NEXT: v_add_u32_e32 v1, 0xdc, v3 -; GCN-NEXT: buffer_store_dword v7, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v1, 0xe0, v3 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v8, v12 -; GCN-NEXT: buffer_store_dword v8, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v9, v13 -; GCN-NEXT: v_add_u32_e32 v1, 0xe4, v3 -; GCN-NEXT: buffer_store_dword v9, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v10, v14 -; GCN-NEXT: v_add_u32_e32 v1, 0xe8, v3 -; GCN-NEXT: buffer_store_dword v10, v1, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v11, v15 -; GCN-NEXT: v_add_u32_e32 v1, 0xec, v3 -; GCN-NEXT: buffer_store_dword v11, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf0, v3 -; GCN-NEXT: buffer_store_dword v56, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf4, v3 -; GCN-NEXT: buffer_store_dword v57, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xf8, v3 -; GCN-NEXT: buffer_store_dword v58, v1, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, 0xfc, v3 -; GCN-NEXT: buffer_store_dword v59, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload ; GCN-NEXT: v_and_b32_e32 v1, 1, v2 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1 +; GCN-NEXT: s_add_u32 s32, s32, 0x10000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[4:7], v[60:61], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[52:55], v[60:61], off offset:32 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[52:55], v[60:61], off offset:48 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:256 +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:260 +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:264 +; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:268 +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:272 +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:276 +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:280 +; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:284 +; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:288 +; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:292 +; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:296 +; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:300 +; GCN-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:304 +; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:308 +; GCN-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:312 +; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:316 +; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:320 +; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:324 +; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s33 offset:328 +; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:332 +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:384 +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:388 +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:392 +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:396 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:336 +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:340 +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:344 +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:348 +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:352 +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:356 +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:360 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:364 +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:368 +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:372 +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:376 +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:380 +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:400 +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:404 +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:408 +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:412 +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:416 +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:420 +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:424 +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:428 +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v12, v20 +; GCN-NEXT: v_mov_b32_e32 v13, v21 +; GCN-NEXT: v_mov_b32_e32 v14, v22 +; GCN-NEXT: v_mov_b32_e32 v15, v23 +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:432 +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:436 +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:440 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:444 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:448 +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:452 +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:456 +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:460 +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:464 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:468 +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:472 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:476 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v11 +; GCN-NEXT: v_mov_b32_e32 v9, v12 +; GCN-NEXT: v_mov_b32_e32 v10, v13 +; GCN-NEXT: v_mov_b32_e32 v11, v14 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:480 +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:484 +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:488 +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:492 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v12, v15 +; GCN-NEXT: v_mov_b32_e32 v13, v16 +; GCN-NEXT: v_mov_b32_e32 v14, v17 +; GCN-NEXT: v_mov_b32_e32 v15, v18 +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:496 +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:500 +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:504 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:508 +; GCN-NEXT: v_lshrrev_b32_e64 v15, 6, s33 +; GCN-NEXT: v_add_u32_e32 v15, 0x100, v15 +; GCN-NEXT: v_add_u32_e32 v0, v15, v0 +; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b32 s33, s6 -; GCN-NEXT: s_waitcnt vmcnt(13) +; GCN-NEXT: s_waitcnt vmcnt(14) ; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -661,61 +539,31 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_add_u32 s33, s32, 0x3fc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xffffc000 ; GCN-NEXT: v_add_co_u32_e32 v3, vcc, 64, v0 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 ; 4-byte Folded Spill ; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v1, vcc -; GCN-NEXT: global_load_dwordx4 v[7:10], v[3:4], off offset:16 -; GCN-NEXT: global_load_dwordx4 v[11:14], v[3:4], off offset:32 -; GCN-NEXT: global_load_dwordx4 v[56:59], v[3:4], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[15:18], v[0:1], off ; GCN-NEXT: s_movk_i32 s4, 0x80 +; GCN-NEXT: global_load_dwordx4 v[8:11], v[3:4], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[12:15], v[3:4], off offset:32 +; GCN-NEXT: global_load_dwordx4 v[56:59], v[3:4], off offset:48 ; GCN-NEXT: s_mov_b32 s5, 0 ; GCN-NEXT: v_mov_b32_e32 v3, s4 ; GCN-NEXT: v_mov_b32_e32 v4, s5 ; GCN-NEXT: v_add_co_u32_e32 v3, vcc, v0, v3 -; GCN-NEXT: s_movk_i32 s4, 0xc0 -; GCN-NEXT: v_mov_b32_e32 v6, s5 ; GCN-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v4, vcc -; GCN-NEXT: v_mov_b32_e32 v5, s4 -; GCN-NEXT: v_add_co_u32_e32 v60, vcc, v0, v5 -; GCN-NEXT: v_addc_co_u32_e32 v61, vcc, v1, v6, vcc -; GCN-NEXT: v_lshrrev_b32_e64 v62, 6, s33 -; GCN-NEXT: v_add_u32_e32 v62, 0x100, v62 -; GCN-NEXT: v_add_u32_e32 v2, 16, v62 -; GCN-NEXT: s_add_u32 s32, s32, 0x14000 -; GCN-NEXT: s_sub_u32 s32, s32, 0x14000 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:704 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off ; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:16 ; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:32 ; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:48 @@ -725,292 +573,218 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: global_load_dwordx4 v[44:47], v[3:4], off offset:16 ; GCN-NEXT: global_load_dwordx4 v[48:51], v[3:4], off offset:32 ; GCN-NEXT: global_load_dwordx4 v[52:55], v[3:4], off offset:48 -; GCN-NEXT: global_load_dwordx4 v[15:18], v[60:61], off offset:16 -; GCN-NEXT: v_add_u32_e32 v0, 20, v62 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:708 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:712 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:716 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:720 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:724 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:728 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:732 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:736 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:740 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:744 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:748 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:752 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:756 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:760 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:764 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:768 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[15:18], v[60:61], off offset:32 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill -; GCN-NEXT: global_load_dwordx4 v[15:18], v[60:61], off offset:48 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v20, v2, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v21, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 24, v62 -; GCN-NEXT: buffer_store_dword v22, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 28, v62 -; GCN-NEXT: buffer_store_dword v23, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 32, v62 -; GCN-NEXT: buffer_store_dword v24, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 36, v62 -; GCN-NEXT: buffer_store_dword v25, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 40, v62 -; GCN-NEXT: buffer_store_dword v26, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 44, v62 -; GCN-NEXT: buffer_store_dword v27, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 48, v62 -; GCN-NEXT: buffer_store_dword v28, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 52, v62 -; GCN-NEXT: buffer_store_dword v29, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 56, v62 -; GCN-NEXT: buffer_store_dword v30, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 60, v62 -; GCN-NEXT: buffer_store_dword v31, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 64, v62 -; GCN-NEXT: buffer_store_dword v32, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x44, v62 -; GCN-NEXT: buffer_store_dword v33, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x48, v62 -; GCN-NEXT: buffer_store_dword v34, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x4c, v62 -; GCN-NEXT: buffer_store_dword v35, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x50, v62 -; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x54, v62 -; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x58, v62 -; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x5c, v62 -; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x60, v62 -; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x64, v62 -; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x68, v62 -; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x6c, v62 -; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x70, v62 -; GCN-NEXT: buffer_store_dword v56, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x74, v62 -; GCN-NEXT: buffer_store_dword v57, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x78, v62 -; GCN-NEXT: buffer_store_dword v58, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x7c, v62 -; GCN-NEXT: buffer_store_dword v59, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x80, v62 -; GCN-NEXT: buffer_store_dword v36, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x84, v62 -; GCN-NEXT: buffer_store_dword v37, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x88, v62 -; GCN-NEXT: buffer_store_dword v38, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x8c, v62 -; GCN-NEXT: buffer_store_dword v39, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x90, v62 -; GCN-NEXT: buffer_store_dword v44, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x94, v62 -; GCN-NEXT: buffer_store_dword v45, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x98, v62 -; GCN-NEXT: buffer_store_dword v46, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0x9c, v62 -; GCN-NEXT: buffer_store_dword v47, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa0, v62 -; GCN-NEXT: buffer_store_dword v48, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa4, v62 -; GCN-NEXT: buffer_store_dword v49, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xa8, v62 -; GCN-NEXT: buffer_store_dword v50, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xac, v62 -; GCN-NEXT: buffer_store_dword v51, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb0, v62 -; GCN-NEXT: buffer_store_dword v52, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb4, v62 -; GCN-NEXT: buffer_store_dword v53, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xb8, v62 -; GCN-NEXT: buffer_store_dword v54, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xbc, v62 -; GCN-NEXT: buffer_store_dword v55, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc0, v62 -; GCN-NEXT: buffer_store_dword v40, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc4, v62 -; GCN-NEXT: buffer_store_dword v41, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xc8, v62 -; GCN-NEXT: buffer_store_dword v42, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xcc, v62 -; GCN-NEXT: buffer_store_dword v43, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:704 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 4, v62 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 8, v62 -; GCN-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 12, v62 -; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:256 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:708 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:712 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:716 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:720 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:724 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:728 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:732 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:736 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:740 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:744 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:748 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:752 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:756 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:760 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:764 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:768 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0xd0, v62 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v4, v5 -; GCN-NEXT: v_mov_b32_e32 v5, v6 -; GCN-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xd4, v62 -; GCN-NEXT: v_mov_b32_e32 v6, v7 -; GCN-NEXT: buffer_store_dword v5, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xd8, v62 -; GCN-NEXT: buffer_store_dword v6, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v7, v8 -; GCN-NEXT: v_add_u32_e32 v0, 0xdc, v62 -; GCN-NEXT: buffer_store_dword v7, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0xe0, v62 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v8, v9 -; GCN-NEXT: v_mov_b32_e32 v9, v10 -; GCN-NEXT: buffer_store_dword v8, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xe4, v62 -; GCN-NEXT: v_mov_b32_e32 v10, v11 -; GCN-NEXT: buffer_store_dword v9, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xe8, v62 -; GCN-NEXT: buffer_store_dword v10, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v11, v12 -; GCN-NEXT: v_add_u32_e32 v0, 0xec, v62 -; GCN-NEXT: buffer_store_dword v11, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload -; GCN-NEXT: v_add_u32_e32 v0, 0xf0, v62 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v12, v13 -; GCN-NEXT: v_mov_b32_e32 v13, v14 -; GCN-NEXT: buffer_store_dword v12, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xf4, v62 -; GCN-NEXT: v_mov_b32_e32 v14, v15 -; GCN-NEXT: buffer_store_dword v13, v0, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v0, 0xf8, v62 -; GCN-NEXT: buffer_store_dword v14, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v15, v16 -; GCN-NEXT: v_add_u32_e32 v0, 0xfc, v62 -; GCN-NEXT: buffer_store_dword v15, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_and_b32_e32 v0, 31, v0 +; GCN-NEXT: s_movk_i32 s4, 0xc0 +; GCN-NEXT: v_mov_b32_e32 v6, s5 +; GCN-NEXT: v_mov_b32_e32 v5, s4 +; GCN-NEXT: v_add_co_u32_e32 v60, vcc, v0, v5 +; GCN-NEXT: v_addc_co_u32_e32 v61, vcc, v1, v6, vcc +; GCN-NEXT: v_and_b32_e32 v0, 31, v2 +; GCN-NEXT: v_lshrrev_b32_e64 v2, 6, s33 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GCN-NEXT: v_add_u32_e32 v0, v62, v0 -; GCN-NEXT: v_add_u32_e32 v1, 4, v0 -; GCN-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GCN-NEXT: v_add_u32_e32 v2, 0x100, v2 +; GCN-NEXT: v_add_u32_e32 v1, v2, v0 +; GCN-NEXT: s_add_u32 s32, s32, 0x10000 +; GCN-NEXT: s_sub_u32 s32, s32, 0x10000 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:640 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:644 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:648 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:652 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:656 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:660 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:664 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:668 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:672 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:676 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:680 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:684 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:688 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:692 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:696 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:700 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[4:7], v[60:61], off offset:16 +; GCN-NEXT: global_load_dwordx4 v[52:55], v[60:61], off offset:32 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill +; GCN-NEXT: global_load_dwordx4 v[52:55], v[60:61], off offset:48 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill +; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:256 +; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:260 +; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:264 +; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:268 +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:272 +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:276 +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:280 +; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:284 +; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:288 +; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:292 +; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:296 +; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:300 +; GCN-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:304 +; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:308 +; GCN-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:312 +; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:316 +; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:320 +; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:324 +; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s33 offset:328 +; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:332 +; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:384 +; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:388 +; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:392 +; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:396 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:336 +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:340 +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:344 +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:348 +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:352 +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:356 +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:360 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:364 +; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:368 +; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:372 +; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:376 +; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:380 +; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:400 +; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:404 +; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:408 +; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:412 +; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:416 +; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:420 +; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:424 +; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:428 +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:640 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:644 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:648 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:652 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:656 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:660 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:664 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:668 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:672 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:676 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:680 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], s33 offset:684 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], s33 offset:688 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], s33 offset:692 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], s33 offset:696 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v23, off, s[0:3], s33 offset:700 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v12, v20 +; GCN-NEXT: v_mov_b32_e32 v13, v21 +; GCN-NEXT: v_mov_b32_e32 v14, v22 +; GCN-NEXT: v_mov_b32_e32 v15, v23 +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:432 +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:436 +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:440 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:444 +; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:448 +; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:452 +; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:456 +; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:460 +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:464 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:468 +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:472 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:476 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:588 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:592 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:596 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:600 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:604 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:608 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:612 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:616 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:620 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:624 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:628 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:632 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:636 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v8, v11 +; GCN-NEXT: v_mov_b32_e32 v9, v12 +; GCN-NEXT: v_mov_b32_e32 v10, v13 +; GCN-NEXT: v_mov_b32_e32 v11, v14 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:480 +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:484 +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:488 +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:492 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], s33 offset:512 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s33 offset:516 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s33 offset:520 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s33 offset:524 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s33 offset:528 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s33 offset:532 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s33 offset:536 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], s33 offset:540 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], s33 offset:544 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], s33 offset:548 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], s33 offset:552 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], s33 offset:556 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], s33 offset:560 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:564 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v12, v15 +; GCN-NEXT: v_mov_b32_e32 v13, v16 +; GCN-NEXT: v_mov_b32_e32 v14, v17 +; GCN-NEXT: v_mov_b32_e32 v15, v18 +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:496 +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:500 +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:504 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:508 +; GCN-NEXT: buffer_load_dword v0, v1, s[0:3], 0 offen +; GCN-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen offset:4 +; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b32 s33, s6 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll index b28af50..40b38c6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll @@ -10,15 +10,15 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out. ; GCN-NEXT: s_add_u32 s0, s0, s7 ; GCN-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 ; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 -; GCN-NEXT: v_mov_b32_e32 v16, 0x100 ; GCN-NEXT: s_addc_u32 s1, s1, 0 -; GCN-NEXT: v_add_u32_e32 v31, 64, v16 +; GCN-NEXT: v_mov_b32_e32 v16, 0x100 +; GCN-NEXT: v_mov_b32_e32 v64, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_load_dwordx16 s[36:51], s[10:11], 0x0 ; GCN-NEXT: s_load_dwordx16 s[52:67], s[10:11], 0x40 ; GCN-NEXT: s_load_dwordx16 s[12:27], s[10:11], 0x80 -; GCN-NEXT: v_add_u32_e32 v32, 0x44, v16 -; GCN-NEXT: v_add_u32_e32 v33, 0x48, v16 +; GCN-NEXT: s_and_b32 s4, s7, 63 +; GCN-NEXT: s_lshl_b32 s4, s4, 2 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v0, s36 ; GCN-NEXT: v_mov_b32_e32 v1, s37 @@ -38,328 +38,217 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out. ; GCN-NEXT: v_mov_b32_e32 v15, s51 ; GCN-NEXT: s_load_dwordx16 s[36:51], s[10:11], 0xc0 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:256 -; GCN-NEXT: v_add_u32_e32 v0, 4, v16 -; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v1, s52 -; GCN-NEXT: buffer_store_dword v1, v31, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v1, s53 -; GCN-NEXT: buffer_store_dword v1, v32, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v1, s54 -; GCN-NEXT: buffer_store_dword v1, v33, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v34, 0x4c, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s55 -; GCN-NEXT: buffer_store_dword v1, v34, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v35, 0x50, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s56 -; GCN-NEXT: buffer_store_dword v1, v35, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v36, 0x54, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s57 -; GCN-NEXT: buffer_store_dword v1, v36, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v37, 0x58, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s58 -; GCN-NEXT: buffer_store_dword v1, v37, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v38, 0x5c, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s59 -; GCN-NEXT: buffer_store_dword v1, v38, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v39, 0x60, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s60 -; GCN-NEXT: buffer_store_dword v1, v39, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v40, 0x64, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s61 -; GCN-NEXT: buffer_store_dword v1, v40, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v41, 0x68, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s62 -; GCN-NEXT: buffer_store_dword v1, v41, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v42, 0x6c, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s63 -; GCN-NEXT: buffer_store_dword v1, v42, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v43, 0x70, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s64 -; GCN-NEXT: buffer_store_dword v1, v43, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v44, 0x74, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s65 -; GCN-NEXT: buffer_store_dword v1, v44, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v45, 0x78, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s66 -; GCN-NEXT: buffer_store_dword v1, v45, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v46, 0x7c, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s67 -; GCN-NEXT: buffer_store_dword v1, v46, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v47, 0x80, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s12 -; GCN-NEXT: buffer_store_dword v1, v47, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v48, 0x84, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s13 -; GCN-NEXT: buffer_store_dword v1, v48, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v49, 0x88, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s14 -; GCN-NEXT: buffer_store_dword v1, v49, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v50, 0x8c, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s15 -; GCN-NEXT: buffer_store_dword v1, v50, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v51, 0x90, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s16 -; GCN-NEXT: buffer_store_dword v1, v51, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v52, 0x94, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s17 -; GCN-NEXT: buffer_store_dword v1, v52, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v53, 0x98, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s18 -; GCN-NEXT: buffer_store_dword v1, v53, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v54, 0x9c, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s19 -; GCN-NEXT: buffer_store_dword v1, v54, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v55, 0xa0, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s20 -; GCN-NEXT: buffer_store_dword v1, v55, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v56, 0xa4, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s21 -; GCN-NEXT: buffer_store_dword v1, v56, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v57, 0xa8, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s22 -; GCN-NEXT: buffer_store_dword v1, v57, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v58, 0xac, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s23 -; GCN-NEXT: buffer_store_dword v1, v58, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v59, 0xb0, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s24 -; GCN-NEXT: buffer_store_dword v1, v59, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v60, 0xb4, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s25 -; GCN-NEXT: buffer_store_dword v1, v60, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v61, 0xb8, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s26 -; GCN-NEXT: buffer_store_dword v1, v61, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v62, 0xbc, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s27 -; GCN-NEXT: buffer_store_dword v1, v62, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v63, 0xc0, v16 +; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:260 +; GCN-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:264 +; GCN-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:268 +; GCN-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:272 +; GCN-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:276 +; GCN-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:280 +; GCN-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:284 +; GCN-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:288 +; GCN-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:292 +; GCN-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:296 +; GCN-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:300 +; GCN-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:304 +; GCN-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:308 +; GCN-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:312 +; GCN-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:316 +; GCN-NEXT: v_mov_b32_e32 v0, s52 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:320 +; GCN-NEXT: v_mov_b32_e32 v0, s53 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:324 +; GCN-NEXT: v_mov_b32_e32 v0, s54 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:328 +; GCN-NEXT: v_mov_b32_e32 v0, s55 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:332 +; GCN-NEXT: v_mov_b32_e32 v0, s56 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:336 +; GCN-NEXT: v_mov_b32_e32 v0, s57 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:340 +; GCN-NEXT: v_mov_b32_e32 v0, s58 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:344 +; GCN-NEXT: v_mov_b32_e32 v0, s59 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:348 +; GCN-NEXT: v_mov_b32_e32 v0, s60 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:352 +; GCN-NEXT: v_mov_b32_e32 v0, s61 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:356 +; GCN-NEXT: v_mov_b32_e32 v0, s62 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:360 +; GCN-NEXT: v_mov_b32_e32 v0, s63 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:364 +; GCN-NEXT: v_mov_b32_e32 v0, s64 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:368 +; GCN-NEXT: v_mov_b32_e32 v0, s65 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:372 +; GCN-NEXT: v_mov_b32_e32 v0, s66 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:376 +; GCN-NEXT: v_mov_b32_e32 v0, s67 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:380 +; GCN-NEXT: v_mov_b32_e32 v0, s12 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:384 +; GCN-NEXT: v_mov_b32_e32 v0, s13 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:388 +; GCN-NEXT: v_mov_b32_e32 v0, s14 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:392 +; GCN-NEXT: v_mov_b32_e32 v0, s15 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:396 +; GCN-NEXT: v_mov_b32_e32 v0, s16 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:400 +; GCN-NEXT: v_mov_b32_e32 v0, s17 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:404 +; GCN-NEXT: v_mov_b32_e32 v0, s18 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:408 +; GCN-NEXT: v_mov_b32_e32 v0, s19 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:412 +; GCN-NEXT: v_mov_b32_e32 v0, s20 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:416 +; GCN-NEXT: v_mov_b32_e32 v0, s21 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:420 +; GCN-NEXT: v_mov_b32_e32 v0, s22 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:424 +; GCN-NEXT: v_mov_b32_e32 v0, s23 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:428 +; GCN-NEXT: v_mov_b32_e32 v0, s24 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:432 +; GCN-NEXT: v_mov_b32_e32 v0, s25 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:436 +; GCN-NEXT: v_mov_b32_e32 v0, s26 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:440 +; GCN-NEXT: v_mov_b32_e32 v0, s27 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:444 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v1, s36 -; GCN-NEXT: buffer_store_dword v1, v63, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v64, 0xc4, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s37 -; GCN-NEXT: buffer_store_dword v1, v64, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v65, 0xc8, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s38 -; GCN-NEXT: buffer_store_dword v1, v65, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v66, 0xcc, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s39 -; GCN-NEXT: buffer_store_dword v1, v66, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v67, 0xd0, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s40 -; GCN-NEXT: buffer_store_dword v1, v67, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v68, 0xd4, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s41 -; GCN-NEXT: buffer_store_dword v1, v68, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v69, 0xd8, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s42 -; GCN-NEXT: buffer_store_dword v1, v69, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v70, 0xdc, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s43 -; GCN-NEXT: buffer_store_dword v1, v70, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v71, 0xe0, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s44 -; GCN-NEXT: buffer_store_dword v1, v71, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v72, 0xe4, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s45 -; GCN-NEXT: buffer_store_dword v1, v72, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v73, 0xe8, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s46 -; GCN-NEXT: buffer_store_dword v1, v73, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v74, 0xec, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s47 -; GCN-NEXT: buffer_store_dword v1, v74, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v75, 0xf0, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s48 -; GCN-NEXT: buffer_store_dword v1, v75, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v76, 0xf4, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s49 -; GCN-NEXT: s_and_b32 s4, s7, 63 -; GCN-NEXT: buffer_store_dword v1, v76, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v77, 0xf8, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s50 -; GCN-NEXT: v_add_u32_e32 v17, 8, v16 -; GCN-NEXT: buffer_store_dword v1, v77, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v78, 0xfc, v16 -; GCN-NEXT: v_mov_b32_e32 v1, s51 -; GCN-NEXT: s_lshl_b32 s4, s4, 2 -; GCN-NEXT: buffer_store_dword v2, v17, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v18, 12, v16 -; GCN-NEXT: v_add_u32_e32 v19, 16, v16 -; GCN-NEXT: v_add_u32_e32 v20, 20, v16 -; GCN-NEXT: v_add_u32_e32 v21, 24, v16 -; GCN-NEXT: v_add_u32_e32 v22, 28, v16 -; GCN-NEXT: v_add_u32_e32 v23, 32, v16 -; GCN-NEXT: v_add_u32_e32 v24, 36, v16 -; GCN-NEXT: v_add_u32_e32 v25, 40, v16 -; GCN-NEXT: v_add_u32_e32 v26, 44, v16 -; GCN-NEXT: v_add_u32_e32 v27, 48, v16 -; GCN-NEXT: v_add_u32_e32 v28, 52, v16 -; GCN-NEXT: v_add_u32_e32 v29, 56, v16 -; GCN-NEXT: v_add_u32_e32 v30, 60, v16 -; GCN-NEXT: buffer_store_dword v1, v78, s[0:3], 0 offen -; GCN-NEXT: v_add_u32_e32 v1, s4, v16 -; GCN-NEXT: v_mov_b32_e32 v2, s6 -; GCN-NEXT: buffer_store_dword v3, v18, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v4, v19, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v5, v20, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v6, v21, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v7, v22, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v8, v23, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v9, v24, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v10, v25, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v11, v26, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v12, v27, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v13, v28, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v14, v29, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v15, v30, s[0:3], 0 offen -; GCN-NEXT: buffer_store_dword v2, v1, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen -; GCN-NEXT: s_nop 0 -; GCN-NEXT: buffer_load_dword v2, v17, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v3, v18, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v4, v19, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v5, v20, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v6, v21, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v7, v22, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v8, v23, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v9, v24, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v10, v25, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v11, v26, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v12, v27, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v13, v28, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v14, v29, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v15, v30, s[0:3], 0 offen -; GCN-NEXT: ; kill: killed $vgpr30 -; GCN-NEXT: ; kill: killed $vgpr19 -; GCN-NEXT: ; kill: killed $vgpr23 -; GCN-NEXT: ; kill: killed $vgpr27 -; GCN-NEXT: ; kill: killed $vgpr20 -; GCN-NEXT: ; kill: killed $vgpr24 -; GCN-NEXT: ; kill: killed $vgpr28 -; GCN-NEXT: ; kill: killed $vgpr17 -; GCN-NEXT: ; kill: killed $vgpr21 -; GCN-NEXT: ; kill: killed $vgpr25 -; GCN-NEXT: ; kill: killed $vgpr0 -; GCN-NEXT: ; kill: killed $vgpr29 -; GCN-NEXT: ; kill: killed $vgpr18 -; GCN-NEXT: ; kill: killed $vgpr22 -; GCN-NEXT: ; kill: killed $vgpr26 -; GCN-NEXT: buffer_load_dword v16, v31, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v17, v32, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v18, v33, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v19, v34, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v20, v35, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v21, v36, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v22, v37, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v23, v38, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v24, v39, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v25, v40, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v26, v41, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v27, v42, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v28, v43, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v29, v44, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v30, v45, s[0:3], 0 offen -; GCN-NEXT: ; kill: killed $vgpr45 -; GCN-NEXT: ; kill: killed $vgpr34 -; GCN-NEXT: ; kill: killed $vgpr38 -; GCN-NEXT: ; kill: killed $vgpr42 -; GCN-NEXT: ; kill: killed $vgpr31 -; GCN-NEXT: ; kill: killed $vgpr35 -; GCN-NEXT: ; kill: killed $vgpr39 -; GCN-NEXT: ; kill: killed $vgpr43 -; GCN-NEXT: ; kill: killed $vgpr32 -; GCN-NEXT: ; kill: killed $vgpr36 -; GCN-NEXT: ; kill: killed $vgpr40 -; GCN-NEXT: ; kill: killed $vgpr44 -; GCN-NEXT: ; kill: killed $vgpr33 -; GCN-NEXT: ; kill: killed $vgpr37 -; GCN-NEXT: ; kill: killed $vgpr41 -; GCN-NEXT: buffer_load_dword v31, v46, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v32, v47, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v33, v48, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v34, v49, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v35, v50, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v36, v51, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v37, v52, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v38, v53, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v39, v54, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v40, v55, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v41, v56, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v42, v57, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v43, v58, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v44, v59, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v45, v60, s[0:3], 0 offen -; GCN-NEXT: ; kill: killed $vgpr60 -; GCN-NEXT: ; kill: killed $vgpr49 -; GCN-NEXT: ; kill: killed $vgpr53 -; GCN-NEXT: ; kill: killed $vgpr57 -; GCN-NEXT: ; kill: killed $vgpr46 -; GCN-NEXT: ; kill: killed $vgpr50 -; GCN-NEXT: ; kill: killed $vgpr54 -; GCN-NEXT: ; kill: killed $vgpr58 -; GCN-NEXT: ; kill: killed $vgpr47 -; GCN-NEXT: ; kill: killed $vgpr51 -; GCN-NEXT: ; kill: killed $vgpr55 -; GCN-NEXT: ; kill: killed $vgpr59 -; GCN-NEXT: ; kill: killed $vgpr48 -; GCN-NEXT: ; kill: killed $vgpr52 -; GCN-NEXT: ; kill: killed $vgpr56 -; GCN-NEXT: buffer_load_dword v46, v61, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v47, v62, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v48, v63, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v49, v64, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v50, v65, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v51, v66, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v52, v67, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v53, v68, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v54, v69, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v55, v70, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v56, v71, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v57, v72, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v58, v73, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v59, v74, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v60, v75, s[0:3], 0 offen -; GCN-NEXT: ; kill: killed $vgpr64 -; GCN-NEXT: ; kill: killed $vgpr68 -; GCN-NEXT: ; kill: killed $vgpr72 -; GCN-NEXT: ; kill: killed $vgpr61 -; GCN-NEXT: ; kill: killed $vgpr65 -; GCN-NEXT: ; kill: killed $vgpr69 -; GCN-NEXT: ; kill: killed $vgpr73 -; GCN-NEXT: ; kill: killed $vgpr62 -; GCN-NEXT: ; kill: killed $vgpr66 -; GCN-NEXT: ; kill: killed $vgpr70 -; GCN-NEXT: ; kill: killed $vgpr74 -; GCN-NEXT: ; kill: killed $vgpr63 -; GCN-NEXT: ; kill: killed $vgpr67 -; GCN-NEXT: ; kill: killed $vgpr71 -; GCN-NEXT: ; kill: killed $vgpr75 -; GCN-NEXT: buffer_load_dword v61, v76, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v62, v77, s[0:3], 0 offen -; GCN-NEXT: buffer_load_dword v63, v78, s[0:3], 0 offen -; GCN-NEXT: ; kill: killed $vgpr76 -; GCN-NEXT: ; kill: killed $vgpr77 -; GCN-NEXT: ; kill: killed $vgpr78 +; GCN-NEXT: v_mov_b32_e32 v0, s36 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:448 +; GCN-NEXT: v_mov_b32_e32 v0, s37 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:452 +; GCN-NEXT: v_mov_b32_e32 v0, s38 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:456 +; GCN-NEXT: v_mov_b32_e32 v0, s39 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:460 +; GCN-NEXT: v_mov_b32_e32 v0, s40 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:464 +; GCN-NEXT: v_mov_b32_e32 v0, s41 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:468 +; GCN-NEXT: v_mov_b32_e32 v0, s42 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:472 +; GCN-NEXT: v_mov_b32_e32 v0, s43 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:476 +; GCN-NEXT: v_mov_b32_e32 v0, s44 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:480 +; GCN-NEXT: v_mov_b32_e32 v0, s45 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:484 +; GCN-NEXT: v_mov_b32_e32 v0, s46 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:488 +; GCN-NEXT: v_mov_b32_e32 v0, s47 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:492 +; GCN-NEXT: v_mov_b32_e32 v0, s48 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:496 +; GCN-NEXT: v_mov_b32_e32 v0, s49 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:500 +; GCN-NEXT: v_mov_b32_e32 v0, s50 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:504 +; GCN-NEXT: v_mov_b32_e32 v0, s51 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:508 +; GCN-NEXT: v_add_u32_e32 v0, s4, v16 +; GCN-NEXT: v_mov_b32_e32 v1, s6 +; GCN-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen ; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:256 -; GCN-NEXT: v_mov_b32_e32 v64, 0 -; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: s_nop 0 +; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:260 +; GCN-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:264 +; GCN-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:268 +; GCN-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:272 +; GCN-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:276 +; GCN-NEXT: buffer_load_dword v6, off, s[0:3], 0 offset:280 +; GCN-NEXT: buffer_load_dword v7, off, s[0:3], 0 offset:284 +; GCN-NEXT: buffer_load_dword v8, off, s[0:3], 0 offset:288 +; GCN-NEXT: buffer_load_dword v9, off, s[0:3], 0 offset:292 +; GCN-NEXT: buffer_load_dword v10, off, s[0:3], 0 offset:296 +; GCN-NEXT: buffer_load_dword v11, off, s[0:3], 0 offset:300 +; GCN-NEXT: buffer_load_dword v12, off, s[0:3], 0 offset:304 +; GCN-NEXT: buffer_load_dword v13, off, s[0:3], 0 offset:308 +; GCN-NEXT: buffer_load_dword v14, off, s[0:3], 0 offset:312 +; GCN-NEXT: buffer_load_dword v15, off, s[0:3], 0 offset:316 +; GCN-NEXT: buffer_load_dword v16, off, s[0:3], 0 offset:320 +; GCN-NEXT: buffer_load_dword v17, off, s[0:3], 0 offset:324 +; GCN-NEXT: buffer_load_dword v18, off, s[0:3], 0 offset:328 +; GCN-NEXT: buffer_load_dword v19, off, s[0:3], 0 offset:332 +; GCN-NEXT: buffer_load_dword v20, off, s[0:3], 0 offset:336 +; GCN-NEXT: buffer_load_dword v21, off, s[0:3], 0 offset:340 +; GCN-NEXT: buffer_load_dword v22, off, s[0:3], 0 offset:344 +; GCN-NEXT: buffer_load_dword v23, off, s[0:3], 0 offset:348 +; GCN-NEXT: buffer_load_dword v24, off, s[0:3], 0 offset:352 +; GCN-NEXT: buffer_load_dword v25, off, s[0:3], 0 offset:356 +; GCN-NEXT: buffer_load_dword v26, off, s[0:3], 0 offset:360 +; GCN-NEXT: buffer_load_dword v27, off, s[0:3], 0 offset:364 +; GCN-NEXT: buffer_load_dword v28, off, s[0:3], 0 offset:368 +; GCN-NEXT: buffer_load_dword v29, off, s[0:3], 0 offset:372 +; GCN-NEXT: buffer_load_dword v30, off, s[0:3], 0 offset:376 +; GCN-NEXT: buffer_load_dword v31, off, s[0:3], 0 offset:380 +; GCN-NEXT: buffer_load_dword v32, off, s[0:3], 0 offset:384 +; GCN-NEXT: buffer_load_dword v33, off, s[0:3], 0 offset:388 +; GCN-NEXT: buffer_load_dword v34, off, s[0:3], 0 offset:392 +; GCN-NEXT: buffer_load_dword v35, off, s[0:3], 0 offset:396 +; GCN-NEXT: buffer_load_dword v36, off, s[0:3], 0 offset:400 +; GCN-NEXT: buffer_load_dword v37, off, s[0:3], 0 offset:404 +; GCN-NEXT: buffer_load_dword v38, off, s[0:3], 0 offset:408 +; GCN-NEXT: buffer_load_dword v39, off, s[0:3], 0 offset:412 +; GCN-NEXT: buffer_load_dword v40, off, s[0:3], 0 offset:416 +; GCN-NEXT: buffer_load_dword v41, off, s[0:3], 0 offset:420 +; GCN-NEXT: buffer_load_dword v42, off, s[0:3], 0 offset:424 +; GCN-NEXT: buffer_load_dword v43, off, s[0:3], 0 offset:428 +; GCN-NEXT: buffer_load_dword v44, off, s[0:3], 0 offset:432 +; GCN-NEXT: buffer_load_dword v45, off, s[0:3], 0 offset:436 +; GCN-NEXT: buffer_load_dword v46, off, s[0:3], 0 offset:440 +; GCN-NEXT: buffer_load_dword v47, off, s[0:3], 0 offset:444 +; GCN-NEXT: buffer_load_dword v48, off, s[0:3], 0 offset:448 +; GCN-NEXT: buffer_load_dword v49, off, s[0:3], 0 offset:452 +; GCN-NEXT: buffer_load_dword v50, off, s[0:3], 0 offset:456 +; GCN-NEXT: buffer_load_dword v51, off, s[0:3], 0 offset:460 +; GCN-NEXT: buffer_load_dword v52, off, s[0:3], 0 offset:464 +; GCN-NEXT: buffer_load_dword v53, off, s[0:3], 0 offset:468 +; GCN-NEXT: buffer_load_dword v54, off, s[0:3], 0 offset:472 +; GCN-NEXT: buffer_load_dword v55, off, s[0:3], 0 offset:476 +; GCN-NEXT: buffer_load_dword v56, off, s[0:3], 0 offset:480 +; GCN-NEXT: buffer_load_dword v57, off, s[0:3], 0 offset:484 +; GCN-NEXT: buffer_load_dword v58, off, s[0:3], 0 offset:488 +; GCN-NEXT: buffer_load_dword v59, off, s[0:3], 0 offset:492 +; GCN-NEXT: buffer_load_dword v60, off, s[0:3], 0 offset:496 +; GCN-NEXT: buffer_load_dword v61, off, s[0:3], 0 offset:500 +; GCN-NEXT: buffer_load_dword v62, off, s[0:3], 0 offset:504 +; GCN-NEXT: buffer_load_dword v63, off, s[0:3], 0 offset:508 +; GCN-NEXT: s_waitcnt vmcnt(60) ; GCN-NEXT: global_store_dwordx4 v64, v[0:3], s[8:9] +; GCN-NEXT: s_waitcnt vmcnt(57) ; GCN-NEXT: global_store_dwordx4 v64, v[4:7], s[8:9] offset:16 +; GCN-NEXT: s_waitcnt vmcnt(54) ; GCN-NEXT: global_store_dwordx4 v64, v[8:11], s[8:9] offset:32 +; GCN-NEXT: s_waitcnt vmcnt(51) ; GCN-NEXT: global_store_dwordx4 v64, v[12:15], s[8:9] offset:48 +; GCN-NEXT: s_waitcnt vmcnt(48) ; GCN-NEXT: global_store_dwordx4 v64, v[16:19], s[8:9] offset:64 +; GCN-NEXT: s_waitcnt vmcnt(45) ; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[8:9] offset:80 +; GCN-NEXT: s_waitcnt vmcnt(42) ; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[8:9] offset:96 +; GCN-NEXT: s_waitcnt vmcnt(39) ; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[8:9] offset:112 +; GCN-NEXT: s_waitcnt vmcnt(36) ; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[8:9] offset:128 +; GCN-NEXT: s_waitcnt vmcnt(33) ; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[8:9] offset:144 +; GCN-NEXT: s_waitcnt vmcnt(30) ; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[8:9] offset:160 +; GCN-NEXT: s_waitcnt vmcnt(27) ; GCN-NEXT: global_store_dwordx4 v64, v[44:47], s[8:9] offset:176 +; GCN-NEXT: s_waitcnt vmcnt(24) ; GCN-NEXT: global_store_dwordx4 v64, v[48:51], s[8:9] offset:192 +; GCN-NEXT: s_waitcnt vmcnt(21) ; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[8:9] offset:208 +; GCN-NEXT: s_waitcnt vmcnt(18) ; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[8:9] offset:224 +; GCN-NEXT: s_waitcnt vmcnt(15) ; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[8:9] offset:240 ; GCN-NEXT: s_endpgm %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir index 77bbf83..f0411d4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-private.mir @@ -782,7 +782,6 @@ body: | ... --- - name: load_private_s32_from_1_fi_offset_4095 legalized: true regBankSelected: true @@ -810,6 +809,36 @@ body: | ... +# Have to hack around the copy of the constant to VGPR +--- +name: load_private_s32_from_1_fi_offset_sgpr_4095 +legalized: true +regBankSelected: true +tracksRegLiveness: true +machineFunctionInfo: + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +stack: + - { id: 0, size: 4096, alignment: 4 } + +body: | + bb.0: + + ; GFX6-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 + ; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + ; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_sgpr_4095 + ; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5) + ; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]] + %0:vgpr(p5) = G_FRAME_INDEX %stack.0 + %1:sgpr(s32) = G_CONSTANT i32 4095 + %2:vgpr(s32) = COPY %1 + %3:vgpr(p5) = G_PTR_ADD %0, %2 + %4:vgpr(s32) = G_LOAD %3 :: (load 1, align 1, addrspace 5) + $vgpr0 = COPY %4 + +... + --- name: load_private_s32_from_1_fi_offset_4096 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll index 3eeea3c..f573bae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll @@ -32,16 +32,14 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x0 ; GCN-NEXT: s_load_dword s8, s[4:5], 0x10 ; GCN-NEXT: s_add_u32 s4, s32, 0x1000 -; GCN-NEXT: s_add_u32 s5, s4, 4 -; GCN-NEXT: v_mov_b32_e32 v3, s5 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, 1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_lshl_b32 s5, s8, 2 -; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v2, 1 ; GCN-NEXT: s_add_u32 s4, s4, s5 -; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4 ; GCN-NEXT: v_mov_b32_e32 v2, s4 ; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -103,16 +101,14 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache ; GCN-NEXT: s_load_dword s8, s[4:5], 0xc ; GCN-NEXT: s_add_u32 s4, s32, 0x1000 ; GCN-NEXT: s_and_b32 s4, s4, 0xfffff000 -; GCN-NEXT: s_add_u32 s5, s4, 4 -; GCN-NEXT: v_mov_b32_e32 v3, s5 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: v_mov_b32_e32 v2, s4 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_lshl_b32 s5, s8, 2 -; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen -; GCN-NEXT: v_mov_b32_e32 v2, 1 +; GCN-NEXT: v_mov_b32_e32 v3, 1 ; GCN-NEXT: s_add_u32 s4, s4, s5 -; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4 ; GCN-NEXT: v_mov_b32_e32 v2, s4 ; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -156,7 +152,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i ; GCN-LABEL: func_non_entry_block_static_alloca_align4: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s8, s33 +; GCN-NEXT: s_mov_b32 s7, s33 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_add_u32 s32, s32, 0x400 @@ -170,11 +166,9 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i ; GCN-NEXT: s_add_u32 s6, s32, 0x1000 ; GCN-NEXT: v_mov_b32_e32 v2, 0 ; GCN-NEXT: v_mov_b32_e32 v3, s6 -; GCN-NEXT: s_add_u32 s7, s6, 4 ; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen ; GCN-NEXT: v_mov_b32_e32 v2, 1 -; GCN-NEXT: v_mov_b32_e32 v3, s7 -; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v2, v3, s[0:3], 0 offen offset:4 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v4 ; GCN-NEXT: v_add_u32_e32 v2, s6, v2 ; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen @@ -188,7 +182,7 @@ define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i ; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_sub_u32 s32, s32, 0x400 -; GCN-NEXT: s_mov_b32 s33, s8 +; GCN-NEXT: s_mov_b32 s33, s7 ; GCN-NEXT: s_setpc_b64 s[30:31] entry: @@ -222,7 +216,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out, ; GCN-LABEL: func_non_entry_block_static_alloca_align64: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s8, s33 +; GCN-NEXT: s_mov_b32 s7, s33 ; GCN-NEXT: s_add_u32 s33, s32, 0xfc0 ; GCN-NEXT: s_and_b32 s33, s33, 0xfffff000 ; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 @@ -232,13 +226,11 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out, ; GCN-NEXT: ; %bb.1: ; %bb.0 ; GCN-NEXT: s_add_u32 s6, s32, 0x1000 ; GCN-NEXT: s_and_b32 s6, s6, 0xfffff000 -; GCN-NEXT: s_add_u32 s7, s6, 4 ; GCN-NEXT: v_mov_b32_e32 v2, 0 ; GCN-NEXT: v_mov_b32_e32 v4, s6 ; GCN-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen ; GCN-NEXT: v_mov_b32_e32 v2, 1 -; GCN-NEXT: v_mov_b32_e32 v4, s7 -; GCN-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen +; GCN-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen offset:4 ; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v3 ; GCN-NEXT: v_add_u32_e32 v2, s6, v2 ; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen @@ -252,7 +244,7 @@ define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out, ; GCN-NEXT: global_store_dword v[0:1], v0, off ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_sub_u32 s32, s32, 0x2000 -; GCN-NEXT: s_mov_b32 s33, s8 +; GCN-NEXT: s_mov_b32 s33, s7 ; GCN-NEXT: s_setpc_b64 s[30:31] entry: %cond = icmp eq i32 %arg.cond, 0 -- 2.7.4