From 517171ce209adcf00214b305b8600587b3de9763 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Thu, 24 Feb 2022 11:13:46 -0800 Subject: [PATCH] [AMDGPU] Extend SILoadStoreOptimizer to handle flat load/stores TODO: merge flat with global promoting to flat. Differential Revision: https://reviews.llvm.org/D120351 --- llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 80 +++- llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir | 480 +++++++++++++++++++++ 2 files changed, 551 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp index 588a55b..4fc8d3d 100644 --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -82,7 +82,9 @@ enum InstClassEnum { GLOBAL_LOAD, GLOBAL_LOAD_SADDR, GLOBAL_STORE, - GLOBAL_STORE_SADDR + GLOBAL_STORE_SADDR, + FLAT_LOAD, + FLAT_STORE }; struct AddressRegs { @@ -244,11 +246,11 @@ private: mergeTBufferStorePair(CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore); MachineBasicBlock::iterator - mergeGlobalLoadPair(CombineInfo &CI, CombineInfo &Paired, - MachineBasicBlock::iterator InsertBefore); + mergeFlatLoadPair(CombineInfo &CI, CombineInfo &Paired, + MachineBasicBlock::iterator InsertBefore); MachineBasicBlock::iterator - mergeGlobalStorePair(CombineInfo &CI, CombineInfo &Paired, - MachineBasicBlock::iterator InsertBefore); + mergeFlatStorePair(CombineInfo &CI, CombineInfo &Paired, + MachineBasicBlock::iterator InsertBefore); void updateBaseAndOffset(MachineInstr &I, Register NewBase, int32_t NewOffset) const; @@ -323,23 +325,31 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_LOAD_DWORD_SADDR: case AMDGPU::GLOBAL_STORE_DWORD: case AMDGPU::GLOBAL_STORE_DWORD_SADDR: + case AMDGPU::FLAT_LOAD_DWORD: + case AMDGPU::FLAT_STORE_DWORD: return 1; case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM: case AMDGPU::GLOBAL_LOAD_DWORDX2: case AMDGPU::GLOBAL_LOAD_DWORDX2_SADDR: case AMDGPU::GLOBAL_STORE_DWORDX2: case AMDGPU::GLOBAL_STORE_DWORDX2_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX2: + case AMDGPU::FLAT_STORE_DWORDX2: return 2; case AMDGPU::GLOBAL_LOAD_DWORDX3: case AMDGPU::GLOBAL_LOAD_DWORDX3_SADDR: case AMDGPU::GLOBAL_STORE_DWORDX3: case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX3: + case AMDGPU::FLAT_STORE_DWORDX3: return 3; case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM: case AMDGPU::GLOBAL_LOAD_DWORDX4: case AMDGPU::GLOBAL_LOAD_DWORDX4_SADDR: case AMDGPU::GLOBAL_STORE_DWORDX4: case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: + case AMDGPU::FLAT_LOAD_DWORDX4: + case AMDGPU::FLAT_STORE_DWORDX4: return 4; case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM: return 8; @@ -444,6 +454,16 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: return GLOBAL_STORE_SADDR; + case AMDGPU::FLAT_LOAD_DWORD: + case AMDGPU::FLAT_LOAD_DWORDX2: + case AMDGPU::FLAT_LOAD_DWORDX3: + case AMDGPU::FLAT_LOAD_DWORDX4: + return FLAT_LOAD; + case AMDGPU::FLAT_STORE_DWORD: + case AMDGPU::FLAT_STORE_DWORDX2: + case AMDGPU::FLAT_STORE_DWORDX3: + case AMDGPU::FLAT_STORE_DWORDX4: + return FLAT_STORE; } } @@ -497,6 +517,16 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX3_SADDR: case AMDGPU::GLOBAL_STORE_DWORDX4_SADDR: return AMDGPU::GLOBAL_STORE_DWORD_SADDR; + case AMDGPU::FLAT_LOAD_DWORD: + case AMDGPU::FLAT_LOAD_DWORDX2: + case AMDGPU::FLAT_LOAD_DWORDX3: + case AMDGPU::FLAT_LOAD_DWORDX4: + return AMDGPU::FLAT_LOAD_DWORD; + case AMDGPU::FLAT_STORE_DWORD: + case AMDGPU::FLAT_STORE_DWORDX2: + case AMDGPU::FLAT_STORE_DWORDX3: + case AMDGPU::FLAT_STORE_DWORDX4: + return AMDGPU::FLAT_STORE_DWORD; } } @@ -577,6 +607,14 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) { case AMDGPU::GLOBAL_STORE_DWORDX2: case AMDGPU::GLOBAL_STORE_DWORDX3: case AMDGPU::GLOBAL_STORE_DWORDX4: + case AMDGPU::FLAT_LOAD_DWORD: + case AMDGPU::FLAT_LOAD_DWORDX2: + case AMDGPU::FLAT_LOAD_DWORDX3: + case AMDGPU::FLAT_LOAD_DWORDX4: + case AMDGPU::FLAT_STORE_DWORD: + case AMDGPU::FLAT_STORE_DWORDX2: + case AMDGPU::FLAT_STORE_DWORDX3: + case AMDGPU::FLAT_STORE_DWORDX4: Result.VAddr = true; return Result; } @@ -1449,7 +1487,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeTBufferStorePair( return New; } -MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalLoadPair( +MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatLoadPair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); @@ -1492,7 +1530,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalLoadPair( return New; } -MachineBasicBlock::iterator SILoadStoreOptimizer::mergeGlobalStorePair( +MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair( CombineInfo &CI, CombineInfo &Paired, MachineBasicBlock::iterator InsertBefore) { MachineBasicBlock *MBB = CI.I->getParent(); @@ -1606,6 +1644,28 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI, case 4: return AMDGPU::GLOBAL_STORE_DWORDX4_SADDR; } + case FLAT_LOAD: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::FLAT_LOAD_DWORDX2; + case 3: + return AMDGPU::FLAT_LOAD_DWORDX3; + case 4: + return AMDGPU::FLAT_LOAD_DWORDX4; + } + case FLAT_STORE: + switch (Width) { + default: + return 0; + case 2: + return AMDGPU::FLAT_STORE_DWORDX2; + case 3: + return AMDGPU::FLAT_STORE_DWORDX3; + case 4: + return AMDGPU::FLAT_STORE_DWORDX4; + } case MIMG: assert((countPopulation(CI.DMask | Paired.DMask) == Width) && "No overlaps"); @@ -2240,14 +2300,16 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr( NewMI = mergeTBufferStorePair(CI, Paired, Where->I); OptimizeListAgain |= CI.Width + Paired.Width < 4; break; + case FLAT_LOAD: case GLOBAL_LOAD: case GLOBAL_LOAD_SADDR: - NewMI = mergeGlobalLoadPair(CI, Paired, Where->I); + NewMI = mergeFlatLoadPair(CI, Paired, Where->I); OptimizeListAgain |= CI.Width + Paired.Width < 4; break; + case FLAT_STORE: case GLOBAL_STORE: case GLOBAL_STORE_SADDR: - NewMI = mergeGlobalStorePair(CI, Paired, Where->I); + NewMI = mergeFlatStorePair(CI, Paired, Where->I); OptimizeListAgain |= CI.Width + Paired.Width < 4; break; } diff --git a/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir new file mode 100644 index 0000000..5353881 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/merge-flat-load-store.mir @@ -0,0 +1,480 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=si-load-store-opt -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: merge_flat_load_dword_2 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dword_2 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i32* undef`, align 4) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + S_NOP 0, implicit %1, implicit %2 +... + +--- +name: merge_flat_load_dword_3 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dword_3 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = FLAT_LOAD_DWORDX3 [[DEF]], 0, 1, implicit $exec, implicit $flat_scr :: (load (s96) from `i32* undef`, align 4) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX3_]].sub0_sub1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX3_]].sub2 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY1]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %3:vgpr_32 = FLAT_LOAD_DWORD %0, 8, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + S_NOP 0, implicit %1, implicit %2, implicit %3 +... + +--- +name: merge_flat_load_dword_4 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dword_4 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 2, implicit $exec, implicit $flat_scr :: (load (s128) from `i32* undef`, align 4) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %3:vgpr_32 = FLAT_LOAD_DWORD %0, 8, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %4:vgpr_32 = FLAT_LOAD_DWORD %0, 12, 2, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + S_NOP 0, implicit %1, implicit %2, implicit %3, implicit %4 +... + +--- +name: merge_flat_load_dword_5 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dword_5 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 3, implicit $exec, implicit $flat_scr :: (load (s128) from `i32* undef`, align 4) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 16, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`) + ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[FLAT_LOAD_DWORD]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %3:vgpr_32 = FLAT_LOAD_DWORD %0, 8, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %4:vgpr_32 = FLAT_LOAD_DWORD %0, 12, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %5:vgpr_32 = FLAT_LOAD_DWORD %0, 16, 3, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + S_NOP 0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5 +... + +--- +name: merge_flat_load_dword_6 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dword_6 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `i32* undef`, align 4) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY [[COPY]].sub0_sub1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[COPY]].sub2 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]].sub0 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY2]].sub1 + ; GCN-NEXT: [[FLAT_LOAD_DWORDX2_:%[0-9]+]]:vreg_64_align2 = FLAT_LOAD_DWORDX2 [[DEF]], 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i32* undef`, align 4) + ; GCN-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX2_]].sub0 + ; GCN-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX2_]].sub1 + ; GCN-NEXT: S_NOP 0, implicit [[COPY4]], implicit [[COPY5]], implicit [[COPY3]], implicit [[COPY1]], implicit [[COPY6]], implicit [[COPY7]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %3:vgpr_32 = FLAT_LOAD_DWORD %0, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %4:vgpr_32 = FLAT_LOAD_DWORD %0, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %5:vgpr_32 = FLAT_LOAD_DWORD %0, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %6:vgpr_32 = FLAT_LOAD_DWORD %0, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + S_NOP 0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6 +... + +--- +name: merge_flat_load_dwordx2 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dwordx2 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `i64* undef`, align 4) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub2_sub3 + ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i64* undef`, align 4) + %2:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 8, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i64* undef`, align 4) + S_NOP 0, implicit %1, implicit %2 +... + +--- +name: merge_flat_load_dwordx3_with_dwordx1 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dwordx3_with_dwordx1 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128_align2 = FLAT_LOAD_DWORDX4 [[DEF]], 12, 0, implicit $exec, implicit $flat_scr :: (load (s128) from `i128* undef`, align 8) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_96_align2 = COPY [[FLAT_LOAD_DWORDX4_]].sub0_sub1_sub2 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed [[FLAT_LOAD_DWORDX4_]].sub3 + ; GCN-NEXT: S_NOP 0, implicit [[COPY]], implicit [[COPY1]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vreg_96_align2 = FLAT_LOAD_DWORDX3 %0, 12, 0, implicit $exec, implicit $flat_scr :: (load (s96) from `i128* undef`, align 8) + %2:vgpr_32 = FLAT_LOAD_DWORD %0, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + S_NOP 0, implicit %1, implicit %2 +... + +--- +name: merge_flat_load_dwordx1_with_dwordx2 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_load_dwordx1_with_dwordx2 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORDX3_:%[0-9]+]]:vreg_96_align2 = FLAT_LOAD_DWORDX3 [[DEF]], 12, 0, implicit $exec, implicit $flat_scr :: (load (s96) from `i32* undef`, align 4) + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[FLAT_LOAD_DWORDX3_]].sub0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY killed [[FLAT_LOAD_DWORDX3_]].sub1_sub2 + ; GCN-NEXT: S_NOP 0, implicit [[COPY1]], implicit [[COPY]] + %0:vreg_64_align2 = IMPLICIT_DEF + %2:vgpr_32 = FLAT_LOAD_DWORD %0, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %1:vreg_64_align2 = FLAT_LOAD_DWORDX2 %0, 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from `i64* undef`, align 8) + S_NOP 0, implicit %1, implicit %2 +... + +--- +name: no_merge_flat_load_dword_agpr_with_vgpr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_load_dword_agpr_with_vgpr + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`) + ; GCN-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:agpr_32 = FLAT_LOAD_DWORD [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`) + ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[FLAT_LOAD_DWORD1]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %2:agpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + S_NOP 0, implicit %1, implicit %2 +... + +--- +name: no_merge_flat_load_dword_disjoint +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_load_dword_disjoint + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`) + ; GCN-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`) + ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[FLAT_LOAD_DWORD1]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %2:vgpr_32 = FLAT_LOAD_DWORD %0, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + S_NOP 0, implicit %1, implicit %2 +... + +--- +name: no_merge_flat_load_dword_overlap +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_load_dword_overlap + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`) + ; GCN-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 3, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`) + ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[FLAT_LOAD_DWORD1]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %2:vgpr_32 = FLAT_LOAD_DWORD %0, 3, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`) + S_NOP 0, implicit %1, implicit %2 +... + +--- +name: no_merge_flat_load_dword_different_cpol +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_load_dword_different_cpol + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 0, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`) + ; GCN-NEXT: [[FLAT_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[DEF]], 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`) + ; GCN-NEXT: S_NOP 0, implicit [[FLAT_LOAD_DWORD]], implicit [[FLAT_LOAD_DWORD1]] + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = FLAT_LOAD_DWORD %0, 0, 1, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + %2:vgpr_32 = FLAT_LOAD_DWORD %0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `i32* undef`, align 4) + S_NOP 0, implicit %1, implicit %2 +... + +--- +name: merge_flat_store_dword_2 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dword_2 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `i32* undef`, align 4) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD %0, killed %1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD killed %0, killed %2, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) +... + +--- +name: merge_flat_store_dword_3 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dword_3 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0, killed [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, killed [[DEF3]], %subreg.sub2 + ; GCN-NEXT: FLAT_STORE_DWORDX3 [[DEF]], killed [[REG_SEQUENCE1]], 4, 1, implicit $exec, implicit $flat_scr :: (store (s96) into `i32* undef`, align 4) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD %0, killed %1, 4, 1, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0, killed %2, 8, 1, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD killed %0, killed %3, 12, 1, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) +... + +--- +name: merge_flat_store_dword_4 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dword_4 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]].sub1, %subreg.sub1, [[DEF1]].sub0, %subreg.sub0 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE [[DEF1]].sub2, %subreg.sub2, killed [[REG_SEQUENCE]], %subreg.sub0_sub1 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE [[DEF1]].sub3, %subreg.sub3, killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2 + ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 2, implicit $exec, implicit $flat_scr :: (store (s128) into `i32* undef`, align 4) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vreg_128 = IMPLICIT_DEF + FLAT_STORE_DWORD %0, %1.sub1, 8, 2, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0, %1.sub2, 12, 2, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0, %1.sub3, 16, 2, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD killed %0, %1.sub0, 4, 2, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) +... + +--- +name: merge_flat_store_dword_5 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dword_5 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF5:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:areg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 3, implicit $exec, implicit $flat_scr :: (store (s128) into `i32* undef`, align 4) + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], [[DEF5]], 20, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:agpr_32 = IMPLICIT_DEF + %2:agpr_32 = IMPLICIT_DEF + %3:agpr_32 = IMPLICIT_DEF + %4:agpr_32 = IMPLICIT_DEF + %5:agpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD %0, %1, 4, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0, %2, 8, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 8) + FLAT_STORE_DWORD %0, %3, 12, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0, %4, 16, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0, %5, 20, 3, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) +... + +--- +name: merge_flat_store_dword_6 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dword_6 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF1]], %subreg.sub0, [[DEF2]], %subreg.sub1 + ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96_align2 = REG_SEQUENCE killed [[REG_SEQUENCE]], %subreg.sub0_sub1, [[DEF3]], %subreg.sub2 + ; GCN-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1_sub2, [[DEF4]], %subreg.sub3 + ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE2]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `i32* undef`, align 8) + ; GCN-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[DEF5]], %subreg.sub0, [[DEF6]], %subreg.sub1 + ; GCN-NEXT: FLAT_STORE_DWORDX2 [[DEF]], killed [[REG_SEQUENCE3]], 20, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `i32* undef`, align 4) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = IMPLICIT_DEF + %5:vgpr_32 = IMPLICIT_DEF + %6:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD %0, %1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 8) + FLAT_STORE_DWORD %0, %2, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0, %3, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0, %4, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0, %5, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0, %6, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) +... + +--- +name: merge_flat_store_dwordx2 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dwordx2 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1, killed [[DEF2]], %subreg.sub2_sub3 + ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `i64* undef`, align 4) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = IMPLICIT_DEF + %2:vreg_64_align2 = IMPLICIT_DEF + FLAT_STORE_DWORDX2 %0, killed %1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `i64* undef`, align 4) + FLAT_STORE_DWORDX2 %0, killed %2, 12, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `i64* undef`, align 4) +... + +--- +name: merge_flat_store_dwordx3_with_dwordx1 +body: | + bb.0.entry: + + ; GCN-LABEL: name: merge_flat_store_dwordx3_with_dwordx1 + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_96_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[DEF1]], %subreg.sub0_sub1_sub2, killed [[DEF2]], %subreg.sub3 + ; GCN-NEXT: FLAT_STORE_DWORDX4 [[DEF]], killed [[REG_SEQUENCE]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s128) into `i64* undef`) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vreg_96_align2 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORDX3 %0, killed %1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s96) into `i64* undef`, align 16) + FLAT_STORE_DWORD %0, killed %2, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) +... + +--- +name: no_merge_flat_store_dword_agpr_with_vgpr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_store_dword_agpr_with_vgpr + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:agpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) + ; GCN-NEXT: FLAT_STORE_DWORD killed [[DEF]], killed [[DEF2]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:agpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD %0, killed %1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD killed %0, killed %2, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) +... + +--- +name: no_merge_flat_store_dword_disjoint +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_store_dword_disjoint + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) + ; GCN-NEXT: FLAT_STORE_DWORD killed [[DEF]], killed [[DEF2]], 6, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD %0, killed %1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD killed %0, killed %2, 6, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) +... + +--- +name: no_merge_flat_store_dword_overlap +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_store_dword_overlap + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) + ; GCN-NEXT: FLAT_STORE_DWORD killed [[DEF]], killed [[DEF2]], 2, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 2) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD %0, killed %1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD killed %0, killed %2, 2, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 2) +... + +--- +name: no_merge_flat_store_dword_different_cpol +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_store_dword_different_cpol + ; GCN: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]], killed [[DEF1]], 0, 1, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) + ; GCN-NEXT: FLAT_STORE_DWORD killed [[DEF]], killed [[DEF2]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) + %0:vreg_64_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD %0, killed %1, 0, 1, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD killed %0, killed %2, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) +... + +--- +name: no_merge_flat_store_dword_different_vaddr +body: | + bb.0.entry: + + ; GCN-LABEL: name: no_merge_flat_store_dword_different_vaddr + ; GCN: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]].sub0_sub1, killed [[DEF1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) + ; GCN-NEXT: FLAT_STORE_DWORD [[DEF]].sub2_sub3, killed [[DEF2]], 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`) + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + FLAT_STORE_DWORD %0.sub0_sub1, killed %1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) + FLAT_STORE_DWORD %0.sub2_sub3, killed %2, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `i32* undef`, align 4) +... -- 2.7.4