///
int getOffsetOfLocalArea() const { return LocalAreaOffset; }
- /// isFPCloseToIncomingSP - Return true if the frame pointer is close to
- /// the incoming stack pointer, false if it is close to the post-prologue
- /// stack pointer.
- virtual bool isFPCloseToIncomingSP() const { return true; }
+ /// Control the placement of special register scavenging spill slots when
+ /// allocating a stack frame. If this returns true, the frame indexes used by
+ /// the RegScavenger will be allocated closest to the incoming stack pointer.
+ /// The default returns true only when \p MF has a frame pointer, the register
+ /// info uses it for scavenging indexes, and the stack is not realigned.
+ virtual bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const;
/// assignCalleeSavedSpillSlots - Allows target to override spill slot
/// assignment logic. If implemented, assignCalleeSavedSpillSlots() should
// incoming stack pointer if a frame pointer is required and is closer
// to the incoming rather than the final stack pointer.
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- bool EarlyScavengingSlots = (TFI.hasFP(MF) && TFI.isFPCloseToIncomingSP() &&
- RegInfo->useFPForScavengingIndex(MF) &&
- !RegInfo->hasStackRealignment(MF));
+ bool EarlyScavengingSlots = TFI.allocateScavengingFrameIndexesNearIncomingSP(MF);
if (RS && EarlyScavengingSlots) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
return 0;
}
+bool TargetFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
+    const MachineFunction &MF) const {
+  // Default policy: scavenging slots go next to the incoming stack pointer
+  // only when the function has a frame pointer, the register info asks for
+  // the frame pointer to address scavenging indexes, and the stack does not
+  // need realignment.
+  if (hasFP(MF)) {
+    const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+    if (!TRI->useFPForScavengingIndex(MF))
+      return false;
+    return !TRI->hasStackRealignment(MF);
+  }
+  return false;
+}
+
bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) {
if (!F.hasLocalLinkage() || F.hasAddressTaken() ||
!F.hasFnAttribute(Attribute::NoRecurse))
return false;
}
+bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
+    const MachineFunction &MF) const {
+  // Decide whether the emergency scavenging slots must be placed first, at the
+  // incoming SP position. Returns false when the last byte of the estimated
+  // frame is still addressable with an immediate offset, and true otherwise.
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const uint64_t EstStackSize = MFI.estimateStackSize(MF);
+
+  // Guard the subtraction below: with an empty frame, EstStackSize - 1 would
+  // wrap around to UINT64_MAX and the legality queries would see a bogus
+  // offset. An empty frame has nothing that could be out of range.
+  // NOTE(review): presumably no scavenging slots can exist when the estimate
+  // is zero, so this does not change any frame layout -- confirm.
+  if (EstStackSize == 0)
+    return false;
+
+  // Offset of the last byte of the estimated frame.
+  const uint64_t MaxOffset = EstStackSize - 1;
+
+  // We need the emergency stack slots to be allocated in range of the
+  // MUBUF/flat scratch immediate offset from the base register, so assign these
+  // first at the incoming SP position.
+  //
+  // TODO: We could try sorting the objects to find a hole in the first bytes
+  // rather than allocating as close as possible. This could save a lot of space
+  // on frames with alignment requirements.
+  if (ST.enableFlatScratch()) {
+    const SIInstrInfo *TII = ST.getInstrInfo();
+    if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
+                               SIInstrFlags::FlatScratch))
+      return false;
+  } else {
+    if (SIInstrInfo::isLegalMUBUFImmOffset(MaxOffset))
+      return false;
+  }
+
+  // Some part of the frame may be out of immediate range; keep the emergency
+  // slots near the incoming SP.
+  return true;
+}
+
MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF,
MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const override;
+ bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const override;
+
bool isSupportedStackID(TargetStackID::Value ID) const override;
void processFunctionBeforeFrameFinalized(
bool hasBP(const MachineFunction &MF) const;
- bool isFPCloseToIncomingSP() const override { return false; }
+ bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const override {
+ return false; // Unconditional: never place scavenging slots near the incoming SP.
+ }
bool enableShrinkWrapping(const MachineFunction &MF) const override {
return true;
create(const SystemZSubtarget &STI);
// Override TargetFrameLowering.
- bool isFPCloseToIncomingSP() const override { return false; }
+ bool allocateScavengingFrameIndexesNearIncomingSP(
+ const MachineFunction &MF) const override {
+ // SystemZ wants normal register scavenging slots, as close to the stack or
+ // frame pointer as possible.
+ // The default implementation assumes an x86-like layout, where the frame
+ // pointer is at the opposite end of the frame from the stack pointer.
+ // With frame pointer elimination disabled, that default would place the
+ // slots as close as possible to the incoming stack pointer, which is the
+ // opposite of what we want on SystemZ, so always opt out here.
+ return false;
+ }
+
bool hasReservedCallFrame(const MachineFunction &MF) const override;
MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
SystemZELFFrameLowering();
// Override TargetFrameLowering.
- bool isFPCloseToIncomingSP() const override { return false; }
bool
assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterInfo *TRI,
; GFX9-LABEL: store_load_vindex_large_offset_foo:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
+; GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX9-NEXT: v_mov_b32_e32 v2, vcc_hi
; GFX9-NEXT: v_and_b32_e32 v0, 15, v0
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_and_b32_e32 v1, 15, v0
-; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
; GFX10-NEXT: v_mov_b32_e32 v3, 15
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1
-; GFX10-NEXT: scratch_load_dword v2, off, s32 glc dlc
+; GFX10-NEXT: scratch_load_dword v2, off, s32 offset:4 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: scratch_store_dword v0, v3, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 13
; GFX9-NEXT: s_movk_i32 s0, 0x3e80
-; GFX9-NEXT: scratch_store_dword off, v0, s32
+; GFX9-NEXT: s_add_i32 vcc_hi, s32, 4
+; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 15
-; GFX9-NEXT: s_add_i32 s0, s0, s32
+; GFX9-NEXT: s_add_i32 s0, s0, vcc_hi
; GFX9-NEXT: scratch_store_dword off, v0, s0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: scratch_load_dword v0, off, s0 glc
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3e80
-; GFX10-NEXT: s_add_i32 s0, s0, s32
-; GFX10-NEXT: scratch_store_dword off, v0, s32
+; GFX10-NEXT: s_add_i32 vcc_lo, s32, 4
+; GFX10-NEXT: s_add_i32 s0, s0, vcc_lo
+; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; MUBUF-NEXT: s_addk_i32 s32, 0x200
; FLATSCR-NEXT: s_add_i32 s32, s32, 8
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
-; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4{{$}}
-; FLATSCR-NEXT: scratch_store_dword off, v0, s33 offset:4{{$}}
+; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s33{{$}}
+; FLATSCR-NEXT: scratch_store_dword off, v0, s33{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_addk_i32 s32, 0xfe00
; FLATSCR-NEXT: s_add_i32 s32, s32, -8
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; MUBUF-DAG: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
-; MUBUF-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:8
-; FLATSCR-DAG: scratch_store_dword off, [[ZERO]], s33 offset:8
+; MUBUF-DAG: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:4
+; FLATSCR-DAG: scratch_store_dword off, [[ZERO]], s33 offset:4
; GCN: ;;#ASMSTART
; GCN-NEXT: ; clobber v41
; GCN-LABEL: {{^}}last_lane_vgpr_for_fp_csr:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:12 ; 4-byte Folded Spill
+; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 v0, s33, 63
; GCN-COUNT-60: v_writelane_b32 v0
; MUBUF: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
; GCN: v_writelane_b32 v0
-; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:8
-; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33 offset:8
+; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:4
+; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33 offset:4
; GCN: ;;#ASMSTART
; GCN: v_writelane_b32 v0
; FLATSCR: s_add_i32 s32, s32, -16
; GCN-NEXT: v_readlane_b32 s33, v0, 63
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:12 ; 4-byte Folded Reload
+; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
; GCN-LABEL: {{^}}no_new_vgpr_for_fp_csr:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:12 ; 4-byte Folded Spill
+; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-COUNT-61: v_writelane_b32 v0,
; FLATSCR: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; FLATSCR-NEXT: s_add_i32 s32, s32, -16
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
-; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:12 ; 4-byte Folded Reload
+; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
+; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
-; MUBUF-NEXT: s_add_i32 s32, s32, 0x100000
-; FLATSCR-NEXT: s_addk_i32 s32, 0x4000
+; MUBUF-NEXT: s_add_i32 s32, s32, 0x180000
+; FLATSCR-NEXT: s_addk_i32 s32, 0x6000
; GCN-NEXT: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
-; MUBUF-NEXT: buffer_store_dword [[ZERO]], off, s[0:3], s33
-; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], s33
+; MUBUF-NEXT: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x2000{{$}}
+; MUBUF-NEXT: buffer_store_dword [[ZERO]], [[OFFSET]], s[0:3], s33 offen{{$}}
+; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x2000
+; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], vcc_hi
; GCN-NEXT: s_waitcnt vmcnt(0)
-; MUBUF-NEXT: s_add_i32 s32, s32, 0xfff00000
-; FLATSCR-NEXT: s_addk_i32 s32, 0xc000
+; MUBUF-NEXT: s_add_i32 s32, s32, 0xffe80000
+; FLATSCR-NEXT: s_addk_i32 s32, 0xa000
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
define void @realign_stack_no_fp_elim() #1 {
; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:8 ; 4-byte Folded Spill
+; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 v0, s33, 2
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: v_writelane_b32 v0, s30, 0
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; GCN: v_writelane_b32 v0, s31, 1
-; MUBUF: buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:4
-; FLATSCR: scratch_store_dword off, [[ZERO]], s33 offset:4
+; MUBUF: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}}
+; FLATSCR: scratch_store_dword off, [[ZERO]], s33{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN: ;;#ASMSTART
; MUBUF: s_addk_i32 s32, 0x300
; FLATSCR-NEXT: s_add_i32 s32, s32, -12
; GCN-NEXT: v_readlane_b32 s33, v0, 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:8 ; 4-byte Folded Reload
+; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_setpc_b64 s[4:5]
; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
-; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:8 ; 4-byte Folded Spill
+; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
; GCN-NEXT: s_mov_b32 s33, s32
; FLATSCR-NEXT: s_add_i32 s32, s32, -12{{$}}
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:8 ; 4-byte Folded Reload
+; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
; GCN-LABEL: {{^}}scratch_reg_needed_mubuf_offset:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
+; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
-; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
+; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1004
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
; FLATSCR-NEXT: s_addk_i32 s32, 0xeff4{{$}}
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
-; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
+; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Reload
-; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1008
+; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1004
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; With no free registers, we must spill the FP to memory.
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory:
; MUBUF: v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33
-; MUBUF: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:4
+; MUBUF: buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 ; 4-byte Folded Spill
; FLATSCR: s_mov_b32 s0, s33
; GCN: s_mov_b32 s33, s32
-; MUBUF: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:4
+; MUBUF: buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Reload
; FLATSCR: s_mov_b32 s33, s0
; MUBUF: s_waitcnt vmcnt(0)
; MUBUF: v_readfirstlane_b32 s33, [[TMP_VGPR2]]
; scratch VGPR to hold the offset.
; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset
; MUBUF: s_or_saveexec_b64 s[4:5], -1
-; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
+; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100
; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; MUBUF: v_mov_b32_e32 v0, s33
; GCN-NOT: v_mov_b32_e32 v0, 0x100c
-; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40300
+; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
; MUBUF: buffer_store_dword v0, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; FLATSCR: v_mov_b32_e32 v0, 0
-; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s33, 0x1004
+; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s33, 0x1000
; FLATSCR: scratch_store_dword off, v0, [[SOFF]]
define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #3 {
%alloca = alloca i32, addrspace(5)
; GFX9-LABEL: zero_init_large_offset_foo:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: scratch_load_dword v0, off, s32 glc
+; GFX9-NEXT: scratch_load_dword v0, off, s32 offset:16 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s0, 0
; GFX9-NEXT: s_mov_b32 s1, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_mov_b32_e32 v2, s2
; GFX9-NEXT: v_mov_b32_e32 v3, s3
-; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4010
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4010
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4010
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4010
; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT: scratch_load_dword v0, off, s32 glc dlc
+; GFX10-NEXT: scratch_load_dword v0, off, s32 offset:16 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_mov_b32 s0, 0
-; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX10-NEXT: s_mov_b32 s1, s0
; GFX10-NEXT: s_mov_b32 s2, s0
; GFX10-NEXT: s_mov_b32 s3, s0
; GFX10-NEXT: v_mov_b32_e32 v2, s2
; GFX10-NEXT: v_mov_b32_e32 v3, s3
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: s_setpc_b64 s[30:31]
; GFX9-PAL-LABEL: zero_init_large_offset_foo:
; GFX9-PAL: ; %bb.0:
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-PAL-NEXT: scratch_load_dword v0, off, s32 glc
+; GFX9-PAL-NEXT: scratch_load_dword v0, off, s32 offset:16 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_mov_b32 s0, 0
; GFX9-PAL-NEXT: s_mov_b32 s1, s0
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1
; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2
; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3
-; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4010
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi
-; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4010
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16
-; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4010
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32
-; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4010
; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX9-PAL-NEXT: s_setpc_b64 s[30:31]
; GFX1010-PAL: ; %bb.0:
; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc
+; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 offset:16 glc dlc
; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX1010-PAL-NEXT: s_mov_b32 s0, 0
-; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX1010-PAL-NEXT: s_mov_b32 s1, s0
; GFX1010-PAL-NEXT: s_mov_b32 s2, s0
; GFX1010-PAL-NEXT: s_mov_b32 s3, s0
; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo
; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3
-; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1010-PAL-NEXT: s_setpc_b64 s[30:31]
; GFX1030-PAL: ; %bb.0:
; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
-; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 glc dlc
+; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 offset:16 glc dlc
; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX1030-PAL-NEXT: s_mov_b32 s0, 0
-; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX1030-PAL-NEXT: s_mov_b32 s1, s0
; GFX1030-PAL-NEXT: s_mov_b32 s2, s0
; GFX1030-PAL-NEXT: s_mov_b32 s3, s0
; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2
; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo
-; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16
-; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32
-; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4010
; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48
; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX1030-PAL-NEXT: s_setpc_b64 s[30:31]
; GFX9-LABEL: store_load_vindex_large_offset_foo:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: scratch_load_dword v1, off, s32 glc
+; GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004
; GFX9-NEXT: v_mov_b32_e32 v1, vcc_hi
; GFX9-NEXT: v_mov_b32_e32 v3, 15
; GFX9-NEXT: v_lshl_add_u32 v2, v0, 2, v1
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_mov_b32_e32 v1, 15
-; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004
; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo
; GFX10-NEXT: v_and_b32_e32 v3, v0, v1
; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, v2
; GFX10-NEXT: v_lshl_add_u32 v2, v3, 2, v2
-; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc
+; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: scratch_store_dword v0, v1, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX9-PAL-LABEL: store_load_vindex_large_offset_foo:
; GFX9-PAL: ; %bb.0: ; %bb
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 glc
+; GFX9-PAL-NEXT: scratch_load_dword v1, off, s32 offset:4 glc
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4000
+; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004
; GFX9-PAL-NEXT: v_mov_b32_e32 v1, vcc_hi
; GFX9-PAL-NEXT: v_mov_b32_e32 v3, 15
; GFX9-PAL-NEXT: v_lshl_add_u32 v2, v0, 2, v1
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
-; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4000
+; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004
; GFX10-PAL-NEXT: v_mov_b32_e32 v2, vcc_lo
; GFX10-PAL-NEXT: v_and_b32_e32 v3, v0, v1
; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, v2
; GFX10-PAL-NEXT: v_lshl_add_u32 v2, v3, 2, v2
-; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 glc dlc
+; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc
; GFX10-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, 13
; GFX9-NEXT: s_movk_i32 s0, 0x3000
-; GFX9-NEXT: scratch_store_dword off, v0, s32
+; GFX9-NEXT: s_add_i32 vcc_hi, s32, 4
+; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_add_i32 s0, s0, s32
+; GFX9-NEXT: s_add_i32 s0, s0, vcc_hi
; GFX9-NEXT: v_mov_b32_e32 v0, 15
; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, 13
; GFX10-NEXT: v_mov_b32_e32 v1, 15
; GFX10-NEXT: s_movk_i32 s0, 0x3800
-; GFX10-NEXT: s_add_i32 s0, s0, s32
-; GFX10-NEXT: scratch_store_dword off, v0, s32
+; GFX10-NEXT: s_add_i32 vcc_lo, s32, 4
+; GFX10-NEXT: s_add_i32 s0, s0, vcc_lo
+; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000
-; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32
+; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 4
+; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-PAL-NEXT: s_add_i32 s0, s0, s32
+; GFX9-PAL-NEXT: s_add_i32 s0, s0, vcc_hi
; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15
; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712
; GFX9-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13
; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15
; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800
-; GFX10-PAL-NEXT: s_add_i32 s0, s0, s32
-; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32
+; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 4
+; GFX10-PAL-NEXT: s_add_i32 s0, s0, vcc_lo
+; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664
; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_to_offset:
; GFX900-MUBUF: buffer_store_dword
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT: buffer_load_short_d16_hi v{{[0-9]+}}, off, s[0:3], s32 offset:4094
+; GFX900-MUBUF-NEXT: buffer_load_short_d16_hi v{{[0-9]+}}, off, s[0:3], s32 offset:4058
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX900-FLATSCR: scratch_store_dword
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_load_short_d16_hi v{{[0-9]+}}, off, s32 offset:4094
+; GFX900-FLATSCR-NEXT: scratch_load_short_d16_hi v{{[0-9]+}}, off, s32 offset:4058
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-define void @load_private_hi_v2i16_reglo_vreg_to_offset(i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_to_offset(i16 %reg, [10 x i32] addrspace(5)* %obj0) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4, addrspace(5)
%obj1 = alloca [4096 x i16], align 2, addrspace(5)
%bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
store volatile i32 123, i32 addrspace(5)* %bc
; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_sexti8_to_offset:
; GFX900-MUBUF: buffer_store_dword
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16_hi v{{[0-9]+}}, off, s[0:3], s32 offset:4095
+; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16_hi v{{[0-9]+}}, off, s[0:3], s32 offset:4059
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX900-FLATSCR: scratch_store_dword
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16_hi v{{[0-9]+}}, off, s32 offset:4095
+; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16_hi v{{[0-9]+}}, off, s32 offset:4059
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-define void @load_private_hi_v2i16_reglo_vreg_sexti8_to_offset(i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_sexti8_to_offset(i16 %reg, [10 x i32] addrspace(5)* %obj0) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4, addrspace(5)
%obj1 = alloca [4096 x i8], align 2, addrspace(5)
%bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
store volatile i32 123, i32 addrspace(5)* %bc
; GCN-LABEL: {{^}}load_private_hi_v2i16_reglo_vreg_zexti8_to_offset:
; GFX900-MUBUF: buffer_store_dword
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16_hi v{{[0-9]+}}, off, s[0:3], s32 offset:4095
+; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16_hi v{{[0-9]+}}, off, s[0:3], s32 offset:4059
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX900-FLATSCR: scratch_store_dword
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16_hi v{{[0-9]+}}, off, s32 offset:4095
+; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16_hi v{{[0-9]+}}, off, s32 offset:4059
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-define void @load_private_hi_v2i16_reglo_vreg_zexti8_to_offset(i16 %reg) #0 {
+define void @load_private_hi_v2i16_reglo_vreg_zexti8_to_offset(i16 %reg, [10 x i32] addrspace(5)* %obj0) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4, addrspace(5)
%obj1 = alloca [4096 x i8], align 2, addrspace(5)
%bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
store volatile i32 123, i32 addrspace(5)* %bc
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, off, s[0:3], s32 offset:4094 glc
+; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44
+; GFX900-MUBUF-NEXT: buffer_load_short_d16 v0, v1, s[0:3], s32 offen offset:4054 glc
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 glc
+; GFX906-NEXT: v_mov_b32_e32 v3, 44
+; GFX906-NEXT: buffer_load_ushort v1, v3, s[0:3], s32 offen offset:4054 glc
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX803-NEXT: s_waitcnt vmcnt(0)
-; GFX803-NEXT: buffer_load_ushort v1, off, s[0:3], s32 offset:4094 glc
+; GFX803-NEXT: v_mov_b32_e32 v2, 44
+; GFX803-NEXT: buffer_load_ushort v1, v2, s[0:3], s32 offen offset:4054 glc
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX803-NEXT: v_or_b32_e32 v0, v1, v0
; GFX900-FLATSCR: ; %bb.0: ; %entry
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32
+; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, s32 offset:4094 glc
+; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44
+; GFX900-FLATSCR-NEXT: scratch_load_short_d16 v0, off, vcc_hi offset:4054 glc
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095 glc
+; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44
+; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc
+; GFX906-NEXT: v_mov_b32_e32 v3, 44
+; GFX906-NEXT: buffer_load_sbyte v1, v3, s[0:3], s32 offen offset:4055 glc
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX803-NEXT: s_waitcnt vmcnt(0)
-; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc
+; GFX803-NEXT: v_mov_b32_e32 v2, 44
+; GFX803-NEXT: buffer_load_sbyte v1, v2, s[0:3], s32 offen offset:4055 glc
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX900-FLATSCR: ; %bb.0: ; %entry
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32
+; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s32 offset:4095 glc
+; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44
+; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, vcc_hi offset:4055 glc
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095 glc
+; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44
+; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc
+; GFX906-NEXT: v_mov_b32_e32 v3, 44
+; GFX906-NEXT: buffer_load_ubyte v1, v3, s[0:3], s32 offen offset:4055 glc
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX906-NEXT: v_bfi_b32 v0, v2, v1, v0
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX803-NEXT: s_waitcnt vmcnt(0)
-; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc
+; GFX803-NEXT: v_mov_b32_e32 v2, 44
+; GFX803-NEXT: buffer_load_ubyte v1, v2, s[0:3], s32 offen offset:4055 glc
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
; GFX900-FLATSCR: ; %bb.0: ; %entry
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32
+; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s32 offset:4095 glc
+; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44
+; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, vcc_hi offset:4055 glc
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s32 offset:4095 glc
+; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44
+; GFX900-MUBUF-NEXT: buffer_load_sbyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc
+; GFX906-NEXT: v_mov_b32_e32 v2, 44
+; GFX906-NEXT: buffer_load_sbyte v1, v2, s[0:3], s32 offen offset:4055 glc
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX803-NEXT: s_waitcnt vmcnt(0)
-; GFX803-NEXT: buffer_load_sbyte v1, off, s[0:3], s32 offset:4095 glc
+; GFX803-NEXT: v_mov_b32_e32 v2, 44
+; GFX803-NEXT: buffer_load_sbyte v1, v2, s[0:3], s32 offen offset:4055 glc
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX803-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX900-FLATSCR: ; %bb.0: ; %entry
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32
+; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, s32 offset:4095 glc
+; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44
+; GFX900-FLATSCR-NEXT: scratch_load_sbyte_d16 v0, off, vcc_hi offset:4055 glc
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX900-MUBUF: ; %bb.0: ; %entry
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX900-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s32 offset:4095 glc
+; GFX900-MUBUF-NEXT: v_mov_b32_e32 v1, 44
+; GFX900-MUBUF-NEXT: buffer_load_ubyte_d16 v0, v1, s[0:3], s32 offen offset:4055 glc
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX900-MUBUF-NEXT: global_store_dword v[0:1], v0, off
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX906-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX906-NEXT: s_waitcnt vmcnt(0)
-; GFX906-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc
+; GFX906-NEXT: v_mov_b32_e32 v2, 44
+; GFX906-NEXT: buffer_load_ubyte v1, v2, s[0:3], s32 offen offset:4055 glc
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX906-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX803: ; %bb.0: ; %entry
; GFX803-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX803-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32
+; GFX803-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4
; GFX803-NEXT: s_waitcnt vmcnt(0)
-; GFX803-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 offset:4095 glc
+; GFX803-NEXT: v_mov_b32_e32 v2, 44
+; GFX803-NEXT: buffer_load_ubyte v1, v2, s[0:3], s32 offen offset:4055 glc
; GFX803-NEXT: s_waitcnt vmcnt(0)
; GFX803-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX803-NEXT: s_mov_b32 s4, 0x5040c00
; GFX900-FLATSCR: ; %bb.0: ; %entry
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x7b
-; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32
+; GFX900-FLATSCR-NEXT: scratch_store_dword off, v1, s32 offset:4
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, s32 offset:4095 glc
+; GFX900-FLATSCR-NEXT: s_add_i32 vcc_hi, s32, 44
+; GFX900-FLATSCR-NEXT: scratch_load_ubyte_d16 v0, off, vcc_hi offset:4055 glc
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; GFX900-FLATSCR-NEXT: global_store_dword v[0:1], v0, off
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_add_i32 s33, s32, 0x7ffc0
; MUBUF-NEXT: s_and_b32 s33, s33, 0xfff80000
; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33
-; MUBUF-NEXT: v_add_u32_e32 v3, 0x1000, v3
+; MUBUF-NEXT: v_add_u32_e32 v3, 0x3000, v3
; MUBUF-NEXT: v_add_u32_e32 v2, 64, v3
; MUBUF-NEXT: v_mov_b32_e32 v4, 0
+; MUBUF-NEXT: v_mov_b32_e32 v5, 0x2000
; MUBUF-NEXT: s_mov_b32 s4, 0
-; MUBUF-NEXT: s_add_i32 s32, s32, 0x180000
-; MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], s33
+; MUBUF-NEXT: s_add_i32 s32, s32, 0x200000
+; MUBUF-NEXT: buffer_store_dword v4, v5, s[0:3], s33 offen
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: .LBB1_1: ; %loadstoreloop
; MUBUF-NEXT: ; =>This Inner Loop Header: Depth=1
; MUBUF-NEXT: s_cbranch_scc1 .LBB1_1
; MUBUF-NEXT: ; %bb.2: ; %split
; MUBUF-NEXT: v_lshrrev_b32_e64 v3, 6, s33
-; MUBUF-NEXT: v_add_u32_e32 v3, 0x1000, v3
+; MUBUF-NEXT: v_add_u32_e32 v3, 0x3000, v3
; MUBUF-NEXT: v_add_u32_e32 v3, 0x20d0, v3
; MUBUF-NEXT: buffer_load_dword v4, v3, s[0:3], 0 offen glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: buffer_load_dword v7, v2, s[0:3], 0 offen offset:4 glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
-; MUBUF-NEXT: s_add_i32 s32, s32, 0xffe80000
+; MUBUF-NEXT: s_add_i32 s32, s32, 0xffe00000
; MUBUF-NEXT: s_mov_b32 s33, s5
; MUBUF-NEXT: v_add_co_u32_e32 v2, vcc, v4, v6
; MUBUF-NEXT: v_addc_co_u32_e32 v3, vcc, v5, v7, vcc
; FLATSCR-NEXT: s_mov_b32 s2, s33
; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff
; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
+; FLATSCR-NEXT: s_add_i32 s32, s32, 0x8000
; FLATSCR-NEXT: v_mov_b32_e32 v2, 0
+; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x2000
; FLATSCR-NEXT: s_mov_b32 s0, 0
-; FLATSCR-NEXT: s_addk_i32 s32, 0x6000
-; FLATSCR-NEXT: scratch_store_dword off, v2, s33
+; FLATSCR-NEXT: scratch_store_dword off, v2, vcc_hi
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: .LBB1_1: ; %loadstoreloop
; FLATSCR-NEXT: ; =>This Inner Loop Header: Depth=1
-; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x1000
+; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x3000
; FLATSCR-NEXT: s_add_i32 s1, s0, vcc_hi
; FLATSCR-NEXT: s_add_i32 s0, s0, 1
; FLATSCR-NEXT: s_cmpk_lt_u32 s0, 0x2120
; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_1
; FLATSCR-NEXT: ; %bb.2: ; %split
; FLATSCR-NEXT: s_movk_i32 s0, 0x2000
-; FLATSCR-NEXT: s_add_i32 s1, s33, 0x1000
+; FLATSCR-NEXT: s_add_i32 s1, s33, 0x3000
; FLATSCR-NEXT: s_add_i32 s0, s0, s1
; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s0 offset:208 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; FLATSCR-NEXT: s_add_i32 s0, s33, 0x1000
+; FLATSCR-NEXT: s_add_i32 s0, s33, 0x3000
; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s0 offset:64 glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; FLATSCR-NEXT: s_addk_i32 s32, 0xa000
+; FLATSCR-NEXT: s_addk_i32 s32, 0x8000
; FLATSCR-NEXT: s_mov_b32 s33, s2
; FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
; FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_add_i32 s32, s32, 0x200
; CHECK-NEXT: v_mov_b32_e32 v0, 1
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffe00
; CHECK-NEXT: s_mov_b32 s33, s4
; CHECK: liveins: $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
+ ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def $scc
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
- ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33
+ ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33
; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc
+ ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33
+ ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc
+ ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
- ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
; CHECK-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
+ ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def $scc
; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK-NEXT: S_ENDPGM 0, implicit $vcc
; CHECK-NEXT: $sgpr29 = frame-setup COPY $sgpr33
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 8192, implicit-def $scc
- ; CHECK-NEXT: $vgpr2 = COPY killed $sgpr33
+ ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr33
; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -8192, implicit-def $scc
; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 16384, implicit-def $scc
+ ; CHECK-NEXT: $vgpr2 = COPY killed $sgpr33
+ ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc
+ ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr31
- ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr29
; CHECK-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK-NEXT: $sgpr28 = frame-setup COPY $sgpr33
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK-NEXT: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
; CHECK-NEXT: $sgpr29 = S_ADD_I32 killed $sgpr29, 8192, implicit-def $scc
+ ; CHECK-NEXT: $vgpr0 = COPY killed $sgpr29
+ ; CHECK-NEXT: $sgpr29 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr29 = S_ADD_I32 killed $sgpr29, 16384, implicit-def $scc
; CHECK-NEXT: $vgpr2 = COPY killed $sgpr29
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
- ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28
; CHECK-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK-NEXT: $sgpr28 = frame-setup COPY $sgpr33
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
; CHECK-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; CHECK-NEXT: $vcc_lo = S_MOV_B32 8192
+ ; CHECK-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr0, 0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; CHECK-NEXT: $vcc_lo = S_MOV_B32 16384
; CHECK-NEXT: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr31
- ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
; CHECK-NEXT: $sgpr33 = frame-destroy COPY $sgpr28
; CHECK-NEXT: S_ENDPGM 0
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr30, implicit-def $sgpr31
; MUBUF: liveins: $vgpr1, $vgpr2
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
+ ; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def $scc
; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; MUBUF-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; MUBUF-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def dead $scc
+ ; MUBUF-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; MUBUF-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; MUBUF-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; MUBUF-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
; MUBUF-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec
+ ; MUBUF-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec
; MUBUF-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
- ; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def dead $scc
+ ; MUBUF-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
; MUBUF-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; MUBUF-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
+ ; MUBUF-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def $scc
; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
; MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; MUBUF-NEXT: S_ENDPGM 0, implicit $vcc
; FLATSCR: liveins: $vgpr1, $vgpr2
; FLATSCR-NEXT: {{ $}}
; FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def $scc
; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
; FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
- ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
+ ; FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
; FLATSCR-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
- ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ ; FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -8192, implicit-def $scc
- ; FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+ ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
+ ; FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 $sgpr33, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
+ ; FLATSCR-NEXT: $sgpr33 = S_ADD_I32 $sgpr33, -16384, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
; FLATSCR-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
+ ; FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def $scc
; FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
; FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; FLATSCR-NEXT: S_ENDPGM 0, implicit $vcc
; CHECK: liveins: $vgpr1, $vgpr2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+ ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 262400, implicit-def $scc
+ ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; CHECK-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 262080, implicit-def $scc
; CHECK-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294705152, implicit-def dead $scc
- ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 524288, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 786432, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
- ; CHECK-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
+ ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, 4096, implicit-def $scc
+ ; CHECK-NEXT: $vgpr3 = COPY killed $sgpr33
+ ; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -4096, implicit-def $scc
+ ; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
- ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -524288, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -786432, implicit-def dead $scc
; CHECK-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; CHECK-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+ ; CHECK-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 262400, implicit-def $scc
+ ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK-NEXT: S_ENDPGM 0, implicit $vcc
S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
+ ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def $scc
; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX8-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def dead $scc
+ ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; GFX8-NEXT: $sgpr7 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
- ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
- ; GFX8-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
; GFX8-NEXT: $vcc_lo = S_MOV_B32 8192
+ ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr0, 0, implicit $exec
+ ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GFX8-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
+ ; GFX8-NEXT: $vcc_lo = S_MOV_B32 16384
; GFX8-NEXT: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec
; GFX8-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
- ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def dead $scc
+ ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
; GFX8-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
+ ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def $scc
; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; GFX8-NEXT: $sgpr4 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
- ; GFX8-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GFX8-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
; GFX8-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs
; GFX9-LABEL: name: pei_scavenge_vgpr_spill
; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
+ ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def $scc
; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX9-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
; GFX9-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
- ; GFX9-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 1572864, implicit-def dead $scc
+ ; GFX9-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; GFX9-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; GFX9-NEXT: $sgpr7 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
- ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr7, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+ ; GFX9-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec
+ ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
; GFX9-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec
- ; GFX9-NEXT: $vgpr3 = V_ADD_U32_e32 8192, killed $vgpr3, implicit $exec
+ ; GFX9-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec
; GFX9-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec
- ; GFX9-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -1572864, implicit-def dead $scc
+ ; GFX9-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc
; GFX9-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 524544, implicit-def $scc
+ ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def $scc
; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
- ; GFX9-NEXT: $sgpr4 = S_ADD_I32 $sgpr33, 524800, implicit-def $scc
- ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+ ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
; GFX9-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs
; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, 
$vgpr2
; GFX9-FLATSCR-NEXT: {{ $}}
; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
+ ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def $scc
; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2
; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
- ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
- ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr33, implicit $exec
+ ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 8192, implicit-def $scc
+ ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec
+ ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 16384, implicit-def $scc
; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec
- ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24576, implicit-def dead $scc
+ ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc
; GFX9-FLATSCR-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0
; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
- ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 8196, implicit-def $scc
+ ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def $scc
; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; GFX9-FLATSCR-NEXT: S_ENDPGM 0, csr_amdgpu_allvgprs
; 0x40000 / 64 = 4096 (for wave64)
%a = load volatile i32, i32 addrspace(5)* %aptr
- ; MUBUF: s_add_i32 s32, s32, 0x40000
+ ; MUBUF: s_add_i32 s32, s32, 0x40100
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Spill
- ; MUBUF: s_add_i32 s32, s32, 0xfffc0000
- ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1000
+ ; MUBUF: s_add_i32 s32, s32, 0xfffbff00
+ ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1004
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill
call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a)
call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
- ; MUBUF: s_add_i32 s32, s32, 0x40000
+ ; MUBUF: s_add_i32 s32, s32, 0x40100
; MUBUF: buffer_load_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s32 ; 4-byte Folded Reload
- ; MUBUF: s_add_i32 s32, s32, 0xfffc0000
- ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1000
+ ; MUBUF: s_add_i32 s32, s32, 0xfffbff00
+ ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1004
; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload
; Force %a to spill with no free SGPRs
; GCN-LABEL: test_inst_offset_function
define void @test_inst_offset_function() {
entry:
- ; Occupy 4092 bytes of scratch, so the offset of the spill of %a just fits in
- ; the instruction offset field.
- %alloca = alloca i8, i32 4092, align 4, addrspace(5)
+ ; Occupy enough bytes of scratch, so the offset of the spill of %a
+ ; just fits in the instruction offset field when the emergency stack
+ ; slot is added. It's hard to hit the actual limit since we're also
+ ; going to insert the emergency stack slot for large frames.
+ %alloca = alloca i8, i32 4088, align 4, addrspace(5)
%buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
%aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
- ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
- ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
+ ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
+ ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
%a = load volatile i32, i32 addrspace(5)* %aptr
; Force %a to spill.
%aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
; 0x40000 / 64 = 4096 (for wave64)
- ; MUBUF: s_add_i32 s4, s32, 0x40000
+ ; MUBUF: s_add_i32 s4, s32, 0x40100
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
- ; FLATSCR: s_add_i32 s0, s32, 0x1000
+ ; FLATSCR: s_add_i32 s0, s32, 0x1004
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s0 ; 4-byte Folded Spill
%a = load volatile i32, i32 addrspace(5)* %aptr
; GCN-LABEL: test_sgpr_offset_subregs_function
define void @test_sgpr_offset_subregs_function() {
entry:
- ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a
- ; still fits below offset 4096 (4088 + 8 - 4 = 4092), and can be placed in
+ ; We want to test that the spill offset of the last subreg of %a is the
+ ; highest valid value for the immediate offset. We enable the emergency
+ ; stack slot for large frames, so it's hard to get the frame layout
+ ; exactly as we want to test it.
+ ;
+ ; Occupy 4084 bytes of scratch, so that the spill of the last subreg of %a
+ ; still fits below offset 4096 (4084 + 8 - 4 = 4088), and can be placed in
; the instruction offset field.
- %alloca = alloca i8, i32 4088, align 4, addrspace(5)
+ %alloca = alloca i8, i32 4084, align 4, addrspace(5)
%bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
%bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
+ ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4084 ; 4-byte Folded Spill
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill
- ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4092 ; 4-byte Folded Spill
- ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4088 ; 8-byte Folded Spill
+ ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4084 ; 8-byte Folded Spill
%aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1
%a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr
; GCN-LABEL: test_inst_offset_subregs_function
define void @test_inst_offset_subregs_function() {
entry:
- ; Occupy 4092 bytes of scratch, so that the spill of the last subreg of %a
- ; does not fit below offset 4096 (4092 + 8 - 4 = 4096), and has to live
+ ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a
+ ; does not fit below offset 4096 (4088 + 4 + 8 - 4 = 4096), and has to live
; in the SGPR offset.
- %alloca = alloca i8, i32 4092, align 4, addrspace(5)
+ %alloca = alloca i8, i32 4088, align 4, addrspace(5)
%bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
%bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)*
- ; 0x3ff00 / 64 = 4092 (for wave64)
+ ; 0x3ff00 / 64 = 4092 (for wave64)
; MUBUF: s_add_i32 s4, s32, 0x3ff00
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill
; GCN: s_waitcnt
; GFX900-MUBUF: buffer_store_dword
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4094
+; GFX900-MUBUF-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s32 offset:4058
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
; GFX900-FLATSCR: scratch_store_dword
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_store_short_d16_hi off, v0, s32 offset:4094
+; GFX900-FLATSCR-NEXT: scratch_store_short_d16_hi off, v0, s32 offset:4058
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-define void @store_private_hi_v2i16_to_offset(i32 %arg) #0 {
+define void @store_private_hi_v2i16_to_offset(i32 %arg, [10 x i32] addrspace(5)* %obj0) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4, addrspace(5)
%obj1 = alloca [4096 x i16], align 2, addrspace(5)
%bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
store volatile i32 123, i32 addrspace(5)* %bc
; GCN: s_waitcnt
; GFX900-MUBUF: buffer_store_dword
; GFX900-MUBUF-NEXT: s_waitcnt vmcnt(0)
-; GFX900-MUBUF-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s32 offset:4095
+; GFX900-MUBUF-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s32 offset:4059
; GFX900-FLATSCR: scratch_store_dword
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; GFX900-FLATSCR-NEXT: scratch_store_byte_d16_hi off, v0, s32 offset:4095
+; GFX900-FLATSCR-NEXT: scratch_store_byte_d16_hi off, v0, s32 offset:4059
; GFX900-FLATSCR-NEXT: s_waitcnt vmcnt(0)
-define void @store_private_hi_v2i16_i8_to_offset(i32 %arg) #0 {
+define void @store_private_hi_v2i16_i8_to_offset(i32 %arg, [10 x i32] addrspace(5)* %obj0) #0 {
entry:
- %obj0 = alloca [10 x i32], align 4, addrspace(5)
%obj1 = alloca [4096 x i8], align 2, addrspace(5)
%bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
store volatile i32 123, i32 addrspace(5)* %bc