std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  SDLoc DL(N);

-  const MachineFunction &MF = CurDAG->getMachineFunction();
-  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
-    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
-                                              FI->getValueType(0));
-
-    // If we can resolve this to a frame index access, this will be relative to
-    // either the stack or frame pointer SGPR.
-    return std::make_pair(
-        TFI, CurDAG->getRegister(Info->getStackPtrOffsetReg(), MVT::i32));
-  }
+  auto *FI = dyn_cast<FrameIndexSDNode>(N);
+  SDValue TFI =
+      FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N;

-  // If we don't know this private access is a local stack object, it needs to
-  // be relative to the entry point's scratch wave offset.
-  return std::make_pair(N, CurDAG->getTargetConstant(0, DL, MVT::i32));
+  // We rebase the base address into an absolute stack address and hence use a
+  // constant 0 for soffset. Frame index elimination will substitute the frame
+  // register later if one is needed.
+  return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32));
}
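
For intuition, here is a minimal standalone sketch (not part of the patch; the function name and parameters are illustrative assumptions) of how a MUBUF scratch access forms its effective address. Because the addressing terms are simply summed, selection can safely emit a zero soffset and let frame index elimination fold a frame register in later without changing the address.

#include <cassert>
#include <cstdint>

// Hypothetical model of a MUBUF scratch access: the hardware sums the buffer
// base, the SGPR soffset, the VGPR voffset, and the instruction's immediate
// offset into one effective address.
uint64_t scratchAddress(uint64_t base, uint32_t soffset, uint32_t voffset,
                        uint16_t immOffset) {
  return base + soffset + voffset + immOffset;
}

int main() {
  // The same byte is addressed whether the wave's frame offset lives in
  // soffset or has been folded into the base, which is why a placeholder
  // soffset of 0 at selection time is safe.
  assert(scratchAddress(0x1000, 0x200, 0x40, 8) ==
         scratchAddress(0x1000 + 0x200, 0, 0x40, 8));
  return 0;
}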
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
  assert(TII->isMUBUF(MI));

  MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
-  assert(SOffset->getReg() ==
-             MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg() &&
-         "should only be seeing stack pointer offset relative FrameIndex");
+  assert(((SOffset->isReg() &&
+           SOffset->getReg() ==
+               MF->getInfo<SIMachineFunctionInfo>()->getStackPtrOffsetReg()) ||
+          (SOffset->isImm() && SOffset->getImm() == 0)) &&
+         "frame index soffset should be the stack pointer or an immediate 0");

  MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
  int64_t NewOffset = OffsetOp->getImm() + Offset;

  FIOp->ChangeToRegister(BaseReg, false);
  OffsetOp->setImm(NewOffset);
-
-  // The move materializing the base address will be an absolute stack address,
-  // so clear the base offset.
-  SOffset->ChangeToImmediate(0);
}
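
To make the relaxed assertion concrete, here is a small self-contained sketch of the two operand states it now accepts; Operand is a simplified stand-in, not LLVM's MachineOperand.

#include <cassert>

// Simplified stand-in for an operand that is either a register or an
// immediate; this is not LLVM's MachineOperand.
struct Operand {
  bool IsReg;
  unsigned Reg;   // meaningful when IsReg
  long long Imm;  // meaningful when !IsReg
};

// Mirrors the relaxed assert: soffset must be the stack pointer register
// (the old form) or a literal 0 (the form now emitted by selection).
bool isValidSOffset(const Operand &SOffset, unsigned StackPtrReg) {
  return (SOffset.IsReg && SOffset.Reg == StackPtrReg) ||
         (!SOffset.IsReg && SOffset.Imm == 0);
}

int main() {
  assert(isValidSOffset({true, 32, 0}, 32));   // stack-pointer-register form
  assert(isValidSOffset({false, 0, 0}, 32));   // immediate-zero form
  assert(!isValidSOffset({false, 0, 16}, 32)); // anything else is rejected
  return 0;
}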
bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
      } else {
        SOffset.setReg(FrameReg);
      }
+    } else if (SOffset.isImm() && FrameReg != AMDGPU::NoRegister) {
+      SOffset.ChangeToRegister(FrameReg, false);
    }

    int64_t Offset = FrameInfo.getObjectOffset(Index);
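
The new else-if branch above is the other half of the scheme: the constant 0 soffset emitted at selection is upgraded to the frame register once one is known. A standalone sketch of that rewrite, again using a simplified Operand stand-in with hypothetical names rather than LLVM's API:

#include <cassert>

// Simplified stand-ins; not LLVM's MachineOperand or register numbering.
constexpr unsigned NoRegister = 0;

struct Operand {
  bool IsReg;
  unsigned Reg;
  long long Imm;
  void changeToRegister(unsigned R) { IsReg = true; Reg = R; Imm = 0; }
};

// Mirrors the new branch: only a placeholder immediate soffset is rewritten,
// and only once a frame register is actually known.
void rewriteSOffset(Operand &SOffset, unsigned FrameReg) {
  if (!SOffset.IsReg && FrameReg != NoRegister)
    SOffset.changeToRegister(FrameReg);
}

int main() {
  Operand S{false, 0, 0};         // the constant-0 soffset from selection
  rewriteSOffset(S, /*FrameReg=*/34);
  assert(S.IsReg && S.Reg == 34); // now carries the frame register
  return 0;
}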
; GCN-LABEL: {{^}}tail_call_byval_align16:
; GCN-NOT: s32
-; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12
-; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8
+; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8
+; GCN: buffer_load_dword [[VREG2:v[0-9]+]], off, s[0:3], s32 offset:12
; GCN: s_getpc_b64
-; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4
-; GCN: buffer_store_dword v33, off, s[0:3], s32{{$}}
+; GCN: buffer_store_dword [[VREG2]], off, s[0:3], s32 offset:4
+; GCN: buffer_store_dword [[VREG1]], off, s[0:3], s32{{$}}
; GCN-NOT: s32
; GCN: s_setpc_b64
define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
; VARABI: enable_vgpr_workitem_id = 0
; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
-; VARABI: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
-; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
; VARABI: s_movk_i32 s32, 0x400{{$}}
-
-; VARABI-NOT: s32
+; VARABI: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
+; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
; VARABI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; VARABI: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
-; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
; VARABI: buffer_store_dword v0, off, s[0:3], s32 offset:4
+; VARABI: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
; VARABI: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
; VARABI: v_mov_b32_e32 [[RELOAD_BYVAL]],
; VARABI: s_swappc_b64