IncomingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
: ValueHandler(true, MIRBuilder, MRI, AssignFn) {}
+
+ /// Insert G_ASSERT_ZEXT/G_ASSERT_SEXT or other hint instruction based on \p
+ /// VA, returning the new register if a hint was inserted.
+ Register buildExtensionHint(CCValAssign &VA, Register SrcReg, LLT NarrowTy);
+
+ /// Provides a default implementation for argument handling.
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign &VA) override;
};
struct OutgoingValueHandler : public ValueHandler {
}
void CallLowering::ValueHandler::anchor() {}
+
+Register CallLowering::IncomingValueHandler::buildExtensionHint(CCValAssign &VA,
+                                                                Register SrcReg,
+                                                                LLT NarrowTy) {
+  switch (VA.getLocInfo()) {
+  case CCValAssign::LocInfo::ZExt: {
+    // The ABI guarantees the high bits are zero; record that fact with
+    // G_ASSERT_ZEXT so later passes can exploit it.
+    return MIRBuilder
+        .buildAssertZExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
+                         NarrowTy.getScalarSizeInBits())
+        .getReg(0);
+  }
+  case CCValAssign::LocInfo::SExt: {
+    // The ABI guarantees the value was sign-extended; record it with
+    // G_ASSERT_SEXT.
+    return MIRBuilder
+        .buildAssertSExt(MRI.cloneVirtualRegister(SrcReg), SrcReg,
+                         NarrowTy.getScalarSizeInBits())
+        .getReg(0);
+  }
+  default:
+    // No extension guarantee to record; hand back the register unchanged.
+    return SrcReg;
+  }
+}
+
+void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
+                                                          Register PhysReg,
+                                                          CCValAssign &VA) {
+  const LLT LocTy(VA.getLocVT());
+  const LLT ValTy = MRI.getType(ValVReg);
+
+  // Same width as the assigned location: a plain copy is enough.
+  if (ValTy.getSizeInBits() == LocTy.getSizeInBits()) {
+    MIRBuilder.buildCopy(ValVReg, PhysReg);
+    return;
+  }
+
+  // Narrower value: copy at location width, attach a zext/sext hint if the
+  // calling convention promises one, then truncate to the value type.
+  auto Copy = MIRBuilder.buildCopy(LocTy, PhysReg);
+  auto Hint = buildExtensionHint(VA, Copy.getReg(0), ValTy);
+  MIRBuilder.buildTrunc(ValVReg, Hint);
+}
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
markPhysRegUsed(PhysReg);
- switch (VA.getLocInfo()) {
- default:
- MIRBuilder.buildCopy(ValVReg, PhysReg);
- break;
- case CCValAssign::LocInfo::ZExt: {
- auto WideTy = LLT{VA.getLocVT()};
- auto NarrowTy = MRI.getType(ValVReg);
- MIRBuilder.buildTrunc(ValVReg,
- MIRBuilder.buildAssertZExt(
- WideTy, MIRBuilder.buildCopy(WideTy, PhysReg),
- NarrowTy.getSizeInBits()));
- break;
- }
- case CCValAssign::LocInfo::SExt: {
- auto WideTy = LLT{VA.getLocVT()};
- auto NarrowTy = MRI.getType(ValVReg);
- MIRBuilder.buildTrunc(ValVReg,
- MIRBuilder.buildAssertSExt(
- WideTy, MIRBuilder.buildCopy(WideTy, PhysReg),
- NarrowTy.getSizeInBits()));
- break;
- }
- case CCValAssign::LocInfo::AExt: {
- auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
- MIRBuilder.buildTrunc(ValVReg, Copy);
- break;
- }
- }
+ // Delegate the copy/extension-hint/truncate sequence to the shared
+ // IncomingValueHandler implementation instead of open-coding the switch.
+ IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
namespace {
-struct AMDGPUValueHandler : public CallLowering::ValueHandler {
- AMDGPUValueHandler(bool IsIncoming, MachineIRBuilder &B,
- MachineRegisterInfo &MRI, CCAssignFn *AssignFn)
- : ValueHandler(IsIncoming, B, MRI, AssignFn) {}
-
- /// Wrapper around extendRegister to ensure we extend to a full 32-bit
- /// register.
- Register extendRegisterMin32(Register ValVReg, CCValAssign &VA) {
- if (VA.getLocVT().getSizeInBits() < 32) {
- // 16-bit types are reported as legal for 32-bit registers. We need to
- // extend and do a 32-bit copy to avoid the verifier complaining about it.
- return MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
- }
-
- return extendRegister(ValVReg, VA);
+/// Wrapper around extendRegister to ensure we extend to a full 32-bit register.
+static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,
+ Register ValVReg, CCValAssign &VA) {
+ if (VA.getLocVT().getSizeInBits() < 32) {
+ // 16-bit types are reported as legal for 32-bit registers. We need to
+ // extend and do a 32-bit copy to avoid the verifier complaining about it.
+ return Handler.MIRBuilder.buildAnyExt(LLT::scalar(32), ValVReg).getReg(0);
}
-};
-struct AMDGPUOutgoingValueHandler : public AMDGPUValueHandler {
+ // Locations of 32 bits or more can use the ordinary ABI extension.
+ return Handler.extendRegister(ValVReg, VA);
+}
+
+/// Handler for outgoing values on AMDGPU; extends assigned values to at
+/// least 32 bits before copying them into physical registers.
+struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+ : OutgoingValueHandler(B, MRI, AssignFn), MIB(MIB) {}
MachineInstrBuilder MIB;
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
- Register ExtReg = extendRegisterMin32(ValVReg, VA);
+ Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
// If this is a scalar return, insert a readfirstlane just in case the value
// ends up in a VGPR.
}
};
-struct AMDGPUIncomingArgHandler : public AMDGPUValueHandler {
+struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
uint64_t StackUsed = 0;
AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
- : AMDGPUValueHandler(true, B, MRI, AssignFn) {}
+ : IncomingValueHandler(B, MRI, AssignFn) {}
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO) override {
// 16-bit types are reported as legal for 32-bit registers. We need to do
// a 32-bit copy, and truncate to avoid the verifier complaining about it.
auto Copy = MIRBuilder.buildCopy(LLT::scalar(32), PhysReg);
- MIRBuilder.buildTrunc(ValVReg, Copy);
+
+ // If we have signext/zeroext, it applies to the whole 32-bit register
+ // before truncation.
+ auto Extended =
+ buildExtensionHint(VA, Copy.getReg(0), LLT(VA.getLocVT()));
+ MIRBuilder.buildTrunc(ValVReg, Extended);
return;
}
- switch (VA.getLocInfo()) {
- case CCValAssign::LocInfo::SExt:
- case CCValAssign::LocInfo::ZExt:
- case CCValAssign::LocInfo::AExt: {
- auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
- MIRBuilder.buildTrunc(ValVReg, Copy);
- break;
- }
- default:
- MIRBuilder.buildCopy(ValVReg, PhysReg);
- break;
- }
+ IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
void assignValueToAddress(Register ValVReg, Register Addr, uint64_t MemSize,
MachineInstrBuilder MIB;
};
-struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {
- MachineInstrBuilder MIB;
+struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
CCAssignFn *AssignFnVarArg;
/// For tail calls, the byte offset of the call's argument area from the
MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
CCAssignFn *AssignFn, CCAssignFn *AssignFnVarArg,
bool IsTailCall = false, int FPDiff = 0)
- : AMDGPUValueHandler(false, MIRBuilder, MRI, AssignFn), MIB(MIB),
+ : AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB, AssignFn),
AssignFnVarArg(AssignFnVarArg), FPDiff(FPDiff), IsTailCall(IsTailCall) {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
- Register ExtReg = extendRegisterMin32(ValVReg, VA);
+ // Sub-32-bit types must be widened before the copy to keep the verifier
+ // happy (see extendRegisterMin32).
+ Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
}
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
ret void
}
+; The signext is a no-op
+define void @void_func_i32_signext(i32 signext %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i32_signext
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]]
+ store i32 %arg0, i32 addrspace(1)* undef
+ ret void
+}
+
+; The zeroext is a no-op
+define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 {
+ ; CHECK-LABEL: name: void_func_i32_zeroext
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+ ; CHECK: S_SETPC_B64_return [[COPY2]]
+ store i32 %arg0, i32 addrspace(1)* undef
+ ret void
+}
+
define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 {
; CHECK-LABEL: name: void_func_p3i8
; CHECK: bb.1 (%ir-block.0):
; GFX7-LABEL: v_mul_i16_zeroext:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX7-NEXT: s_mov_b32 s4, 0xffff
-; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
-; GFX7-NEXT: v_and_b32_e32 v1, s4, v1
; GFX7-NEXT: v_mul_u32_u24_e32 v0, v0, v1
-; GFX7-NEXT: v_and_b32_e32 v0, s4, v0
+; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_mul_i16_zeroext: