These are only needed as a counterpart to the VGPR ones.
Differential Revision: https://reviews.llvm.org/D78597
AMDGPU::VGPR_HI16RegClass.contains(Reg)) {
IsSGPR = false;
Width = 1;
- } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) {
+ } else if (AMDGPU::AGPR_32RegClass.contains(Reg) ||
+ AMDGPU::AGPR_LO16RegClass.contains(Reg)) {
IsSGPR = false;
IsAGPR = true;
Width = 1;
def VCCRegBank : RegisterBank <"VCC", [SReg_1]>;
def AGPRRegBank : RegisterBank <"AGPR",
- [AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_256, AReg_512, AReg_1024]
+ [AGPR_LO16, AGPR_32, AReg_64, AReg_96, AReg_128, AReg_160, AReg_192, AReg_256, AReg_512, AReg_1024]
>;
MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg))
+ MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
Enc |= 512;
return Enc;
if (RI.getRegSizeInBits(*RC) == 16) {
assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
AMDGPU::VGPR_HI16RegClass.contains(SrcReg) ||
- AMDGPU::SReg_LO16RegClass.contains(SrcReg));
+ AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
+ AMDGPU::AGPR_LO16RegClass.contains(SrcReg));
bool IsSGPRDst = AMDGPU::SReg_LO16RegClass.contains(DestReg);
bool IsSGPRSrc = AMDGPU::SReg_LO16RegClass.contains(SrcReg);
+ bool IsAGPRDst = AMDGPU::AGPR_LO16RegClass.contains(DestReg);
+ bool IsAGPRSrc = AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
bool DstLow = AMDGPU::VGPR_LO16RegClass.contains(DestReg) ||
- AMDGPU::SReg_LO16RegClass.contains(DestReg);
+ AMDGPU::SReg_LO16RegClass.contains(DestReg) ||
+ AMDGPU::AGPR_LO16RegClass.contains(DestReg);
bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
- AMDGPU::SReg_LO16RegClass.contains(SrcReg);
- const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SReg_32RegClass
+ AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
+ AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
+ const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
+ : IsAGPRDst ? &AMDGPU::AGPR_32RegClass
: &AMDGPU::VGPR_32RegClass;
- const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SReg_32RegClass
+ const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
+ : IsAGPRSrc ? &AMDGPU::AGPR_32RegClass
: &AMDGPU::VGPR_32RegClass;
MCRegister NewDestReg =
RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
return;
}
+ if (IsAGPRDst || IsAGPRSrc) {
+ if (!DstLow || !SrcLow) {
+ reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc,
+ "Cannot use hi16 subreg with an AGPR!");
+ }
+
+ copyPhysReg(MBB, MI, DL, NewDestReg, NewSrcReg, KillSrc);
+ return;
+ }
+
if (IsSGPRSrc && !ST.hasSDWAScalar()) {
if (!DstLow || !SrcLow) {
reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc,
Reserved.set(Low);
}
+ for (auto Reg : AMDGPU::AGPR_32RegClass) {
+ Reserved.set(getSubReg(Reg, AMDGPU::hi16));
+ }
+
// Reserve all the rest AGPRs if there are no instructions to use it.
if (!ST.hasMAIInsts()) {
for (unsigned i = 0; i < MaxNumVGPRs; ++i) {
const TargetRegisterClass *
SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) {
switch (BitWidth) {
+ case 16:
+ return &AMDGPU::AGPR_LO16RegClass;
case 32:
return &AMDGPU::AGPR_32RegClass;
case 64:
&AMDGPU::VGPR_LO16RegClass,
&AMDGPU::VGPR_HI16RegClass,
&AMDGPU::SReg_LO16RegClass,
+ &AMDGPU::AGPR_LO16RegClass,
&AMDGPU::VGPR_32RegClass,
&AMDGPU::SReg_32RegClass,
&AMDGPU::AGPR_32RegClass,
bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
unsigned Size = getRegSizeInBits(*RC);
- if (Size < 32)
+ if (Size < 16)
return false;
const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
if (!ARC) {
// AccVGPR registers
foreach Index = 0-255 in {
- def AGPR#Index :
- SIReg <"a"#Index, Index>,
- DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]> {
- let HWEncoding{8} = 1;
- }
+ defm AGPR#Index :
+ SIRegLoHi16 <"a"#Index, Index, 1, 1>,
+ DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]>;
}
//===----------------------------------------------------------------------===//
// VGPR 1024-bit registers
def VGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, VGPR_32, 255, 1, 32, "v">;
+// AccVGPR 16-bit registers: the low halves AGPR0_LO16 .. AGPR255_LO16.
+// The class is marked non-allocatable and does not generate a pressure set.
+// NOTE(review): per the change description these exist only as a counterpart
+// to the VGPR 16-bit registers -- confirm before making the class allocatable.
+def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+ (add (sequence "AGPR%u_LO16", 0, 255))> {
+ let isAllocatable = 0;
+ let Size = 16;
+ let GeneratePressureSet = 0;
+}
+
// AccVGPR 32-bit registers
def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "AGPR%u", 0, 255))> {
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
switch (RCID) {
+ case AMDGPU::VGPR_LO16RegClassID:
+ case AMDGPU::VGPR_HI16RegClassID:
+ case AMDGPU::SGPR_LO16RegClassID:
+ case AMDGPU::AGPR_LO16RegClassID:
+ return 16;
case AMDGPU::SGPR_32RegClassID:
case AMDGPU::VGPR_32RegClassID:
case AMDGPU::VRegOrLds_32RegClassID:
$vgpr1_hi16 = COPY killed $sgpr0_lo16
S_ENDPGM 0
...
+
+# Copying the low half of an AGPR (a0.l) into the low half of an SGPR (s1.l)
+# is expected to be reported as an illegal copy.
+# NOTE(review): the expected diagnostic reads "illegal SGPR to VGPR copy" even
+# though the source of this copy is an AGPR - presumably a generic message;
+# confirm against the code that emits it.
+# GCN-LABEL: {{^}}lo_to_lo_illegal_agpr_to_sgpr:
+# GCN: ; illegal copy a0.l to s1.l
+# ERR: error: <unknown>:0:0: in function lo_to_lo_illegal_agpr_to_sgpr void (): illegal SGPR to VGPR copy
+name: lo_to_lo_illegal_agpr_to_sgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $agpr0 = IMPLICIT_DEF
+ $sgpr1_lo16 = COPY $agpr0_lo16
+ S_ENDPGM 0
+...
+
+# The source is the high half of a VGPR (v0.h) and the destination is the low
+# half of an AGPR (a1.l), i.e. a hi-to-lo copy. A copy that involves an AGPR
+# and a hi16 subregister is expected to be diagnosed.
+# GCN-LABEL: {{^}}hi_to_lo_vgpr_to_agpr:
+# GCN: ; illegal copy v0.h to a1.l
+# ERR: error: <unknown>:0:0: in function hi_to_lo_vgpr_to_agpr void (): Cannot use hi16 subreg with an AGPR!
+name: hi_to_lo_vgpr_to_agpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $vgpr0 = IMPLICIT_DEF
+ $agpr1_lo16 = COPY killed $vgpr0_hi16
+ S_ENDPGM 0
+...
--- /dev/null
+# RUN: llc -march=amdgcn -mcpu=gfx908 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+# A 16-bit copy between the low halves of two different AGPRs. The checks
+# expect it to go through a temporary VGPR: v_accvgpr_read_b32 from a0,
+# s_nop 1, then v_accvgpr_write_b32 into a1.
+# GCN-LABEL: {{^}}lo_to_lo_agpr_to_agpr:
+# GCN: v_accvgpr_read_b32 [[TMP:v[0-9]+]], a0
+# GCN-NEXT: s_nop 1
+# GCN-NEXT: v_accvgpr_write_b32 a1, [[TMP]]
+name: lo_to_lo_agpr_to_agpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $agpr0 = IMPLICIT_DEF
+ $agpr1_lo16 = COPY $agpr0_lo16
+ S_ENDPGM 0
+...
+
+# Copy of an AGPR low half onto itself. The checks expect s_waitcnt to be
+# followed directly by s_endpgm, i.e. no instruction is emitted for the copy.
+# GCN-LABEL: {{^}}lo_to_lo_samereg:
+# GCN: s_waitcnt
+# GCN-NEXT: s_endpgm
+name: lo_to_lo_samereg
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $agpr0 = IMPLICIT_DEF
+ $agpr0_lo16 = COPY $agpr0_lo16
+ S_ENDPGM 0
+...
+
+# The first copy has an undef source (a0.l); the second is a full 32-bit copy
+# from a1 to a2. The checks match only the a1 -> a2 sequence: a read of a1
+# into a temporary VGPR, s_nop 1, then a write of that VGPR into a2.
+# GCN-LABEL: {{^}}lo_to_lo_undef_agpr_to_agpr:
+# GCN: v_accvgpr_read_b32 [[TMP:v[0-9]+]], a1
+# GCN-NEXT: s_nop 1
+# GCN-NEXT: v_accvgpr_write_b32 a2, [[TMP]]
+name: lo_to_lo_undef_agpr_to_agpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $agpr1_lo16 = COPY undef $agpr0_lo16
+ $agpr2 = COPY killed $agpr1
+ S_ENDPGM 0
+...
+
+# A 16-bit copy from the low half of an SGPR into the low half of an AGPR.
+# The checks expect v_mov_b32_e32 from s0 into a temporary VGPR, s_nop 1,
+# then v_accvgpr_write_b32 of that VGPR into a1.
+# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_agpr:
+# GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], s0
+# GCN-NEXT: s_nop 1
+# GCN-NEXT: v_accvgpr_write_b32 a1, [[TMP]]
+name: lo_to_lo_sgpr_to_agpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ $sgpr0 = IMPLICIT_DEF
+ $agpr1_lo16 = COPY $sgpr0_lo16
+ S_ENDPGM 0
+...
; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1
- ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def dead [[COPY1]], 851978 /* regdef:SReg_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1
+ ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def dead [[COPY1]], 851978 /* regdef:VGPR_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1
; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0
; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3
; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec
; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
- ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def dead %11
+ ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def dead %11
; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3)
- ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %15, 851978 /* regdef:SReg_LO16 */, def %16
+ ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %15, 851978 /* regdef:VGPR_LO16 */, def %16
; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
; CHECK: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec
- ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %21, 851978 /* regdef:SReg_LO16 */, def %22
+ ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %21, 851978 /* regdef:VGPR_LO16 */, def %22
; CHECK: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec
- ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:SReg_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:SReg_LO16 */, %15, 851977 /* reguse:SReg_LO16 */, %16, 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_2]]
+ ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:VGPR_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:VGPR_LO16 */, %15, 851977 /* reguse:VGPR_LO16 */, %16, 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_2]]
; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]]
; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3)
; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3)
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3)
- ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
- ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_]]
- ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def undef %0.sub0, 851978 /* regdef:SReg_LO16 */, def undef %0.sub1
+ ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
+ ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_]]
+ ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub0, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub1
; CHECK: S_NOP 0, implicit %0.sub1
; CHECK: $sgpr10 = S_MOV_B32 -1
; CHECK: S_BRANCH %bb.1
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3)
- ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
- ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SReg_LO16 */, [[DS_READ_B32_gfx9_]]
- ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SReg_LO16 */, def undef %0.sub1, 851978 /* regdef:SReg_LO16 */, def undef %0.sub0
+ ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3)
+ ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VGPR_LO16 */, [[DS_READ_B32_gfx9_]]
+ ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub1, 851978 /* regdef:VGPR_LO16 */, def undef %0.sub0
; CHECK: S_NOP 0, implicit %0.sub1
; CHECK: $sgpr10 = S_MOV_B32 -1
; CHECK: S_BRANCH %bb.1