const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
unsigned Size = TRI->getRegSizeInBits(*RC);
- if (Size > 32)
+ if (Size == 16)
+ Reg = TRI->get32BitRegister(Reg);
+ else if (Size > 32)
Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
if (TRI->hasVGPRs(RC)) {
}
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- unsigned Size = TRI->getRegSizeInBits(*RC) / 32;
- if (Size > 1)
- Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+
+ if (Size == 16) {
+ Reg = TRI->get32BitRegister(Reg);
+ Size = 1;
+ } else {
+ Size /= 32;
+ if (Size > 1)
+ Reg = TRI->getSubReg(Reg, AMDGPU::sub0);
+ }
if (TRI->hasVGPRs(RC)) {
// VGPRs have 4 banks assigned in a round-robin fashion.
}
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+
+ // TODO: Support 16 bit registers. Those need to be moved with their
+ // parent VGPR_32 and potentially a sibling 16 bit sub-register.
+ if (Size < 32)
+ return false;
+
if (TRI->hasVGPRs(RC))
return true;
- unsigned Size = TRI->getRegSizeInBits(*RC);
+ if (Size == 16)
+ return AMDGPU::SGPR_LO16RegClass.contains(PhysReg);
+
if (Size > 32)
PhysReg = TRI->getSubReg(PhysReg, AMDGPU::sub0);
bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) ||
AMDGPU::SReg_LO16RegClass.contains(SrcReg) ||
AMDGPU::AGPR_LO16RegClass.contains(SrcReg);
- const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass
- : IsAGPRDst ? &AMDGPU::AGPR_32RegClass
- : &AMDGPU::VGPR_32RegClass;
- const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass
- : IsAGPRSrc ? &AMDGPU::AGPR_32RegClass
- : &AMDGPU::VGPR_32RegClass;
- MCRegister NewDestReg =
- RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16,
- DstRC);
- MCRegister NewSrcReg =
- RI.getMatchingSuperReg(SrcReg, SrcLow ? AMDGPU::lo16 : AMDGPU::hi16,
- SrcRC);
+ MCRegister NewDestReg = RI.get32BitRegister(DestReg);
+ MCRegister NewSrcReg = RI.get32BitRegister(SrcReg);
if (IsSGPRDst) {
if (!IsSGPRSrc) {
DS_WRITE2_B32_gfx9 %2, %1.sub14, %1.sub15, 14, 15, 0, implicit $exec
S_ENDPGM 0
...
+
+# Test: the lo16 sub-register of a V_AND_B32_e32 result (%2) is copied through
+# a vgpr_lo16 virtual register (%3) into $vgpr1_lo16. The AND operands carry
+# preferred-register hints $vgpr1 and $vgpr5. The checks below expect the AND
+# to read $vgpr3 and $vgpr1, write $vgpr0, and the copy to read $vgpr0_lo16.
+# GCN-LABEL: vgpr_lo16_sub{{$}}
+# GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec
+# GCN: renamable $vgpr1_lo16 = COPY renamable $vgpr0_lo16
+---
+name: vgpr_lo16_sub
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '$vgpr1' }
+ - { id: 1, class: vgpr_32, preferred-register: '$vgpr5' }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_lo16 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = V_AND_B32_e32 %1, %0, implicit $exec
+ %3 = COPY %2.lo16
+ $vgpr1_lo16 = COPY %3
+ SI_RETURN_TO_EPILOG $vgpr1_lo16
+...
+
+# Test: a live-in $vgpr0_lo16 is copied through a vgpr_lo16 virtual register
+# (preferred-register hint $vgpr4_lo16) into $vgpr1_lo16. The check below
+# expects the output to contain a copy from $vgpr0_lo16 into $vgpr1_lo16.
+# GCN-LABEL: vgpr_lo16{{$}}
+# GCN: $vgpr1_lo16 = COPY killed renamable $vgpr0_lo16
+---
+name: vgpr_lo16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_lo16, preferred-register: '$vgpr4_lo16' }
+body: |
+ bb.0:
+ liveins: $vgpr0_lo16
+
+ %0 = COPY $vgpr0_lo16
+ $vgpr1_lo16 = COPY %0
+ SI_RETURN_TO_EPILOG $vgpr1_lo16
+...
+
+# Test: the hi16 sub-register of a V_AND_B32_e32 result (%2) is copied through
+# a vgpr_hi16 virtual register (%3) into $vgpr1_hi16. The AND operands carry
+# preferred-register hints $vgpr1 and $vgpr5. The checks below expect the AND
+# to read $vgpr3 and $vgpr1, write $vgpr0, and the copy to read $vgpr0_hi16.
+# GCN-LABEL: vgpr_hi16_sub{{$}}
+# GCN: renamable $vgpr0 = V_AND_B32_e32 killed $vgpr3, killed $vgpr1, implicit $exec
+# GCN: renamable $vgpr1_hi16 = COPY renamable $vgpr0_hi16
+---
+name: vgpr_hi16_sub
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '$vgpr1' }
+ - { id: 1, class: vgpr_32, preferred-register: '$vgpr5' }
+ - { id: 2, class: vgpr_32 }
+ - { id: 3, class: vgpr_hi16 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ %2 = V_AND_B32_e32 %1, %0, implicit $exec
+ %3 = COPY %2.hi16
+ $vgpr1_hi16 = COPY %3
+ SI_RETURN_TO_EPILOG $vgpr1_hi16
+...
+
+# Test: a live-in $vgpr0_hi16 is copied through a vgpr_hi16 virtual register
+# (preferred-register hint $vgpr4_hi16) into $vgpr1_hi16. The check below
+# expects the output to contain a copy from $vgpr0_hi16 into $vgpr1_hi16.
+# GCN-LABEL: vgpr_hi16{{$}}
+# GCN: $vgpr1_hi16 = COPY killed renamable $vgpr0_hi16
+---
+name: vgpr_hi16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_hi16, preferred-register: '$vgpr4_hi16' }
+body: |
+ bb.0:
+ liveins: $vgpr0_hi16
+
+ %0 = COPY $vgpr0_hi16
+ $vgpr1_hi16 = COPY %0
+ SI_RETURN_TO_EPILOG $vgpr1_hi16
+...
+
+# Test: scalar variant. The lo16 sub-register of an S_AND_B32 result (%1) is
+# copied through an sgpr_lo16 virtual register (%2) into $sgpr1_lo16. One AND
+# operand is the physical register $sgpr0; the other (%0) carries a
+# preferred-register hint of $sgpr16. The checks below expect the AND to read
+# $sgpr14 and $sgpr0, write $sgpr0, and the copy to read $sgpr0_lo16.
+# GCN-LABEL: sgpr_lo16_sub{{$}}
+# GCN: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr14, $sgpr0, implicit-def $scc
+# GCN: renamable $sgpr1_lo16 = COPY renamable $sgpr0_lo16
+---
+name: sgpr_lo16_sub
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_32, preferred-register: '$sgpr16' }
+ - { id: 1, class: sgpr_32 }
+ - { id: 2, class: sgpr_lo16 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ $sgpr0 = IMPLICIT_DEF
+ %1 = S_AND_B32 %0, $sgpr0, implicit-def $scc
+ %2 = COPY %1.lo16
+ $sgpr1_lo16 = COPY %2
+ SI_RETURN_TO_EPILOG $sgpr1_lo16
+...
+
+# Test: a live-in $sgpr0_lo16 is copied through an sgpr_lo16 virtual register
+# (preferred-register hint $sgpr4_lo16) into $sgpr1_lo16. The check below
+# expects the output to contain a copy from $sgpr0_lo16 into $sgpr1_lo16.
+# GCN-LABEL: sgpr_lo16{{$}}
+# GCN: $sgpr1_lo16 = COPY killed renamable $sgpr0_lo16
+---
+name: sgpr_lo16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sgpr_lo16, preferred-register: '$sgpr4_lo16' }
+body: |
+ bb.0:
+ liveins: $sgpr0_lo16
+
+ %0 = COPY $sgpr0_lo16
+ $sgpr1_lo16 = COPY %0
+ SI_RETURN_TO_EPILOG $sgpr1_lo16
+...
+
+# Check that we do not use VGPR3 which we would use otherwise.
+# We cannot use it because of interference with VGPR3_LO16.
+# The body defines $vgpr3_lo16 between the definitions of the AND operands
+# and the V_AND_B32_e32 itself; the check below expects the AND to read
+# $vgpr7 and $vgpr1.
+# GCN-LABEL: v1_vs_v5_src_interence{{$}}
+# GCN: V_AND_B32_e32 killed $vgpr7, killed $vgpr1,
+---
+name: v1_vs_v5_src_interence
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '$vgpr1' }
+ - { id: 1, class: vgpr_32, preferred-register: '$vgpr5' }
+ - { id: 2, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ $vgpr3_lo16 = IMPLICIT_DEF
+ %2 = V_AND_B32_e32 %1, %0, implicit $exec
+ S_ENDPGM 0
+...