if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
return false;
- // Don't constrain the source register to a class so the def instruction
- // handles it (unless it's undef).
- //
- // FIXME: This is a hack. When selecting the def, we neeed to know
- // specifically know that the result is VCCRegBank, and not just an SGPR
- // with size 1. An SReg_32 with size 1 is ambiguous with wave32.
- if (Src.isUndef()) {
- const TargetRegisterClass *SrcRC =
- TRI.getConstrainedRegClassForOperand(Src, *MRI);
- if (SrcRC && !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
- return false;
- }
-
return true;
}
}
bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(MachineInstr &I) const {
- MachineOperand &Dst = I.getOperand(0);
- MachineOperand &Src0 = I.getOperand(1);
- MachineOperand &Src1 = I.getOperand(2);
- Register DstReg = Dst.getReg();
+ Register DstReg = I.getOperand(0).getReg(); // Operand 0 is the destination; the sources are no longer inspected here.
unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
- if (DstRB->getID() == AMDGPU::VCCRegBankID) {
- const TargetRegisterClass *RC = TRI.getBoolRC();
- unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(),
- RC == &AMDGPU::SReg_64RegClass);
- I.setDesc(TII.get(InstOpc));
- // Dead implicit-def of scc
- I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
- true, // isImp
- false, // isKill
- true)); // isDead
-
- // FIXME: Hack to avoid turning the register bank into a register class.
- // The selector for G_ICMP relies on seeing the register bank for the result
- // is VCC. In wave32 if we constrain the registers to SReg_32 here, it will
- // be ambiguous whether it's a scalar or vector bool.
- if (Src0.isUndef() && !MRI->getRegClassOrNull(Src0.getReg()))
- MRI->setRegClass(Src0.getReg(), RC);
- if (Src1.isUndef() && !MRI->getRegClassOrNull(Src1.getReg()))
- MRI->setRegClass(Src1.getReg(), RC);
-
- return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
- }
-
- // TODO: Should this allow an SCC bank result, and produce a copy from SCC for
- // the result?
- if (DstRB->getID() == AMDGPU::SGPRRegBankID) {
- unsigned InstOpc = getLogicalBitOpcode(I.getOpcode(), Size > 32);
- I.setDesc(TII.get(InstOpc));
- // Dead implicit-def of scc
- I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
- true, // isImp
- false, // isKill
- true)); // isDead
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
+ if (DstRB->getID() != AMDGPU::SGPRRegBankID &&
+ DstRB->getID() != AMDGPU::VCCRegBankID)
+ return false; // Only SGPR-bank and VCC-bank results are selected here; any other bank is rejected.
- return false;
+ bool Is64 = Size > 32 || (DstRB->getID() == AMDGPU::VCCRegBankID &&
+ STI.isWave64()); // A VCC-bank result takes the 64-bit opcode only on a wave64 subtarget.
+ I.setDesc(TII.get(getLogicalBitOpcode(I.getOpcode(), Is64))); // Rewrite I in place with the opcode chosen for Is64.
+
+ // Dead implicit-def of scc
+ I.addOperand(MachineOperand::CreateReg(AMDGPU::SCC, true, // isDef
+ true, // isImp
+ false, // isKill
+ true)); // isDead
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI); // SGPR and VCC results now share this single constraint path.
}
bool AMDGPUInstructionSelector::selectG_ADD_SUB(MachineInstr &I) const {
CondPhysReg = AMDGPU::SCC;
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
- // FIXME: Hack for isSCC tests
- ConstrainRC = &AMDGPU::SGPR_32RegClass;
+ ConstrainRC = &AMDGPU::SReg_32RegClass;
} else {
// FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
// We sort of know that a VCC producer based on the register bank, that ands
return getWavefrontSize() == 32;
}
+ bool isWave64() const { // True exactly when getWavefrontSize() returns 64.
+ return getWavefrontSize() == 64;
+ }
+
const TargetRegisterClass *getBoolRC() const {
return getRegisterInfo()->getBoolRC();
}
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY2]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: bb.1:
; GCN-LABEL: name: brcond_scc_impdef
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
- ; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
; GCN: $scc = COPY [[DEF]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: bb.1:
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
- ; GCN: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY2]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.1
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[COPY]], [[S_MOV_B32_]], implicit $exec
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GCN: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
- ; GCN: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $scc
+ ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
; GCN: $scc = COPY [[COPY3]]
; GCN: S_CBRANCH_SCC1 %bb.1, implicit $scc
; GCN: S_BRANCH %bb.2