// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
case RISCV::PseudoCCMOVGPR:
+ case RISCV::PseudoCCADD:
+ case RISCV::PseudoCCSUB:
+ case RISCV::PseudoCCAND:
+ case RISCV::PseudoCCOR:
+ case RISCV::PseudoCCXOR:
+ case RISCV::PseudoCCADDW:
+ case RISCV::PseudoCCSUBW:
return expandCCOp(MBB, MBBI, NextMBBI);
case RISCV::PseudoVSETVLI:
case RISCV::PseudoVSETVLIX0:
bool RISCVExpandPseudo::expandCCOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
- assert(MBBI->getOpcode() == RISCV::PseudoCCMOVGPR && "Unexpected opcode");
MachineFunction *MF = MBB.getParent();
MachineInstr &MI = *MBBI;
Register DestReg = MI.getOperand(0).getReg();
assert(MI.getOperand(4).getReg() == DestReg);
- // Add MV.
- BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg)
- .add(MI.getOperand(5))
- .addImm(0);
+ if (MI.getOpcode() == RISCV::PseudoCCMOVGPR) {
+ // Add MV.
+ BuildMI(TrueBB, DL, TII->get(RISCV::ADDI), DestReg)
+ .add(MI.getOperand(5))
+ .addImm(0);
+ } else {
+ unsigned NewOpc;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case RISCV::PseudoCCADD: NewOpc = RISCV::ADD; break;
+ case RISCV::PseudoCCSUB: NewOpc = RISCV::SUB; break;
+ case RISCV::PseudoCCAND: NewOpc = RISCV::AND; break;
+ case RISCV::PseudoCCOR: NewOpc = RISCV::OR; break;
+ case RISCV::PseudoCCXOR: NewOpc = RISCV::XOR; break;
+ case RISCV::PseudoCCADDW: NewOpc = RISCV::ADDW; break;
+ case RISCV::PseudoCCSUBW: NewOpc = RISCV::SUBW; break;
+ }
+ BuildMI(TrueBB, DL, TII->get(NewOpc), DestReg)
+ .add(MI.getOperand(5))
+ .add(MI.getOperand(6));
+ }
TrueBB->addSuccessor(MergeBB);
}
}
+// If the operation has a predicated pseudo instruction, return the pseudo
+// instruction opcode. Otherwise, return RISCV::INSTRUCTION_LIST_END.
+// TODO: Support more operations.
unsigned getPredicatedOpcode(unsigned Opcode) {
+  // Note: no `break` is needed after the returns; earlier revisions carried
+  // unreachable `break` statements which have been dropped.
+  switch (Opcode) {
+  case RISCV::ADD:  return RISCV::PseudoCCADD;
+  case RISCV::SUB:  return RISCV::PseudoCCSUB;
+  case RISCV::AND:  return RISCV::PseudoCCAND;
+  case RISCV::OR:   return RISCV::PseudoCCOR;
+  case RISCV::XOR:  return RISCV::PseudoCCXOR;
+
+  // RV64-only word forms.
+  case RISCV::ADDW: return RISCV::PseudoCCADDW;
+  case RISCV::SUBW: return RISCV::PseudoCCSUBW;
+  }
+
+  // No predicated form exists for this opcode.
+  return RISCV::INSTRUCTION_LIST_END;
+}
+
+/// Identify instructions that can be folded into a CCMOV instruction, and
+/// return the defining instruction.
+///
+/// \p Reg must be the virtual register feeding one arm of the CCMOV select.
+/// Returns the single defining MachineInstr if it is safe to predicate and
+/// sink into the select, or nullptr if folding is not possible.
+static MachineInstr *canFoldAsPredicatedOp(Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) {
+ // Only virtual registers have a well-defined single def we can inspect.
+ if (!Reg.isVirtual())
+ return nullptr;
+ // The def will be erased after folding, so it must have no other
+ // (non-debug) users.
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return nullptr;
+ MachineInstr *MI = MRI.getVRegDef(Reg);
+ if (!MI)
+ return nullptr;
+ // Check if MI can be predicated and folded into the CCMOV.
+ if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
+ return nullptr;
+ // Check if MI has any other defs or physreg uses.
+ // Operand 0 is the def we are folding, so scanning starts at 1.
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Reject frame index operands, PEI can't handle the predicated pseudos.
+ if (MO.isFI() || MO.isCPI() || MO.isJTI())
+ return nullptr;
+ if (!MO.isReg())
+ continue;
+ // MI can't have any tied operands, that would conflict with predication.
+ if (MO.isTied())
+ return nullptr;
+ if (MO.isDef())
+ return nullptr;
+ // Allow constant physregs.
+ if (Register::isPhysicalRegister(MO.getReg()) &&
+ !MRI.isConstantPhysReg(MO.getReg()))
+ return nullptr;
+ }
+ // The def will be moved to the select's position; refuse if that motion
+ // could cross a store or otherwise change observable behavior.
+ bool DontMoveAcrossStores = true;
+ if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
+ return nullptr;
+ return MI;
+}
+
+bool RISCVInstrInfo::analyzeSelect(const MachineInstr &MI,
+ SmallVectorImpl<MachineOperand> &Cond,
+ unsigned &TrueOp, unsigned &FalseOp,
+ bool &Optimizable) const {
+ assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
+ "Unknown select instruction");
+ // PseudoCCMOVGPR operand layout:
+ // 0: Def.
+ // 1: LHS of compare.
+ // 2: RHS of compare.
+ // 3: Condition code.
+ // 4: False use.
+ // 5: True use.
+ FalseOp = 4;
+ TrueOp = 5;
+ // The condition is fully described by operands 1..3 (LHS, RHS, CC).
+ for (unsigned OpIdx = 1; OpIdx != 4; ++OpIdx)
+ Cond.push_back(MI.getOperand(OpIdx));
+ // Folding into a predicated op is only worthwhile (and only expanded
+ // correctly) when short forward branches are available.
+ Optimizable = STI.hasShortForwardBranchOpt();
+ return false;
+}
+
+/// Fold the instruction defining one arm of a PseudoCCMOVGPR select into a
+/// predicated pseudo (PseudoCCADD etc.), replacing select + binop with a
+/// single conditional-op pseudo. Returns the new instruction, or nullptr if
+/// no fold is possible. The caller erases \p MI on success.
+/// NOTE(review): \p PreferFalse is currently ignored — the true arm
+/// (operand 5) is always tried first; confirm this matches callers' intent.
+MachineInstr *
+RISCVInstrInfo::optimizeSelect(MachineInstr &MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool PreferFalse) const {
+ assert(MI.getOpcode() == RISCV::PseudoCCMOVGPR &&
+ "Unknown select instruction");
+ if (!STI.hasShortForwardBranchOpt())
+ return nullptr;
+
+ // Try to fold the true arm first; fall back to the false arm, in which
+ // case the condition must be inverted (Invert) so the folded op still
+ // executes on the correct side of the select.
+ MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ MachineInstr *DefMI =
+ canFoldAsPredicatedOp(MI.getOperand(5).getReg(), MRI, this);
+ bool Invert = !DefMI;
+ if (!DefMI)
+ DefMI = canFoldAsPredicatedOp(MI.getOperand(4).getReg(), MRI, this);
+ if (!DefMI)
+ return nullptr;
+
+ // Find new register class to use.
+ // The surviving arm becomes the new "false" (pass-through) value.
+ MachineOperand FalseReg = MI.getOperand(Invert ? 5 : 4);
+ Register DestReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
+ if (!MRI.constrainRegClass(DestReg, PreviousClass))
+ return nullptr;
+
+ unsigned PredOpc = getPredicatedOpcode(DefMI->getOpcode());
+ assert(PredOpc != RISCV::INSTRUCTION_LIST_END && "Unexpected opcode!");
+
+ // Create a new predicated version of DefMI.
+ // Inserted at MI's position so operand availability is unchanged.
+ MachineInstrBuilder NewMI =
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(PredOpc), DestReg);
+
+ // Copy the condition portion.
+ NewMI.add(MI.getOperand(1));
+ NewMI.add(MI.getOperand(2));
+
+ // Add condition code, inverting if necessary.
+ auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
+ if (Invert)
+ CC = RISCVCC::getOppositeBranchCondition(CC);
+ NewMI.addImm(CC);
+
+ // Copy the false register.
+ NewMI.add(FalseReg);
+
+ // Copy all the DefMI operands (the folded op's sources), skipping its def.
+ const MCInstrDesc &DefDesc = DefMI->getDesc();
+ for (unsigned i = 1, e = DefDesc.getNumOperands(); i != e; ++i)
+ NewMI.add(DefMI->getOperand(i));
+
+ // Update SeenMIs set: register newly created MI and erase removed DefMI.
+ SeenMIs.insert(NewMI);
+ SeenMIs.erase(DefMI);
+
+ // If MI is inside a loop, and DefMI is outside the loop, then kill flags on
+ // DefMI would be invalid when transferred inside the loop. Checking for a
+ // loop is expensive, but at least remove kill flags if they are in different
+ // BBs.
+ if (DefMI->getParent() != MI.getParent())
+ NewMI->clearKillInfo();
+
+ // The caller will erase MI, but not DefMI.
+ DefMI->eraseFromParent();
+ return NewMI;
+}
+
unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (MI.isMetaInstruction())
return 0;
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;
+ bool analyzeSelect(const MachineInstr &MI,
+ SmallVectorImpl<MachineOperand> &Cond, unsigned &TrueOp,
+ unsigned &FalseOp, bool &Optimizable) const override;
+
+ MachineInstr *optimizeSelect(MachineInstr &MI,
+ SmallPtrSetImpl<MachineInstr *> &SeenMIs,
+ bool) const override;
+
bool isAsCheapAsAMove(const MachineInstr &MI) const override;
std::optional<DestSourcePair>
node:$falsev), [{}],
IntCCtoRISCVCC>;
-let Predicates = [HasShortForwardBranchOpt],
+let Predicates = [HasShortForwardBranchOpt], isSelect = 1,
Constraints = "$dst = $falsev", isCommutable = 1, Size = 8 in {
// This instruction moves $truev to $dst when the condition is true. It will
// be expanded to control flow in RISCVExpandPseudoInsts.
Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
}
+// Conditional binops, that update $dst to (op rs1, rs2) when condition
+// is true. Returns $falsev otherwise. Selected by optimizeSelect.
+// Operand layout (must stay in sync with RISCVInstrInfo::optimizeSelect and
+// the expansion in RISCVExpandPseudoInsts):
+//   $dst    - result (tied to $falsev)
+//   $lhs/$rhs - branch compare operands
+//   $cc     - condition code immediate
+//   $falsev - value when the condition is false (pass-through)
+//   $rs1/$rs2 - sources of the predicated binop
+// TODO: Can we use DefaultOperands on the regular binop to accomplish this more
+// like how ARM does predication?
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8,
+ Constraints = "$dst = $falsev" in {
+def PseudoCCADD : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+def PseudoCCSUB : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+def PseudoCCAND : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+def PseudoCCOR : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+def PseudoCCXOR : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+
+// RV64I instructions
+def PseudoCCADDW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+def PseudoCCSUBW : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+ GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
+ Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+}
+
multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
let usesCustomInserter = 1 in
def _Using_CC_GPR : Pseudo<(outs valty:$dst),
;
; SFB64-LABEL: and_select_all_ones_i32:
; SFB64: # %bb.0:
-; SFB64-NEXT: and a1, a1, a2
-; SFB64-NEXT: bnez a0, .LBB0_2
+; SFB64-NEXT: beqz a0, .LBB0_2
; SFB64-NEXT: # %bb.1:
-; SFB64-NEXT: mv a1, a2
+; SFB64-NEXT: and a2, a2, a1
; SFB64-NEXT: .LBB0_2:
-; SFB64-NEXT: mv a0, a1
+; SFB64-NEXT: mv a0, a2
; SFB64-NEXT: ret
%a = select i1 %c, i32 %x, i32 -1
%b = and i32 %a, %y
;
; SFB64-LABEL: and_select_all_ones_i64:
; SFB64: # %bb.0:
-; SFB64-NEXT: and a1, a1, a2
-; SFB64-NEXT: beqz a0, .LBB1_2
+; SFB64-NEXT: bnez a0, .LBB1_2
; SFB64-NEXT: # %bb.1:
-; SFB64-NEXT: mv a1, a2
+; SFB64-NEXT: and a2, a2, a1
; SFB64-NEXT: .LBB1_2:
-; SFB64-NEXT: mv a0, a1
+; SFB64-NEXT: mv a0, a2
; SFB64-NEXT: ret
%a = select i1 %c, i64 -1, i64 %x
%b = and i64 %y, %a
;
; SFB64-LABEL: or_select_all_zeros_i32:
; SFB64: # %bb.0:
-; SFB64-NEXT: or a1, a1, a2
-; SFB64-NEXT: bnez a0, .LBB2_2
+; SFB64-NEXT: beqz a0, .LBB2_2
; SFB64-NEXT: # %bb.1:
-; SFB64-NEXT: mv a1, a2
+; SFB64-NEXT: or a2, a2, a1
; SFB64-NEXT: .LBB2_2:
-; SFB64-NEXT: mv a0, a1
+; SFB64-NEXT: mv a0, a2
; SFB64-NEXT: ret
%a = select i1 %c, i32 %x, i32 0
%b = or i32 %y, %a
;
; SFB64-LABEL: or_select_all_zeros_i64:
; SFB64: # %bb.0:
-; SFB64-NEXT: or a1, a1, a2
-; SFB64-NEXT: beqz a0, .LBB3_2
+; SFB64-NEXT: bnez a0, .LBB3_2
; SFB64-NEXT: # %bb.1:
-; SFB64-NEXT: mv a1, a2
+; SFB64-NEXT: or a2, a2, a1
; SFB64-NEXT: .LBB3_2:
-; SFB64-NEXT: mv a0, a1
+; SFB64-NEXT: mv a0, a2
; SFB64-NEXT: ret
%a = select i1 %c, i64 0, i64 %x
%b = or i64 %a, %y
;
; SFB64-LABEL: xor_select_all_zeros_i32:
; SFB64: # %bb.0:
-; SFB64-NEXT: xor a1, a1, a2
-; SFB64-NEXT: beqz a0, .LBB4_2
+; SFB64-NEXT: bnez a0, .LBB4_2
; SFB64-NEXT: # %bb.1:
-; SFB64-NEXT: mv a1, a2
+; SFB64-NEXT: xor a2, a2, a1
; SFB64-NEXT: .LBB4_2:
-; SFB64-NEXT: mv a0, a1
+; SFB64-NEXT: mv a0, a2
; SFB64-NEXT: ret
%a = select i1 %c, i32 0, i32 %x
%b = xor i32 %y, %a
;
; SFB64-LABEL: xor_select_all_zeros_i64:
; SFB64: # %bb.0:
-; SFB64-NEXT: xor a1, a1, a2
-; SFB64-NEXT: bnez a0, .LBB5_2
+; SFB64-NEXT: beqz a0, .LBB5_2
; SFB64-NEXT: # %bb.1:
-; SFB64-NEXT: mv a1, a2
+; SFB64-NEXT: xor a2, a2, a1
; SFB64-NEXT: .LBB5_2:
-; SFB64-NEXT: mv a0, a1
+; SFB64-NEXT: mv a0, a2
; SFB64-NEXT: ret
%a = select i1 %c, i64 %x, i64 0
%b = xor i64 %a, %y
;
; SFB64-LABEL: add_select_all_zeros_i32:
; SFB64: # %bb.0:
-; SFB64-NEXT: addw a1, a1, a2
-; SFB64-NEXT: beqz a0, .LBB6_2
+; SFB64-NEXT: bnez a0, .LBB6_2
; SFB64-NEXT: # %bb.1:
-; SFB64-NEXT: mv a1, a2
+; SFB64-NEXT: addw a2, a2, a1
; SFB64-NEXT: .LBB6_2:
-; SFB64-NEXT: mv a0, a1
+; SFB64-NEXT: mv a0, a2
; SFB64-NEXT: ret
%a = select i1 %c, i32 0, i32 %x
%b = add i32 %y, %a
;
; SFB64-LABEL: add_select_all_zeros_i64:
; SFB64: # %bb.0:
-; SFB64-NEXT: add a1, a1, a2
-; SFB64-NEXT: bnez a0, .LBB7_2
+; SFB64-NEXT: beqz a0, .LBB7_2
; SFB64-NEXT: # %bb.1:
-; SFB64-NEXT: mv a1, a2
+; SFB64-NEXT: add a2, a2, a1
; SFB64-NEXT: .LBB7_2:
-; SFB64-NEXT: mv a0, a1
+; SFB64-NEXT: mv a0, a2
; SFB64-NEXT: ret
%a = select i1 %c, i64 %x, i64 0
%b = add i64 %a, %y
;
; SFB64-LABEL: sub_select_all_zeros_i32:
; SFB64: # %bb.0:
-; SFB64-NEXT: subw a1, a2, a1
-; SFB64-NEXT: beqz a0, .LBB8_2
+; SFB64-NEXT: bnez a0, .LBB8_2
; SFB64-NEXT: # %bb.1:
-; SFB64-NEXT: mv a1, a2
+; SFB64-NEXT: subw a2, a2, a1
; SFB64-NEXT: .LBB8_2:
-; SFB64-NEXT: mv a0, a1
+; SFB64-NEXT: mv a0, a2
; SFB64-NEXT: ret
%a = select i1 %c, i32 0, i32 %x
%b = sub i32 %y, %a
;
; SFB64-LABEL: sub_select_all_zeros_i64:
; SFB64: # %bb.0:
-; SFB64-NEXT: sub a1, a2, a1
-; SFB64-NEXT: bnez a0, .LBB9_2
+; SFB64-NEXT: beqz a0, .LBB9_2
; SFB64-NEXT: # %bb.1:
-; SFB64-NEXT: mv a1, a2
+; SFB64-NEXT: sub a2, a2, a1
; SFB64-NEXT: .LBB9_2:
-; SFB64-NEXT: mv a0, a1
+; SFB64-NEXT: mv a0, a2
; SFB64-NEXT: ret
%a = select i1 %c, i64 %x, i64 0
%b = sub i64 %y, %a
; SFB-LABEL: select_xor_2:
; SFB: # %bb.0: # %entry
; SFB-NEXT: andi a2, a2, 1
-; SFB-NEXT: xor a1, a1, a0
; SFB-NEXT: beqz a2, .LBB9_2
; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: mv a0, a1
+; SFB-NEXT: xor a0, a0, a1
; SFB-NEXT: .LBB9_2: # %entry
; SFB-NEXT: ret
entry:
; SFB-LABEL: select_xor_2b:
; SFB: # %bb.0: # %entry
; SFB-NEXT: andi a2, a2, 1
-; SFB-NEXT: xor a1, a1, a0
; SFB-NEXT: beqz a2, .LBB10_2
; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: mv a0, a1
+; SFB-NEXT: xor a0, a0, a1
; SFB-NEXT: .LBB10_2: # %entry
; SFB-NEXT: ret
entry:
; SFB-LABEL: select_or:
; SFB: # %bb.0: # %entry
; SFB-NEXT: andi a2, a2, 1
-; SFB-NEXT: or a1, a1, a0
; SFB-NEXT: beqz a2, .LBB11_2
; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: mv a0, a1
+; SFB-NEXT: or a0, a0, a1
; SFB-NEXT: .LBB11_2: # %entry
; SFB-NEXT: ret
entry:
; SFB-LABEL: select_or_b:
; SFB: # %bb.0: # %entry
; SFB-NEXT: andi a2, a2, 1
-; SFB-NEXT: or a1, a1, a0
; SFB-NEXT: beqz a2, .LBB12_2
; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: mv a0, a1
+; SFB-NEXT: or a0, a0, a1
; SFB-NEXT: .LBB12_2: # %entry
; SFB-NEXT: ret
entry:
; SFB-LABEL: select_or_1:
; SFB: # %bb.0: # %entry
; SFB-NEXT: andi a2, a2, 1
-; SFB-NEXT: or a1, a1, a0
; SFB-NEXT: beqz a2, .LBB13_2
; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: mv a0, a1
+; SFB-NEXT: or a0, a0, a1
; SFB-NEXT: .LBB13_2: # %entry
; SFB-NEXT: ret
entry:
; SFB-LABEL: select_or_1b:
; SFB: # %bb.0: # %entry
; SFB-NEXT: andi a2, a2, 1
-; SFB-NEXT: or a1, a1, a0
; SFB-NEXT: beqz a2, .LBB14_2
; SFB-NEXT: # %bb.1: # %entry
-; SFB-NEXT: mv a0, a1
+; SFB-NEXT: or a0, a0, a1
; SFB-NEXT: .LBB14_2: # %entry
; SFB-NEXT: ret
entry: