ProduceNonFlagSettingCondBr =
!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
MFReturnAddr = Register();
+
+ processPHIs(MF);
}
private:
// An early selection function that runs before the selectImpl() call.
bool earlySelect(MachineInstr &I) const;
+ // Do some preprocessing of G_PHIs before we begin selection.
+ void processPHIs(MachineFunction &MF);
+
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
/// Eliminate same-sized cross-bank copies into stores before selectImpl().
}
}
+
+// Perform fixups on the given PHI instruction's operands to force them all
+// to be the same as the destination regbank.
+//
+// \p MI must be a G_PHI whose destination register already has a regbank
+// assigned (both asserted below). Any use operand on a different (or missing)
+// bank gets a COPY inserted after its def, and the PHI is rewritten to read
+// the copy instead.
+static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const AArch64RegisterBankInfo &RBI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
+ Register DstReg = MI.getOperand(0).getReg();
+ const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
+ assert(DstRB && "Expected PHI dst to have regbank assigned");
+ MachineIRBuilder MIB(MI);
+
+ // Go through each operand and ensure it has the same regbank.
+ for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ Register OpReg = MO.getReg();
+ // RB may be null here (getRegBankOrNull); since DstRB is asserted
+ // non-null, a bankless operand also takes the copy path below.
+ const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
+ if (RB != DstRB) {
+ // Insert a cross-bank copy.
+ // Place it immediately after the incoming value's def so it dominates
+ // the predecessor edge this PHI operand comes in on.
+ // NOTE(review): if OpDef is itself a PHI, std::next(OpDef) may land
+ // between PHIs in that block -- confirm the MachineVerifier accepts
+ // the resulting placement.
+ auto *OpDef = MRI.getVRegDef(OpReg);
+ const LLT &Ty = MRI.getType(OpReg);
+ MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
+ auto Copy = MIB.buildCopy(Ty, OpReg);
+ MRI.setRegBank(Copy.getReg(0), *DstRB);
+ MO.setReg(Copy.getReg(0));
+ }
+ }
+}
+
+// Preprocess G_PHIs before selection begins: force all operands of small
+// scalar PHIs onto a single regbank (the destination's) so that one register
+// class can be chosen for the selected PHI.
+void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
+ // We're looking for PHIs, build a list so we don't invalidate iterators.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SmallVector<MachineInstr *, 32> Phis;
+ for (auto &BB : MF) {
+ for (auto &MI : BB) {
+ if (MI.getOpcode() == TargetOpcode::G_PHI)
+ Phis.emplace_back(&MI);
+ }
+ }
+
+ for (auto *MI : Phis) {
+ // We need to do some work here if the operand types are < 16 bit and they
+ // are split across fpr/gpr banks. Since all types <32b on gpr
+ // end up being assigned gpr32 regclasses, we can end up with PHIs here
+ // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
+ // be selecting heterogenous regbanks for operands if possible, but we
+ // still need to be able to deal with it here.
+ //
+ // To fix this, if we have a gpr-bank operand < 32b in size and at least
+ // one other operand is on the fpr bank, then we add cross-bank copies
+ // to homogenize the operand banks. For simplicity the bank that we choose
+ // to settle on is whatever bank the def operand has. For example:
+ //
+ // %endbb:
+ // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
+ // =>
+ // %bb2:
+ // ...
+ // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
+ // ...
+ // %endbb:
+ // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
+ bool HasGPROp = false, HasFPROp = false;
+ for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
+ const auto &MO = MI->getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ const LLT &Ty = MRI.getType(MO.getReg());
+ // Only small (< 32-bit) scalars are affected; stop scanning this PHI on
+ // any other operand type.
+ if (!Ty.isValid() || !Ty.isScalar())
+ break;
+ if (Ty.getSizeInBits() >= 32)
+ break;
+ const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
+ // If for some reason we don't have a regbank yet. Don't try anything.
+ if (!RB)
+ break;
+
+ if (RB->getID() == AArch64::GPRRegBankID)
+ HasGPROp = true;
+ else
+ HasFPROp = true;
+ }
+ // NOTE(review): the breaks above keep any flags set by earlier operands,
+ // so a partially-scanned PHI can still reach fixupPHIOpBanks -- confirm
+ // that is intended.
+ // We have heterogenous regbanks, need to fixup.
+ if (HasGPROp && HasFPROp)
+ fixupPHIOpBanks(*MI, MRI, RBI);
+ }
+}
+
namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64--- -run-pass=instruction-select -global-isel %s -o - | FileCheck %s
+---
+# The G_PHI's destination (%3) is on the GPR bank, but the loop-carried
+# incoming value %5 is an FPR s16 (the G_FPTRUNC result). processPHIs must
+# insert a cross-bank copy for %5 so the PHI selects as an all-GPR PHI: the
+# CHECK lines expect a gpr32 PHI fed via a gpr32all COPY of the fpr32
+# SUBREG_TO_REG wrapping the FCVTHSr result.
+name: test_loop_phi_fpr_to_gpr
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+failedISel: false
+tracksRegLiveness: true
+liveins: []
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: test_loop_phi_fpr_to_gpr
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
+ ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
+ ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+ ; CHECK: $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
+ ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %8, %bb.2
+ ; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[FCVTHSr]], %subreg.hsub
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
+ ; CHECK: STRHHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
+ ; CHECK: B %bb.2
+ bb.0:
+ successors: %bb.1(0x80000000)
+
+ %0:gpr(s1) = G_IMPLICIT_DEF
+ %4:gpr(p0) = G_IMPLICIT_DEF
+ %8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ %6:gpr(s32) = G_IMPLICIT_DEF
+ %7:gpr(s32) = G_SELECT %0(s1), %6, %6
+ %1:gpr(s16) = G_TRUNC %7(s32)
+
+ bb.2:
+ successors: %bb.2(0x80000000)
+
+ %3:gpr(s16) = G_PHI %1(s16), %bb.1, %5(s16), %bb.2
+ %5:fpr(s16) = G_FPTRUNC %8(s32)
+ G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
+ G_BR %bb.2
+
+...
+---
+# Mirror of the previous test: the G_PHI destination (%3) is on the FPR bank
+# while the incoming value %1 is a GPR s16 (the G_TRUNC result). A cross-bank
+# copy is expected for %1: the CHECK lines show a gpr32 -> fpr32 COPY of the
+# CSELWr whose hsub feeds an all-FPR (fpr16) PHI.
+name: test_loop_phi_gpr_to_fpr
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: false
+failedISel: false
+tracksRegLiveness: true
+liveins: []
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: test_loop_phi_gpr_to_fpr
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
+ ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
+ ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+ ; CHECK: $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
+ ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY [[CSELWr]]
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: [[PHI:%[0-9]+]]:fpr16 = PHI %7, %bb.2, [[COPY2]], %bb.1
+ ; CHECK: [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
+ ; CHECK: STRHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
+ ; CHECK: B %bb.2
+ bb.0:
+ successors: %bb.1(0x80000000)
+
+ %0:gpr(s1) = G_IMPLICIT_DEF
+ %4:gpr(p0) = G_IMPLICIT_DEF
+ %8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ %6:gpr(s32) = G_IMPLICIT_DEF
+ %7:gpr(s32) = G_SELECT %0(s1), %6, %6
+ %1:gpr(s16) = G_TRUNC %7(s32)
+
+ bb.2:
+ successors: %bb.2(0x80000000)
+
+ %3:fpr(s16) = G_PHI %5(s16), %bb.2, %1(s16), %bb.1
+ %5:fpr(s16) = G_FPTRUNC %8(s32)
+ G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
+ G_BR %bb.2
+
+...