return true;
}
+/// Conservatively decide whether \p TReg and \p FReg hold the same value.
+///
+/// Identical registers trivially match. Otherwise both registers must be
+/// virtual, each must have a unique defining instruction, the two defining
+/// instructions must be reported as producing the same value by \p TII, and
+/// each register must be defined by the operand at the same index of its
+/// defining instruction.
+///
+/// \param MRI  Used to look up the unique defining instruction of a register.
+/// \param TII  Queried through produceSameValue() to compare the two defs.
+/// \param TReg One of the two registers to compare.
+/// \param FReg The other register to compare.
+/// \return true iff the two registers are known to have the same value; false
+///         whenever that cannot be established.
+static bool hasSameValue(const MachineRegisterInfo &MRI,
+                         const TargetInstrInfo *TII, Register TReg,
+                         Register FReg) {
+  // The very same register is trivially equal to itself.
+  if (TReg == FReg)
+    return true;
+
+  // Anything other than a pair of virtual registers is rejected up front.
+  if (!(TReg.isVirtual() && FReg.isVirtual()))
+    return false;
+
+  // Both registers need a unique defining instruction to reason about.
+  const MachineInstr *TrueDef = MRI.getUniqueVRegDef(TReg);
+  const MachineInstr *FalseDef = MRI.getUniqueVRegDef(FReg);
+  if (TrueDef == nullptr || FalseDef == nullptr)
+    return false;
+
+  // With unmodeled side effects all bets are off. Likewise, if the def may
+  // touch memory there could be an intervening store between the two defs, so
+  // the values cannot be considered equal; the only memory access tolerated
+  // is a dereferenceable invariant load.
+  if (TrueDef->hasUnmodeledSideEffects() ||
+      (TrueDef->mayLoadOrStore() &&
+       !TrueDef->isDereferenceableInvariantLoad(nullptr)))
+    return false;
+
+  // A def that reads a physical register (for example a copy from one) gives
+  // no guarantee either: some other instruction may have changed that
+  // register between the two defining instructions.
+  for (const MachineOperand &Use : TrueDef->uses()) {
+    if (Use.isReg() && Use.getReg().isPhysical())
+      return false;
+  }
+
+  // Ask the target whether the two defining instructions compute the same
+  // value(s).
+  if (!TII->produceSameValue(*TrueDef, *FalseDef, &MRI))
+    return false;
+
+  // Finally, the two registers must come from corresponding def operands; an
+  // index of -1 is treated as "no such def operand".
+  const int TrueIdx = TrueDef->findRegisterDefOperandIdx(TReg);
+  const int FalseIdx = FalseDef->findRegisterDefOperandIdx(FReg);
+  return TrueIdx != -1 && FalseIdx != -1 && TrueIdx == FalseIdx;
+}
+
/// replacePHIInstrs - Completely replace PHI instructions with selects.
/// This is possible when the only Tail predecessors are the if-converted
/// blocks.
PHIInfo &PI = PHIs[i];
LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
Register DstReg = PI.PHI->getOperand(0).getReg();
- TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
+ if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
+ // We do not need the select instruction if both incoming values are
+ // equal, but we do need a COPY.
+ BuildMI(*Head, FirstTerm, HeadDL, TII->get(TargetOpcode::COPY), DstReg)
+ .addReg(PI.TReg);
+ } else {
+ TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg,
+ PI.FReg);
+ }
LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
PI.PHI->eraseFromParent();
PI.PHI = nullptr;
unsigned DstReg = 0;
LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
- if (PI.TReg == PI.FReg) {
+ if (hasSameValue(*MRI, TII, PI.TReg, PI.FReg)) {
// We do not need the select instruction if both incoming values are
// equal.
DstReg = PI.TReg;
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=early-ifcvt -stress-early-ifcvt -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: fmov0
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr32, preferred-register: '' }
+ - { id: 1, class: fpr32, preferred-register: '' }
+ - { id: 2, class: fpr32, preferred-register: '' }
+ - { id: 3, class: fpr32, preferred-register: '' }
+ - { id: 4, class: fpr32, preferred-register: '' }
+ - { id: 5, class: gpr32common, preferred-register: '' }
+ - { id: 6, class: gpr32, preferred-register: '' }
+ - { id: 7, class: fpr32, preferred-register: '' }
+ - { id: 8, class: fpr32, preferred-register: '' }
+liveins:
+ - { reg: '$s1', virtual-reg: '%4' }
+ - { reg: '$w0', virtual-reg: '%5' }
+body: |
+ ; CHECK-LABEL: name: fmov0
+ ; CHECK: bb.0.entry:
+ ; CHECK: liveins: $s1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+ ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+ ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[FMOVS0_]]
+ ; CHECK: $s0 = COPY [[COPY2]]
+ ; CHECK: RET_ReallyLR implicit $s0
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $s1, $w0
+
+ %5:gpr32common = COPY $w0
+ %4:fpr32 = COPY $s1
+ %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+ Bcc 1, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ successors: %bb.3
+
+ %0:fpr32 = FMOVS0
+ B %bb.3
+
+ bb.2:
+ successors: %bb.3
+
+ %1:fpr32 = FMOVS0
+
+ bb.3:
+ %2:fpr32 = PHI %1, %bb.2, %0, %bb.1
+ $s0 = COPY %2
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: fmov0_extrapred
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr32, preferred-register: '' }
+ - { id: 1, class: fpr32, preferred-register: '' }
+ - { id: 2, class: fpr32, preferred-register: '' }
+ - { id: 3, class: fpr32, preferred-register: '' }
+ - { id: 4, class: fpr32, preferred-register: '' }
+ - { id: 5, class: gpr32common, preferred-register: '' }
+ - { id: 6, class: gpr32, preferred-register: '' }
+ - { id: 7, class: fpr32, preferred-register: '' }
+ - { id: 8, class: fpr32, preferred-register: '' }
+liveins:
+ - { reg: '$s1', virtual-reg: '%4' }
+ - { reg: '$w0', virtual-reg: '%5' }
+body: |
+ ; CHECK-LABEL: name: fmov0_extrapred
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.4(0x80000000)
+ ; CHECK: liveins: $s1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+ ; CHECK: [[FMOVS0_:%[0-9]+]]:fpr32 = FMOVS0
+ ; CHECK: [[FMOVS0_1:%[0-9]+]]:fpr32 = FMOVS0
+ ; CHECK: B %bb.4
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.4(0x80000000)
+ ; CHECK: [[DEF:%[0-9]+]]:fpr32 = IMPLICIT_DEF
+ ; CHECK: B %bb.4
+ ; CHECK: bb.4:
+ ; CHECK: [[PHI:%[0-9]+]]:fpr32 = PHI [[FMOVS0_]], %bb.0, [[DEF]], %bb.1
+ ; CHECK: $s0 = COPY [[PHI]]
+ ; CHECK: RET_ReallyLR implicit $s0
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $s1, $w0
+
+ %5:gpr32common = COPY $w0
+ %4:fpr32 = COPY $s1
+ %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+ Bcc 1, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.4:
+ successors: %bb.3
+
+ ; Make sure we also handle the case when there are extra predecessors on
+ ; the tail block.
+ %3:fpr32 = IMPLICIT_DEF
+ B %bb.3
+
+ bb.1:
+ successors: %bb.3
+
+ %0:fpr32 = FMOVS0
+ B %bb.3
+
+ bb.2:
+ successors: %bb.3
+
+ %1:fpr32 = FMOVS0
+
+ bb.3:
+ %2:fpr32 = PHI %1, %bb.2, %0, %bb.1, %3, %bb.4
+ $s0 = COPY %2
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: copy_physreg
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr32, preferred-register: '' }
+ - { id: 1, class: fpr32, preferred-register: '' }
+ - { id: 2, class: fpr32, preferred-register: '' }
+ - { id: 3, class: fpr32, preferred-register: '' }
+ - { id: 4, class: fpr32, preferred-register: '' }
+ - { id: 5, class: gpr32common, preferred-register: '' }
+ - { id: 6, class: gpr32, preferred-register: '' }
+ - { id: 7, class: fpr32, preferred-register: '' }
+ - { id: 8, class: fpr32, preferred-register: '' }
+ - { id: 9, class: fpr32, preferred-register: '' }
+ - { id: 10, class: fpr32, preferred-register: '' }
+liveins:
+ - { reg: '$s1', virtual-reg: '%4' }
+ - { reg: '$w0', virtual-reg: '%5' }
+body: |
+ ; CHECK-LABEL: name: copy_physreg
+ ; CHECK: bb.0.entry:
+ ; CHECK: liveins: $s1, $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY]], 1, 0, implicit-def $nzcv
+ ; CHECK: [[DEF:%[0-9]+]]:fpr32 = IMPLICIT_DEF implicit-def $s1
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK: [[DEF1:%[0-9]+]]:fpr32 = IMPLICIT_DEF implicit-def $s1
+ ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK: [[FCSELSrrr:%[0-9]+]]:fpr32 = FCSELSrrr [[COPY2]], [[COPY3]], 1, implicit $nzcv
+ ; CHECK: $s0 = COPY [[FCSELSrrr]]
+ ; CHECK: RET_ReallyLR implicit $s0
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $s1, $w0
+
+ %5:gpr32common = COPY $w0
+ %4:fpr32 = COPY $s1
+ %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+ Bcc 1, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ successors: %bb.3
+
+ %9:fpr32 = IMPLICIT_DEF implicit-def $s1
+ %0:fpr32 = COPY $s1
+ B %bb.3
+
+ bb.2:
+ successors: %bb.3
+
+ %10:fpr32 = IMPLICIT_DEF implicit-def $s1
+ %1:fpr32 = COPY $s1
+
+ bb.3:
+ %2:fpr32 = PHI %1, %bb.2, %0, %bb.1
+ $s0 = COPY %2
+ RET_ReallyLR implicit $s0
+
+...
+---
+name: same_def_different_operand
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: fpr32, preferred-register: '' }
+ - { id: 1, class: fpr32, preferred-register: '' }
+ - { id: 2, class: gpr64common, preferred-register: '' }
+ - { id: 3, class: fpr32, preferred-register: '' }
+ - { id: 4, class: fpr32, preferred-register: '' }
+ - { id: 5, class: gpr32common, preferred-register: '' }
+ - { id: 6, class: gpr32, preferred-register: '' }
+ - { id: 7, class: fpr32, preferred-register: '' }
+ - { id: 8, class: fpr32, preferred-register: '' }
+ - { id: 9, class: gpr64common, preferred-register: '' }
+ - { id: 10, class: gpr64, preferred-register: '' }
+ - { id: 11, class: gpr64common, preferred-register: '' }
+liveins:
+ - { reg: '$s1', virtual-reg: '%4' }
+ - { reg: '$w0', virtual-reg: '%5' }
+ - { reg: '$x2', virtual-reg: '%9' }
+body: |
+ ; CHECK-LABEL: name: same_def_different_operand
+ ; CHECK: bb.0.entry:
+ ; CHECK: liveins: $s1, $w0, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+ ; CHECK: early-clobber %11:gpr64common, %10:gpr64 = LDRXpre [[COPY]], 16
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32common = COPY $w0
+ ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 1, 0, implicit-def $nzcv
+ ; CHECK: [[CSELXr:%[0-9]+]]:gpr64common = CSELXr %11, %10, 1, implicit $nzcv
+ ; CHECK: $x2 = COPY [[CSELXr]]
+ ; CHECK: RET_ReallyLR implicit $x2
+ bb.0.entry:
+ successors: %bb.1, %bb.2
+ liveins: $s1, $w0, $x2
+
+ %9:gpr64common = COPY $x0
+ early-clobber %11:gpr64common, %10:gpr64 = LDRXpre %9:gpr64common, 16
+
+ %5:gpr32common = COPY $w0
+ %4:fpr32 = COPY $s1
+ %6:gpr32 = SUBSWri %5, 1, 0, implicit-def $nzcv
+ Bcc 1, %bb.2, implicit $nzcv
+ B %bb.1
+
+ bb.1:
+ successors: %bb.3
+
+ B %bb.3
+
+ bb.2:
+ successors: %bb.3
+
+ B %bb.3
+
+ bb.3:
+ %2:gpr64common = PHI %11, %bb.2, %10, %bb.1
+ $x2 = COPY %2
+ RET_ReallyLR implicit $x2
+
+...
; CHECK-GEN-ISEL-TRUE-NEXT: std r0, 16(r1)
; CHECK-GEN-ISEL-TRUE-NEXT: stdu r1, -64(r1)
; CHECK-GEN-ISEL-TRUE-NEXT: mr r30, r3
-; CHECK-GEN-ISEL-TRUE-NEXT: # implicit-def: $x4
+; CHECK-GEN-ISEL-TRUE-NEXT: # implicit-def: $x3
; CHECK-GEN-ISEL-TRUE-NEXT: # implicit-def: $r29
; CHECK-GEN-ISEL-TRUE-NEXT: .p2align 4
; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_1: # %while.cond11
; CHECK-GEN-ISEL-TRUE-NEXT: #
-; CHECK-GEN-ISEL-TRUE-NEXT: lwz r3, 0(r3)
-; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi r3, 0
+; CHECK-GEN-ISEL-TRUE-NEXT: lwz r4, 0(r3)
+; CHECK-GEN-ISEL-TRUE-NEXT: cmplwi r4, 0
; CHECK-GEN-ISEL-TRUE-NEXT: beq cr0, .LBB0_3
; CHECK-GEN-ISEL-TRUE-NEXT: # %bb.2: # %while.body21
; CHECK-GEN-ISEL-TRUE-NEXT: #
; CHECK-GEN-ISEL-TRUE-NEXT: bl ZN3pov10pov_callocEmmPKciS1_pov
; CHECK-GEN-ISEL-TRUE-NEXT: nop
; CHECK-GEN-ISEL-TRUE-NEXT: addi r4, r29, 1
-; CHECK-GEN-ISEL-TRUE-NEXT: srwi r6, r29, 1
+; CHECK-GEN-ISEL-TRUE-NEXT: srwi r5, r29, 1
; CHECK-GEN-ISEL-TRUE-NEXT: srawi r4, r4, 1
; CHECK-GEN-ISEL-TRUE-NEXT: std r3, 0(r3)
-; CHECK-GEN-ISEL-TRUE-NEXT: addze r5, r4
-; CHECK-GEN-ISEL-TRUE-NEXT: mr r4, r3
-; CHECK-GEN-ISEL-TRUE-NEXT: isel r29, r5, r6, 4*cr5+lt
+; CHECK-GEN-ISEL-TRUE-NEXT: addze r4, r4
+; CHECK-GEN-ISEL-TRUE-NEXT: isel r29, r4, r5, 4*cr5+lt
; CHECK-GEN-ISEL-TRUE-NEXT: b .LBB0_1
; CHECK-GEN-ISEL-TRUE-NEXT: .LBB0_3: # %lor.rhs
-; CHECK-GEN-ISEL-TRUE-NEXT: std r30, 16(r4)
+; CHECK-GEN-ISEL-TRUE-NEXT: std r30, 16(r3)
; CHECK-GEN-ISEL-TRUE-NEXT: addi r1, r1, 64
; CHECK-GEN-ISEL-TRUE-NEXT: ld r0, 16(r1)
; CHECK-GEN-ISEL-TRUE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: mr r30, r3
-; CHECK-NEXT: # implicit-def: $x4
+; CHECK-NEXT: # implicit-def: $x3
; CHECK-NEXT: # implicit-def: $r29
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %while.cond11
; CHECK-NEXT: #
-; CHECK-NEXT: lwz r3, 0(r3)
-; CHECK-NEXT: cmplwi r3, 0
+; CHECK-NEXT: lwz r4, 0(r3)
+; CHECK-NEXT: cmplwi r4, 0
; CHECK-NEXT: beq cr0, .LBB0_6
; CHECK-NEXT: # %bb.2: # %while.body21
; CHECK-NEXT: #
; CHECK-NEXT: bl ZN3pov10pov_callocEmmPKciS1_pov
; CHECK-NEXT: nop
; CHECK-NEXT: addi r4, r29, 1
-; CHECK-NEXT: srwi r6, r29, 1
+; CHECK-NEXT: srwi r5, r29, 1
; CHECK-NEXT: srawi r4, r4, 1
; CHECK-NEXT: std r3, 0(r3)
-; CHECK-NEXT: addze r5, r4
-; CHECK-NEXT: mr r4, r3
+; CHECK-NEXT: addze r4, r4
; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_4
; CHECK-NEXT: # %bb.3: # %while.body21
; CHECK-NEXT: #
-; CHECK-NEXT: ori r29, r6, 0
+; CHECK-NEXT: ori r29, r5, 0
; CHECK-NEXT: b .LBB0_5
; CHECK-NEXT: .LBB0_4: # %while.body21
; CHECK-NEXT: #
-; CHECK-NEXT: addi r29, r5, 0
+; CHECK-NEXT: addi r29, r4, 0
; CHECK-NEXT: .LBB0_5: # %while.body21
; CHECK-NEXT: #
; CHECK-NEXT: b .LBB0_1
; CHECK-NEXT: .LBB0_6: # %lor.rhs
-; CHECK-NEXT: std r30, 16(r4)
+; CHECK-NEXT: std r30, 16(r3)
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload