private:
/// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
/// dead due to MI being killed, then mark DefMI as dead too.
+ /// Some of the combines, e.g. extend(trunc), walk through redundant copies
+ /// that sit between the extend and the trunc; this also collects those
+ /// in-between copies into DeadInsts when they have become dead.
void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
SmallVectorImpl<MachineInstr *> &DeadInsts) {
DeadInsts.push_back(&MI);
- if (MRI.hasOneUse(DefMI.getOperand(0).getReg()))
+
+ // Collect all the copy instructions that are made dead by deleting this
+ // instruction, walking the def chain back until the trunc (DefMI).
+ // E.g.,
+ // %1(s1) = G_TRUNC %0(s32)
+ // %2(s1) = COPY %1(s1)
+ // %3(s1) = COPY %2(s1)
+ // %4(s32) = G_ANYEXT %3(s1)
+ // In this case, we would have replaced %4 with a copy of %0,
+ // and as a result, %3, %2, %1 are dead.
+ MachineInstr *PrevMI = &MI;
+ while (PrevMI != &DefMI) {
+ // For G_UNMERGE_VALUES, tryCombineMerges doesn't try to fold copies in
+ // between, so there are none to collect and the walk can stop here.
+ if (PrevMI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES)
+ break;
+ unsigned PrevRegSrc = PrevMI->getOperand(1).getReg();
+ MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
+ if (MRI.hasOneUse(PrevRegSrc)) {
+ if (TmpDef != &DefMI) {
+ assert(TmpDef->getOpcode() == TargetOpcode::COPY &&
+ "Expecting copy here");
+ DeadInsts.push_back(TmpDef);
+ }
+ } else
+ break;
+ PrevMI = TmpDef;
+ }
+ if ((PrevMI == &DefMI ||
+ DefMI.getOpcode() == TargetOpcode::G_MERGE_VALUES) &&
+ MRI.hasOneUse(DefMI.getOperand(0).getReg()))
DeadInsts.push_back(&DefMI);
}
+
/// Checks if the target legalizer info has specified anything about the
/// instruction, or if unsupported.
bool isInstUnsupported(unsigned Opcode, const LLT &DstTy) const {
define void @testExtOfCopyOfTrunc() {
ret void
}
+ define void @testExtOf2CopyOfTrunc() {
+ ret void
+ }
...
---
; CHECK: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[COPY]], [[COPY]]
; CHECK: %x0 = COPY [[SELECT4]](s64)
; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](s64)
- ; CHECK: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[TRUNC]]2(<2 x s32>)
+ ; CHECK: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]](<2 x s32>)
; CHECK: %x0 = COPY [[BITCAST1]](s64)
- ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]]0(s32)
- ; CHECK: %w0 = COPY [[TRUNC]]4(s32)
+ ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SELECT3]](s32)
+ ; CHECK: %w0 = COPY [[BITCAST2]](s32)
; CHECK: [[BITCAST3:%[0-9]+]]:_(<4 x s8>) = G_BITCAST [[COPY]](s64)
- ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]]5(<4 x s8>)
+ ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST3]](<4 x s8>)
; CHECK: %w0 = COPY [[BITCAST4]](s32)
; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s64)
- ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]]6(<2 x s16>)
+ ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST5]](<2 x s16>)
; CHECK: %w0 = COPY [[BITCAST6]](s32)
bb.0.entry:
liveins: %x0, %x1, %x2, %x3
; CHECK-LABEL: name: testExtOfCopyOfTrunc
; CHECK: liveins: %x0
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0
- ; CHECK: [[COPY1:%[0-9]+]]:_(s1) = COPY %1:_(s1)
- ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
- ; CHECK: %x0 = COPY [[COPY2]](s64)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+ ; CHECK: %x0 = COPY [[COPY1]](s64)
; CHECK: RET_ReallyLR implicit %x0
%0(s64) = COPY %x0
%1(s1) = G_TRUNC %0
RET_ReallyLR implicit %x0
...
+---
+name: testExtOf2CopyOfTrunc
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _}
+ - { id: 1, class: _}
+ - { id: 2, class: _}
+ - { id: 3, class: _}
+body: |
+ bb.1:
+ liveins: %x0
+ ; CHECK-LABEL: name: testExtOf2CopyOfTrunc
+ ; CHECK: liveins: %x0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %x0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+ ; CHECK: %x0 = COPY [[COPY1]](s64)
+ ; CHECK: RET_ReallyLR implicit %x0
+ %0(s64) = COPY %x0
+ %1(s1) = G_TRUNC %0
+ %2(s1) = COPY %1
+ %4:_(s1) = COPY %2
+ %3(s64) = G_ANYEXT %4
+ %x0 = COPY %3
+ RET_ReallyLR implicit %x0
+
+...