From 1e46dcb77b51846c7ea97cba7fd95bc1f0911a09 Mon Sep 17 00:00:00 2001 From: Guozhi Wei Date: Thu, 28 Oct 2021 11:08:36 -0700 Subject: [PATCH] [TwoAddressInstructionPass] Put all new instructions into DistanceMap In function convertInstTo3Addr, after converting a two address instruction into three address instruction, only the last new instruction is inserted into DistanceMap. This is wrong, DistanceMap should track all instructions from the beginning of current MBB to the working instruction. When a two address instruction is converted to three address instruction, multiple instructions may be generated (usually an extra COPY is generated), all of them should be inserted into DistanceMap. Similarly when unfolding memory operand in function tryInstructionTransform DistanceMap is not maintained correctly. Differential Revision: https://reviews.llvm.org/D111857 --- llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 18 +++-- llvm/test/CodeGen/X86/addcarry.ll | 2 +- llvm/test/CodeGen/X86/distancemap.mir | 95 ++++++++++++++++++++++++++ llvm/test/CodeGen/X86/sadd_sat_plus.ll | 8 +-- llvm/test/CodeGen/X86/sadd_sat_vec.ll | 20 +++--- 5 files changed, 123 insertions(+), 20 deletions(-) create mode 100644 llvm/test/CodeGen/X86/distancemap.mir diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 461d25e..fdd2bc6 100644 --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -134,7 +134,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool convertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, Register RegA, - Register RegB, unsigned Dist); + Register RegB, unsigned &Dist); bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI); @@ -146,7 +146,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist, bool shouldOnlyCommute); + unsigned &Dist, bool shouldOnlyCommute); bool tryInstructionCommute(MachineInstr *MI, unsigned DstOpIdx, @@ -674,7 +674,7 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA, /// Return true if this transformation was successful. bool TwoAddressInstructionPass::convertInstTo3Addr( MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - Register RegA, Register RegB, unsigned Dist) { + Register RegA, Register RegB, unsigned &Dist) { MachineInstrSpan MIS(mi, MBB); MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV); if (!NewMI) @@ -723,7 +723,9 @@ bool TwoAddressInstructionPass::convertInstTo3Addr( if (LIS && !SingleInst) LIS->repairIntervalsInRange(MBB, MIS.begin(), MIS.end(), OrigRegs); - DistanceMap.insert(std::make_pair(NewMI, Dist)); + for (MachineInstr &MI : MIS) + DistanceMap.insert(std::make_pair(&MI, Dist++)); + Dist--; mi = NewMI; nmi = std::next(mi); @@ -1226,7 +1228,7 @@ bool TwoAddressInstructionPass:: tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist, bool shouldOnlyCommute) { + unsigned &Dist, bool shouldOnlyCommute) { if (OptLevel == CodeGenOpt::None) return false; @@ -1334,6 +1336,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, // look "normal" to the transformation logic. MBB->insert(mi, NewMIs[0]); MBB->insert(mi, NewMIs[1]); + DistanceMap.insert(std::make_pair(NewMIs[0], Dist++)); + DistanceMap.insert(std::make_pair(NewMIs[1], Dist)); LLVM_DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] << "2addr: NEW INST: " << *NewMIs[1]); @@ -1389,6 +1393,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, } MI.eraseFromParent(); + DistanceMap.erase(&MI); // Update LiveIntervals. if (LIS) { @@ -1405,6 +1410,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); NewMIs[0]->eraseFromParent(); NewMIs[1]->eraseFromParent(); + DistanceMap.erase(NewMIs[0]); + DistanceMap.erase(NewMIs[1]); + Dist--; } } } diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll index 0db0d4e..8b6a7f6 100644 --- a/llvm/test/CodeGen/X86/addcarry.ll +++ b/llvm/test/CodeGen/X86/addcarry.ll @@ -179,7 +179,7 @@ define i8 @e(i32* nocapture %a, i32 %b) nounwind { ; CHECK-NEXT: leal (%rsi,%rcx), %edx ; CHECK-NEXT: addl %esi, %edx ; CHECK-NEXT: setb %al -; CHECK-NEXT: addl %esi, %ecx +; CHECK-NEXT: addl %ecx, %esi ; CHECK-NEXT: movl %edx, (%rdi) ; CHECK-NEXT: adcb $0, %al ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/distancemap.mir b/llvm/test/CodeGen/X86/distancemap.mir new file mode 100644 index 0000000..b389a0c --- /dev/null +++ b/llvm/test/CodeGen/X86/distancemap.mir @@ -0,0 +1,95 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -run-pass=twoaddressinstruction -verify-machineinstrs | FileCheck %s + +# In TwoAddressInstructionPass, new instructions should be added to DistanceMap. +# In this case, function convertInstTo3Addr is called on the first ADD +# instruction, extra COPY instructions are generated. If they are not added to +# DistanceMap, function noUseAfterLastDef computes a wrong value for the later +# ADD instruction, and a different commute decision is made. + +--- | + ; ModuleID = 't.ll' + source_filename = "t.ll" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux" + + declare i16 @llvm.sadd.sat.i16(i16, i16) + + define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounwind { + %a = mul i16 %y, %z + %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a) + ret i16 %tmp + } + +... +--- +name: func16 +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr32 } + - { id: 1, class: gr32 } + - { id: 2, class: gr32 } + - { id: 3, class: gr16 } + - { id: 4, class: gr32 } + - { id: 5, class: gr16 } + - { id: 6, class: gr16 } + - { id: 7, class: gr32 } + - { id: 8, class: gr32 } + - { id: 9, class: gr32 } + - { id: 10, class: gr16 } + - { id: 11, class: gr32 } + - { id: 13, class: gr32 } + - { id: 14, class: gr16 } +frameInfo: + maxAlignment: 1 +body: | + bb.0 : + liveins: $edi, $esi, $edx + ; CHECK-LABEL: name: func16 + ; CHECK: liveins: $edi, $esi, $edx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY killed $edx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed $esi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY killed $edi + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY killed [[COPY2]].sub_16bit + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY1]] + ; CHECK-NEXT: [[IMUL32rr:%[0-9]+]]:gr32 = IMUL32rr [[IMUL32rr]], killed [[COPY]], implicit-def dead $eflags + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY killed [[IMUL32rr]].sub_16bit + ; CHECK-NEXT: [[DEF:%[0-9]+]]:gr64_nosp = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF]].sub_16bit:gr64_nosp = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:gr64_nosp = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1]].sub_16bit:gr64_nosp = COPY [[COPY4]] + ; CHECK-NEXT: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r killed [[DEF]], 1, killed [[DEF1]], 0, $noreg + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY killed [[LEA64_32r]].sub_16bit + ; CHECK-NEXT: [[MOVSX32rr16_:%[0-9]+]]:gr32 = MOVSX32rr16 killed [[COPY5]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[MOVSX32rr16_]] + ; CHECK-NEXT: [[SAR32ri:%[0-9]+]]:gr32 = SAR32ri [[SAR32ri]], 15, implicit-def dead $eflags + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[SAR32ri]] + ; CHECK-NEXT: [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri [[XOR32ri]], -32768, implicit-def dead $eflags + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr16 = COPY [[COPY3]] + ; CHECK-NEXT: [[ADD16rr:%[0-9]+]]:gr16 = ADD16rr [[ADD16rr]], killed [[COPY4]], implicit-def $eflags + ; CHECK-NEXT: undef %11.sub_16bit:gr32 = COPY killed [[ADD16rr]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[XOR32ri]] + ; CHECK-NEXT: [[CMOV32rr:%[0-9]+]]:gr32 = CMOV32rr [[CMOV32rr]], killed %11, 1, implicit killed $eflags + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr16 = COPY killed [[CMOV32rr]].sub_16bit + ; CHECK-NEXT: $ax = COPY killed [[COPY6]] + ; CHECK-NEXT: RET 0, killed $ax + %2:gr32 = COPY killed $edx + %1:gr32 = COPY killed $esi + %0:gr32 = COPY killed $edi + %3:gr16 = COPY killed %0.sub_16bit:gr32 + %4:gr32 = IMUL32rr killed %1:gr32, killed %2:gr32, implicit-def dead $eflags + %5:gr16 = COPY killed %4.sub_16bit:gr32 + %6:gr16 = ADD16rr %3:gr16, %5:gr16, implicit-def dead $eflags + %7:gr32 = MOVSX32rr16 killed %6:gr16 + %8:gr32 = SAR32ri killed %7:gr32, 15, implicit-def dead $eflags + %9:gr32 = XOR32ri killed %8:gr32, -32768, implicit-def dead $eflags + %10:gr16 = ADD16rr killed %3:gr16, killed %5:gr16, implicit-def $eflags + %11:gr32 = INSERT_SUBREG undef %12:gr32, killed %10:gr16, %subreg.sub_16bit + %13:gr32 = CMOV32rr killed %11:gr32, killed %9:gr32, 0, implicit killed $eflags + %14:gr16 = COPY killed %13.sub_16bit:gr32 + $ax = COPY killed %14:gr16 + RET 0, killed $ax + +... diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll index 6deab30..06799bd 100644 --- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll @@ -95,8 +95,8 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw ; X64-NEXT: cwtl ; X64-NEXT: sarl $15, %eax ; X64-NEXT: xorl $-32768, %eax # imm = 0x8000 -; X64-NEXT: addw %di, %si -; X64-NEXT: cmovnol %esi, %eax +; X64-NEXT: addw %si, %di +; X64-NEXT: cmovnol %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = mul i16 %y, %z @@ -131,8 +131,8 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind { ; X64-NEXT: leal (%rdi,%rax), %ecx ; X64-NEXT: sarb $7, %cl ; X64-NEXT: xorb $-128, %cl -; X64-NEXT: addb %dil, %al -; X64-NEXT: movzbl %al, %edx +; X64-NEXT: addb %al, %dil +; X64-NEXT: movzbl %dil, %edx ; X64-NEXT: movzbl %cl, %eax ; X64-NEXT: cmovnol %edx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll index 334c7fb..e83209a 100644 --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -434,8 +434,8 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; SSE-NEXT: leal (%rax,%rcx), %esi ; SSE-NEXT: sarb $7, %sil ; SSE-NEXT: xorb $-128, %sil -; SSE-NEXT: addb %al, %cl -; SSE-NEXT: movzbl %cl, %eax +; SSE-NEXT: addb %cl, %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: movzbl %sil, %ecx ; SSE-NEXT: cmovnol %eax, %ecx ; SSE-NEXT: movb %cl, (%rdx) @@ -448,8 +448,8 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; AVX-NEXT: leal (%rax,%rcx), %esi ; AVX-NEXT: sarb $7, %sil ; AVX-NEXT: xorb $-128, %sil -; AVX-NEXT: addb %al, %cl -; AVX-NEXT: movzbl %cl, %eax +; AVX-NEXT: addb %cl, %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: movzbl %sil, %ecx ; AVX-NEXT: cmovnol %eax, %ecx ; AVX-NEXT: movb %cl, (%rdx) @@ -470,9 +470,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; SSE-NEXT: movswl %si, %esi ; SSE-NEXT: sarl $15, %esi ; SSE-NEXT: xorl $-32768, %esi # imm = 0x8000 -; SSE-NEXT: addw %ax, %cx -; SSE-NEXT: cmovol %esi, %ecx -; SSE-NEXT: movw %cx, (%rdx) +; SSE-NEXT: addw %cx, %ax +; SSE-NEXT: cmovol %esi, %eax +; SSE-NEXT: movw %ax, (%rdx) ; SSE-NEXT: retq ; ; AVX-LABEL: v1i16: @@ -483,9 +483,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; AVX-NEXT: movswl %si, %esi ; AVX-NEXT: sarl $15, %esi ; AVX-NEXT: xorl $-32768, %esi # imm = 0x8000 -; AVX-NEXT: addw %ax, %cx -; AVX-NEXT: cmovol %esi, %ecx -; AVX-NEXT: movw %cx, (%rdx) +; AVX-NEXT: addw %cx, %ax +; AVX-NEXT: cmovol %esi, %eax +; AVX-NEXT: movw %ax, (%rdx) ; AVX-NEXT: retq %x = load <1 x i16>, <1 x i16>* %px %y = load <1 x i16>, <1 x i16>* %py -- 2.7.4