From 1e46dcb77b51846c7ea97cba7fd95bc1f0911a09 Mon Sep 17 00:00:00 2001
From: Guozhi Wei <carrot@google.com>
Date: Thu, 28 Oct 2021 11:08:36 -0700
Subject: [PATCH] [TwoAddressInstructionPass] Put all new instructions into
 DistanceMap

In function convertInstTo3Addr, after converting a two address instruction into
three address instruction, only the last new instruction is inserted into
DistanceMap. This is wrong, DistanceMap should track all instructions from the
beginning of current MBB to the working instruction. When a two address
instruction is converted to three address instruction, multiple instructions may
be generated (usually an extra COPY is generated), all of them should be
inserted into DistanceMap.

Similarly when unfolding memory operand in function tryInstructionTransform
DistanceMap is not maintained correctly.

Differential Revision: https://reviews.llvm.org/D111857
---
 llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 18 +++--
 llvm/test/CodeGen/X86/addcarry.ll              |  2 +-
 llvm/test/CodeGen/X86/distancemap.mir          | 95 ++++++++++++++++++++++++++
 llvm/test/CodeGen/X86/sadd_sat_plus.ll         |  8 +--
 llvm/test/CodeGen/X86/sadd_sat_vec.ll          | 20 +++---
 5 files changed, 123 insertions(+), 20 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/distancemap.mir

diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 461d25e..fdd2bc6 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -134,7 +134,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
 
   bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
                           MachineBasicBlock::iterator &nmi, Register RegA,
-                          Register RegB, unsigned Dist);
+                          Register RegB, unsigned &Dist);
 
   bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI);
 
@@ -146,7 +146,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
   bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
                                MachineBasicBlock::iterator &nmi,
                                unsigned SrcIdx, unsigned DstIdx,
-                               unsigned Dist, bool shouldOnlyCommute);
+                               unsigned &Dist, bool shouldOnlyCommute);
 
   bool tryInstructionCommute(MachineInstr *MI,
                              unsigned DstOpIdx,
@@ -674,7 +674,7 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
 /// Return true if this transformation was successful.
 bool TwoAddressInstructionPass::convertInstTo3Addr(
     MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
-    Register RegA, Register RegB, unsigned Dist) {
+    Register RegA, Register RegB, unsigned &Dist) {
   MachineInstrSpan MIS(mi, MBB);
   MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV);
   if (!NewMI)
@@ -723,7 +723,9 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
   if (LIS && !SingleInst)
     LIS->repairIntervalsInRange(MBB, MIS.begin(), MIS.end(), OrigRegs);
 
-  DistanceMap.insert(std::make_pair(NewMI, Dist));
+  for (MachineInstr &MI : MIS)
+    DistanceMap.insert(std::make_pair(&MI, Dist++));
+  Dist--;
   mi = NewMI;
   nmi = std::next(mi);
 
@@ -1226,7 +1228,7 @@ bool TwoAddressInstructionPass::
 tryInstructionTransform(MachineBasicBlock::iterator &mi,
                         MachineBasicBlock::iterator &nmi,
                         unsigned SrcIdx, unsigned DstIdx,
-                        unsigned Dist, bool shouldOnlyCommute) {
+                        unsigned &Dist, bool shouldOnlyCommute) {
   if (OptLevel == CodeGenOpt::None)
     return false;
 
@@ -1334,6 +1336,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
         // look "normal" to the transformation logic.
         MBB->insert(mi, NewMIs[0]);
         MBB->insert(mi, NewMIs[1]);
+        DistanceMap.insert(std::make_pair(NewMIs[0], Dist++));
+        DistanceMap.insert(std::make_pair(NewMIs[1], Dist));
 
         LLVM_DEBUG(dbgs() << "2addr:    NEW LOAD: " << *NewMIs[0]
                           << "2addr:    NEW INST: " << *NewMIs[1]);
@@ -1389,6 +1393,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
           }
 
           MI.eraseFromParent();
+          DistanceMap.erase(&MI);
 
           // Update LiveIntervals.
           if (LIS) {
@@ -1405,6 +1410,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
           LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
           NewMIs[0]->eraseFromParent();
           NewMIs[1]->eraseFromParent();
+          DistanceMap.erase(NewMIs[0]);
+          DistanceMap.erase(NewMIs[1]);
+          Dist--;
         }
       }
     }
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index 0db0d4e..8b6a7f6 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -179,7 +179,7 @@ define i8 @e(i32* nocapture %a, i32 %b) nounwind {
 ; CHECK-NEXT:    leal (%rsi,%rcx), %edx
 ; CHECK-NEXT:    addl %esi, %edx
 ; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    addl %esi, %ecx
+; CHECK-NEXT:    addl %ecx, %esi
 ; CHECK-NEXT:    movl %edx, (%rdi)
 ; CHECK-NEXT:    adcb $0, %al
 ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/distancemap.mir b/llvm/test/CodeGen/X86/distancemap.mir
new file mode 100644
index 0000000..b389a0c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/distancemap.mir
@@ -0,0 +1,95 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -run-pass=twoaddressinstruction -verify-machineinstrs | FileCheck %s
+
+# In TwoAddressInstructionPass, new instructions should be added to DistanceMap.
+# In this case, function convertInstTo3Addr is called on the first ADD
+# instruction, extra COPY instructions are generated. If they are not added to
+# DistanceMap, function noUseAfterLastDef computes a wrong value for the later
+# ADD instruction, and a different commute decision is made.
+
+--- |
+  ; ModuleID = 't.ll'
+  source_filename = "t.ll"
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux"
+
+  declare i16 @llvm.sadd.sat.i16(i16, i16)
+
+  define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounwind {
+    %a = mul i16 %y, %z
+    %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a)
+    ret i16 %tmp
+  }
+
+...
+---
+name:            func16
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr32 }
+  - { id: 1, class: gr32 }
+  - { id: 2, class: gr32 }
+  - { id: 3, class: gr16 }
+  - { id: 4, class: gr32 }
+  - { id: 5, class: gr16 }
+  - { id: 6, class: gr16 }
+  - { id: 7, class: gr32 }
+  - { id: 8, class: gr32 }
+  - { id: 9, class: gr32 }
+  - { id: 10, class: gr16 }
+  - { id: 11, class: gr32 }
+  - { id: 13, class: gr32 }
+  - { id: 14, class: gr16 }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.0 :
+    liveins: $edi, $esi, $edx
+    ; CHECK-LABEL: name: func16
+    ; CHECK: liveins: $edi, $esi, $edx
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY killed $edx
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed $esi
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY killed $edi
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY killed [[COPY2]].sub_16bit
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY1]]
+    ; CHECK-NEXT: [[IMUL32rr:%[0-9]+]]:gr32 = IMUL32rr [[IMUL32rr]], killed [[COPY]], implicit-def dead $eflags
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY killed [[IMUL32rr]].sub_16bit
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:gr64_nosp = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF]].sub_16bit:gr64_nosp = COPY [[COPY3]]
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:gr64_nosp = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF1]].sub_16bit:gr64_nosp = COPY [[COPY4]]
+    ; CHECK-NEXT: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r killed [[DEF]], 1, killed [[DEF1]], 0, $noreg
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY killed [[LEA64_32r]].sub_16bit
+    ; CHECK-NEXT: [[MOVSX32rr16_:%[0-9]+]]:gr32 = MOVSX32rr16 killed [[COPY5]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[MOVSX32rr16_]]
+    ; CHECK-NEXT: [[SAR32ri:%[0-9]+]]:gr32 = SAR32ri [[SAR32ri]], 15, implicit-def dead $eflags
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[SAR32ri]]
+    ; CHECK-NEXT: [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri [[XOR32ri]], -32768, implicit-def dead $eflags
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr16 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[ADD16rr:%[0-9]+]]:gr16 = ADD16rr [[ADD16rr]], killed [[COPY4]], implicit-def $eflags
+    ; CHECK-NEXT: undef %11.sub_16bit:gr32 = COPY killed [[ADD16rr]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[XOR32ri]]
+    ; CHECK-NEXT: [[CMOV32rr:%[0-9]+]]:gr32 = CMOV32rr [[CMOV32rr]], killed %11, 1, implicit killed $eflags
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr16 = COPY killed [[CMOV32rr]].sub_16bit
+    ; CHECK-NEXT: $ax = COPY killed [[COPY6]]
+    ; CHECK-NEXT: RET 0, killed $ax
+    %2:gr32 = COPY killed $edx
+    %1:gr32 = COPY killed $esi
+    %0:gr32 = COPY killed $edi
+    %3:gr16 = COPY killed %0.sub_16bit:gr32
+    %4:gr32 = IMUL32rr killed %1:gr32, killed %2:gr32, implicit-def dead $eflags
+    %5:gr16 = COPY killed %4.sub_16bit:gr32
+    %6:gr16 = ADD16rr %3:gr16, %5:gr16, implicit-def dead $eflags
+    %7:gr32 = MOVSX32rr16 killed %6:gr16
+    %8:gr32 = SAR32ri killed %7:gr32, 15, implicit-def dead $eflags
+    %9:gr32 = XOR32ri killed %8:gr32, -32768, implicit-def dead $eflags
+    %10:gr16 = ADD16rr killed %3:gr16, killed %5:gr16, implicit-def $eflags
+    %11:gr32 = INSERT_SUBREG undef %12:gr32, killed %10:gr16, %subreg.sub_16bit
+    %13:gr32 = CMOV32rr killed %11:gr32, killed %9:gr32, 0, implicit killed $eflags
+    %14:gr16 = COPY killed %13.sub_16bit:gr32
+    $ax = COPY killed %14:gr16
+    RET 0, killed $ax
+
+...
diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
index 6deab30..06799bd 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
@@ -95,8 +95,8 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
 ; X64-NEXT:    cwtl
 ; X64-NEXT:    sarl $15, %eax
 ; X64-NEXT:    xorl $-32768, %eax # imm = 0x8000
-; X64-NEXT:    addw %di, %si
-; X64-NEXT:    cmovnol %esi, %eax
+; X64-NEXT:    addw %si, %di
+; X64-NEXT:    cmovnol %edi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %a = mul i16 %y, %z
@@ -131,8 +131,8 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
 ; X64-NEXT:    leal (%rdi,%rax), %ecx
 ; X64-NEXT:    sarb $7, %cl
 ; X64-NEXT:    xorb $-128, %cl
-; X64-NEXT:    addb %dil, %al
-; X64-NEXT:    movzbl %al, %edx
+; X64-NEXT:    addb %al, %dil
+; X64-NEXT:    movzbl %dil, %edx
 ; X64-NEXT:    movzbl %cl, %eax
 ; X64-NEXT:    cmovnol %edx, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
index 334c7fb..e83209a 100644
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -434,8 +434,8 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
 ; SSE-NEXT:    leal (%rax,%rcx), %esi
 ; SSE-NEXT:    sarb $7, %sil
 ; SSE-NEXT:    xorb $-128, %sil
-; SSE-NEXT:    addb %al, %cl
-; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    addb %cl, %al
+; SSE-NEXT:    movzbl %al, %eax
 ; SSE-NEXT:    movzbl %sil, %ecx
 ; SSE-NEXT:    cmovnol %eax, %ecx
 ; SSE-NEXT:    movb %cl, (%rdx)
@@ -448,8 +448,8 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
 ; AVX-NEXT:    leal (%rax,%rcx), %esi
 ; AVX-NEXT:    sarb $7, %sil
 ; AVX-NEXT:    xorb $-128, %sil
-; AVX-NEXT:    addb %al, %cl
-; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    addb %cl, %al
+; AVX-NEXT:    movzbl %al, %eax
 ; AVX-NEXT:    movzbl %sil, %ecx
 ; AVX-NEXT:    cmovnol %eax, %ecx
 ; AVX-NEXT:    movb %cl, (%rdx)
@@ -470,9 +470,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
 ; SSE-NEXT:    movswl %si, %esi
 ; SSE-NEXT:    sarl $15, %esi
 ; SSE-NEXT:    xorl $-32768, %esi # imm = 0x8000
-; SSE-NEXT:    addw %ax, %cx
-; SSE-NEXT:    cmovol %esi, %ecx
-; SSE-NEXT:    movw %cx, (%rdx)
+; SSE-NEXT:    addw %cx, %ax
+; SSE-NEXT:    cmovol %esi, %eax
+; SSE-NEXT:    movw %ax, (%rdx)
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: v1i16:
@@ -483,9 +483,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
 ; AVX-NEXT:    movswl %si, %esi
 ; AVX-NEXT:    sarl $15, %esi
 ; AVX-NEXT:    xorl $-32768, %esi # imm = 0x8000
-; AVX-NEXT:    addw %ax, %cx
-; AVX-NEXT:    cmovol %esi, %ecx
-; AVX-NEXT:    movw %cx, (%rdx)
+; AVX-NEXT:    addw %cx, %ax
+; AVX-NEXT:    cmovol %esi, %eax
+; AVX-NEXT:    movw %ax, (%rdx)
 ; AVX-NEXT:    retq
   %x = load <1 x i16>, <1 x i16>* %px
   %y = load <1 x i16>, <1 x i16>* %py
-- 
2.7.4