[TwoAddressInstructionPass] Put all new instructions into DistanceMap

author Guozhi Wei <carrot@google.com>

Thu, 28 Oct 2021 18:08:36 +0000 (11:08 -0700)

committer Guozhi Wei <carrot@google.com>

Thu, 28 Oct 2021 18:11:59 +0000 (11:11 -0700)
author Guozhi Wei <carrot@google.com>
Thu, 28 Oct 2021 18:08:36 +0000 (11:08 -0700)
committer Guozhi Wei <carrot@google.com>
Thu, 28 Oct 2021 18:11:59 +0000 (11:11 -0700)
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp

index 461d25e..fdd2bc6 100644 (file)
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -134,7 +134,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
  
    bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
                            MachineBasicBlock::iterator &nmi, Register RegA,
-                          Register RegB, unsigned Dist);
+                          Register RegB, unsigned &Dist);
  
    bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI);
  
@@ -146,7 +146,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
    bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
                                 MachineBasicBlock::iterator &nmi,
                                 unsigned SrcIdx, unsigned DstIdx,
-                               unsigned Dist, bool shouldOnlyCommute);
+                               unsigned &Dist, bool shouldOnlyCommute);
  
    bool tryInstructionCommute(MachineInstr *MI,
                               unsigned DstOpIdx,
@@ -674,7 +674,7 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
  /// Return true if this transformation was successful.
  bool TwoAddressInstructionPass::convertInstTo3Addr(
      MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
-    Register RegA, Register RegB, unsigned Dist) {
+    Register RegA, Register RegB, unsigned &Dist) {
    MachineInstrSpan MIS(mi, MBB);
    MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV);
    if (!NewMI)
@@ -723,7 +723,9 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
    if (LIS && !SingleInst)
      LIS->repairIntervalsInRange(MBB, MIS.begin(), MIS.end(), OrigRegs);
  
-  DistanceMap.insert(std::make_pair(NewMI, Dist));
+  for (MachineInstr &MI : MIS)
+    DistanceMap.insert(std::make_pair(&MI, Dist++));
+  Dist--;
    mi = NewMI;
    nmi = std::next(mi);
  
@@ -1226,7 +1228,7 @@ bool TwoAddressInstructionPass::
  tryInstructionTransform(MachineBasicBlock::iterator &mi,
                          MachineBasicBlock::iterator &nmi,
                          unsigned SrcIdx, unsigned DstIdx,
-                        unsigned Dist, bool shouldOnlyCommute) {
+                        unsigned &Dist, bool shouldOnlyCommute) {
    if (OptLevel == CodeGenOpt::None)
      return false;
  
@@ -1334,6 +1336,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
          // look "normal" to the transformation logic.
          MBB->insert(mi, NewMIs[0]);
          MBB->insert(mi, NewMIs[1]);
+        DistanceMap.insert(std::make_pair(NewMIs[0], Dist++));
+        DistanceMap.insert(std::make_pair(NewMIs[1], Dist));
  
          LLVM_DEBUG(dbgs() << "2addr:    NEW LOAD: " << *NewMIs[0]
                            << "2addr:    NEW INST: " << *NewMIs[1]);
@@ -1389,6 +1393,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
            }
  
            MI.eraseFromParent();
+          DistanceMap.erase(&MI);
  
            // Update LiveIntervals.
            if (LIS) {
@@ -1405,6 +1410,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
            LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
            NewMIs[0]->eraseFromParent();
            NewMIs[1]->eraseFromParent();
+          DistanceMap.erase(NewMIs[0]);
+          DistanceMap.erase(NewMIs[1]);
+          Dist--;
          }
        }
      }
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll

index 0db0d4e..8b6a7f6 100644 (file)
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -179,7 +179,7 @@ define i8 @e(i32* nocapture %a, i32 %b) nounwind {
  ; CHECK-NEXT:    leal (%rsi,%rcx), %edx
  ; CHECK-NEXT:    addl %esi, %edx
  ; CHECK-NEXT:    setb %al
-; CHECK-NEXT:    addl %esi, %ecx
+; CHECK-NEXT:    addl %ecx, %esi
  ; CHECK-NEXT:    movl %edx, (%rdi)
  ; CHECK-NEXT:    adcb $0, %al
  ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/distancemap.mir b/llvm/test/CodeGen/X86/distancemap.mir

new file mode 100644 (file)

index 0000000..b389a0c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/distancemap.mir
@@ -0,0 +1,95 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -run-pass=twoaddressinstruction -verify-machineinstrs | FileCheck %s
+
+# In TwoAddressInstructionPass, new instructions should be added to DistanceMap.
+# In this case, function convertInstTo3Addr is called on the first ADD
+# instruction, extra COPY instructions are generated. If they are not added to
+# DistanceMap, function noUseAfterLastDef computes a wrong value for the later
+# ADD instruction, and a different commute decision is made.
+
+--- |
+  ; ModuleID = 't.ll'
+  source_filename = "t.ll"
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux"
+
+  declare i16 @llvm.sadd.sat.i16(i16, i16)
+
+  define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounwind {
+    %a = mul i16 %y, %z
+    %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a)
+    ret i16 %tmp
+  }
+
+...
+---
+name:            func16
+alignment:       16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr32 }
+  - { id: 1, class: gr32 }
+  - { id: 2, class: gr32 }
+  - { id: 3, class: gr16 }
+  - { id: 4, class: gr32 }
+  - { id: 5, class: gr16 }
+  - { id: 6, class: gr16 }
+  - { id: 7, class: gr32 }
+  - { id: 8, class: gr32 }
+  - { id: 9, class: gr32 }
+  - { id: 10, class: gr16 }
+  - { id: 11, class: gr32 }
+  - { id: 13, class: gr32 }
+  - { id: 14, class: gr16 }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.0 :
+    liveins: $edi, $esi, $edx
+    ; CHECK-LABEL: name: func16
+    ; CHECK: liveins: $edi, $esi, $edx
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY killed $edx
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed $esi
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY killed $edi
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY killed [[COPY2]].sub_16bit
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY1]]
+    ; CHECK-NEXT: [[IMUL32rr:%[0-9]+]]:gr32 = IMUL32rr [[IMUL32rr]], killed [[COPY]], implicit-def dead $eflags
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY killed [[IMUL32rr]].sub_16bit
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:gr64_nosp = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF]].sub_16bit:gr64_nosp = COPY [[COPY3]]
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:gr64_nosp = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF1]].sub_16bit:gr64_nosp = COPY [[COPY4]]
+    ; CHECK-NEXT: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r killed [[DEF]], 1, killed [[DEF1]], 0, $noreg
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY killed [[LEA64_32r]].sub_16bit
+    ; CHECK-NEXT: [[MOVSX32rr16_:%[0-9]+]]:gr32 = MOVSX32rr16 killed [[COPY5]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[MOVSX32rr16_]]
+    ; CHECK-NEXT: [[SAR32ri:%[0-9]+]]:gr32 = SAR32ri [[SAR32ri]], 15, implicit-def dead $eflags
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[SAR32ri]]
+    ; CHECK-NEXT: [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri [[XOR32ri]], -32768, implicit-def dead $eflags
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr16 = COPY [[COPY3]]
+    ; CHECK-NEXT: [[ADD16rr:%[0-9]+]]:gr16 = ADD16rr [[ADD16rr]], killed [[COPY4]], implicit-def $eflags
+    ; CHECK-NEXT: undef %11.sub_16bit:gr32 = COPY killed [[ADD16rr]]
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[XOR32ri]]
+    ; CHECK-NEXT: [[CMOV32rr:%[0-9]+]]:gr32 = CMOV32rr [[CMOV32rr]], killed %11, 1, implicit killed $eflags
+    ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr16 = COPY killed [[CMOV32rr]].sub_16bit
+    ; CHECK-NEXT: $ax = COPY killed [[COPY6]]
+    ; CHECK-NEXT: RET 0, killed $ax
+    %2:gr32 = COPY killed $edx
+    %1:gr32 = COPY killed $esi
+    %0:gr32 = COPY killed $edi
+    %3:gr16 = COPY killed %0.sub_16bit:gr32
+    %4:gr32 = IMUL32rr killed %1:gr32, killed %2:gr32, implicit-def dead $eflags
+    %5:gr16 = COPY killed %4.sub_16bit:gr32
+    %6:gr16 = ADD16rr %3:gr16, %5:gr16, implicit-def dead $eflags
+    %7:gr32 = MOVSX32rr16 killed %6:gr16
+    %8:gr32 = SAR32ri killed %7:gr32, 15, implicit-def dead $eflags
+    %9:gr32 = XOR32ri killed %8:gr32, -32768, implicit-def dead $eflags
+    %10:gr16 = ADD16rr killed %3:gr16, killed %5:gr16, implicit-def $eflags
+    %11:gr32 = INSERT_SUBREG undef %12:gr32, killed %10:gr16, %subreg.sub_16bit
+    %13:gr32 = CMOV32rr killed %11:gr32, killed %9:gr32, 0, implicit killed $eflags
+    %14:gr16 = COPY killed %13.sub_16bit:gr32
+    $ax = COPY killed %14:gr16
+    RET 0, killed $ax
+
+...
diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll

index 6deab30..06799bd 100644 (file)
--- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll
@@ -95,8 +95,8 @@ define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounw
  ; X64-NEXT:    cwtl
  ; X64-NEXT:    sarl $15, %eax
  ; X64-NEXT:    xorl $-32768, %eax # imm = 0x8000
-; X64-NEXT:    addw %di, %si
-; X64-NEXT:    cmovnol %esi, %eax
+; X64-NEXT:    addw %si, %di
+; X64-NEXT:    cmovnol %edi, %eax
  ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
  ; X64-NEXT:    retq
    %a = mul i16 %y, %z
@@ -131,8 +131,8 @@ define signext i8 @func8(i8 signext %x, i8 signext %y, i8 signext %z) nounwind {
  ; X64-NEXT:    leal (%rdi,%rax), %ecx
  ; X64-NEXT:    sarb $7, %cl
  ; X64-NEXT:    xorb $-128, %cl
-; X64-NEXT:    addb %dil, %al
-; X64-NEXT:    movzbl %al, %edx
+; X64-NEXT:    addb %al, %dil
+; X64-NEXT:    movzbl %dil, %edx
  ; X64-NEXT:    movzbl %cl, %eax
  ; X64-NEXT:    cmovnol %edx, %eax
  ; X64-NEXT:    # kill: def $al killed $al killed $eax
diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll

index 334c7fb..e83209a 100644 (file)
--- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll
@@ -434,8 +434,8 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
  ; SSE-NEXT:    leal (%rax,%rcx), %esi
  ; SSE-NEXT:    sarb $7, %sil
  ; SSE-NEXT:    xorb $-128, %sil
-; SSE-NEXT:    addb %al, %cl
-; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    addb %cl, %al
+; SSE-NEXT:    movzbl %al, %eax
  ; SSE-NEXT:    movzbl %sil, %ecx
  ; SSE-NEXT:    cmovnol %eax, %ecx
  ; SSE-NEXT:    movb %cl, (%rdx)
@@ -448,8 +448,8 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
  ; AVX-NEXT:    leal (%rax,%rcx), %esi
  ; AVX-NEXT:    sarb $7, %sil
  ; AVX-NEXT:    xorb $-128, %sil
-; AVX-NEXT:    addb %al, %cl
-; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    addb %cl, %al
+; AVX-NEXT:    movzbl %al, %eax
  ; AVX-NEXT:    movzbl %sil, %ecx
  ; AVX-NEXT:    cmovnol %eax, %ecx
  ; AVX-NEXT:    movb %cl, (%rdx)
@@ -470,9 +470,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
  ; SSE-NEXT:    movswl %si, %esi
  ; SSE-NEXT:    sarl $15, %esi
  ; SSE-NEXT:    xorl $-32768, %esi # imm = 0x8000
-; SSE-NEXT:    addw %ax, %cx
-; SSE-NEXT:    cmovol %esi, %ecx
-; SSE-NEXT:    movw %cx, (%rdx)
+; SSE-NEXT:    addw %cx, %ax
+; SSE-NEXT:    cmovol %esi, %eax
+; SSE-NEXT:    movw %ax, (%rdx)
  ; SSE-NEXT:    retq
  ;
  ; AVX-LABEL: v1i16:
@@ -483,9 +483,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind {
  ; AVX-NEXT:    movswl %si, %esi
  ; AVX-NEXT:    sarl $15, %esi
  ; AVX-NEXT:    xorl $-32768, %esi # imm = 0x8000
-; AVX-NEXT:    addw %ax, %cx
-; AVX-NEXT:    cmovol %esi, %ecx
-; AVX-NEXT:    movw %cx, (%rdx)
+; AVX-NEXT:    addw %cx, %ax
+; AVX-NEXT:    cmovol %esi, %eax
+; AVX-NEXT:    movw %ax, (%rdx)
  ; AVX-NEXT:    retq
    %x = load <1 x i16>, <1 x i16>* %px
    %y = load <1 x i16>, <1 x i16>* %py
author	Guozhi Wei <carrot@google.com>
	Thu, 28 Oct 2021 18:08:36 +0000 (11:08 -0700)
committer	Guozhi Wei <carrot@google.com>
	Thu, 28 Oct 2021 18:11:59 +0000 (11:11 -0700)
llvm/lib/CodeGen/TwoAddressInstructionPass.cpp		patch \| blob \| history
llvm/test/CodeGen/X86/addcarry.ll		patch \| blob \| history
llvm/test/CodeGen/X86/distancemap.mir	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/X86/sadd_sat_plus.ll		patch \| blob \| history
llvm/test/CodeGen/X86/sadd_sat_vec.ll		patch \| blob \| history