[BPF] Fix a bug in peephole optimization

author Yonghong Song <yhs@fb.com>

Wed, 20 Nov 2019 17:52:29 +0000 (09:52 -0800)

committer Yonghong Song <yhs@fb.com>

Wed, 20 Nov 2019 23:19:59 +0000 (15:19 -0800)
author Yonghong Song <yhs@fb.com>
Wed, 20 Nov 2019 17:52:29 +0000 (09:52 -0800)
committer Yonghong Song <yhs@fb.com>
Wed, 20 Nov 2019 23:19:59 +0000 (15:19 -0800)
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp

index e9eecc5..c0f99fa 100644 (file)
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -51,6 +51,9 @@ private:
    // Initialize class variables.
    void initialize(MachineFunction &MFParm);
  
+  bool isCopyFrom32Def(MachineInstr *CopyMI);
+  bool isInsnFrom32Def(MachineInstr *DefInsn);
+  bool isPhiFrom32Def(MachineInstr *MovMI);
    bool isMovFrom32Def(MachineInstr *MovMI);
    bool eliminateZExtSeq(void);
  
@@ -75,42 +78,77 @@ void BPFMIPeephole::initialize(MachineFunction &MFParm) {
    LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
  }
  
-bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
+bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
  {
-  MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
+  MachineOperand &opnd = CopyMI->getOperand(1);
  
-  LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
-  LLVM_DEBUG(DefInsn->dump());
+  if (!opnd.isReg())
+    return false;
  
-  if (!DefInsn)
+  // Return false if getting value from a 32bit physical register.
+  // Most likely, this physical register is aliased to
+  // function call return value or current function parameters.
+  Register Reg = opnd.getReg();
+  if (!Register::isVirtualRegister(Reg))
      return false;
  
-  if (DefInsn->isPHI()) {
-    for (unsigned i = 1, e = DefInsn->getNumOperands(); i < e; i += 2) {
-      MachineOperand &opnd = DefInsn->getOperand(i);
+  if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
+    return false;
  
-      if (!opnd.isReg())
-        return false;
+  MachineInstr *DefInsn = MRI->getVRegDef(Reg);
+  if (!isInsnFrom32Def(DefInsn))
+    return false;
  
-      MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
-      // quick check on PHI incoming definitions.
-      if (!PhiDef || PhiDef->isPHI() || PhiDef->getOpcode() == BPF::COPY)
-        return false;
-    }
-  }
+  return true;
+}
  
-  if (DefInsn->getOpcode() == BPF::COPY) {
-    MachineOperand &opnd = DefInsn->getOperand(1);
+bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
+{
+  for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
+    MachineOperand &opnd = PhiMI->getOperand(i);
  
      if (!opnd.isReg())
        return false;
  
-    Register Reg = opnd.getReg();
-    if ((Register::isVirtualRegister(Reg) &&
-         MRI->getRegClass(Reg) == &BPF::GPRRegClass))
+    MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
+    if (!PhiDef)
+      return false;
+    if (PhiDef->isPHI() && !isPhiFrom32Def(PhiDef))
+      return false;
+    if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
+      return false;
+  }
+
+  return true;
+}
+
+// The \p DefInsn instruction defines a virtual register.
+bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
+{
+  if (!DefInsn)
+    return false;
+
+  if (DefInsn->isPHI()) {
+    if (!isPhiFrom32Def(DefInsn))
+      return false;
+  } else if (DefInsn->getOpcode() == BPF::COPY) {
+    if (!isCopyFrom32Def(DefInsn))
        return false;
    }
  
+  return true;
+}
+
+bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
+{
+  MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
+
+  LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
+  LLVM_DEBUG(DefInsn->dump());
+
+  if (!isInsnFrom32Def(DefInsn))
+    return false;
+
    LLVM_DEBUG(dbgs() << "  One ZExt elim sequence identified.\n");
  
    return true;
diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll

index a48141c..160be56 100644 (file)
--- a/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll
@@ -55,6 +55,9 @@ entry:
    %c.d = select i1 %cmp, i32 %c, i32 %d
    ret i32 %c.d
  }
+; CHECK-LABEL: select_cc_32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
  
  ; Function Attrs: norecurse nounwind readnone
  define dso_local i64 @select_cc_32_64(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 {
@@ -63,6 +66,9 @@ entry:
    %c.d = select i1 %cmp, i64 %c, i64 %d
    ret i64 %c.d
  }
+; CHECK-LABEL: select_cc_32_64
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
  
  ; Function Attrs: norecurse nounwind readnone
  define dso_local i32 @select_cc_64_32(i64 %a, i64 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -71,6 +77,8 @@ entry:
    %c.d = select i1 %cmp, i32 %c, i32 %d
    ret i32 %c.d
  }
+; CHECK-LABEL: select_cc_64_32
+; CHECK-NOT: r{{[0-9]+}} <<= 32
  
  ; Function Attrs: norecurse nounwind readnone
  define dso_local i32 @selecti_cc_32(i32 %a, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -79,6 +87,9 @@ entry:
    %c.d = select i1 %cmp, i32 %c, i32 %d
    ret i32 %c.d
  }
+; CHECK-LABEL: selecti_cc_32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
  
  ; Function Attrs: norecurse nounwind readnone
  define dso_local i64 @selecti_cc_32_64(i32 %a, i64 %c, i64 %d) local_unnamed_addr #0 {
@@ -87,6 +98,9 @@ entry:
    %c.d = select i1 %cmp, i64 %c, i64 %d
    ret i64 %c.d
  }
+; CHECK-LABEL: selecti_cc_32_64
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
  
  ; Function Attrs: norecurse nounwind readnone
  define dso_local i32 @selecti_cc_64_32(i64 %a, i32 %c, i32 %d) local_unnamed_addr #0 {
@@ -95,6 +109,5 @@ entry:
    %c.d = select i1 %cmp, i32 %c, i32 %d
    ret i32 %c.d
  }
-; There shouldn't be any type promotion, all of them are expected to be
-; eliminated by peephole optimization.
+; CHECK-LABEL: selecti_cc_64_32
  ; CHECK-NOT: r{{[0-9]+}} <<= 32
diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll

new file mode 100644 (file)

index 0000000..5a72f59
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O2 -march=bpfel -mcpu=v2 -mattr=+alu32 < %s | FileCheck %s
+;
+; For the below test case, 'b' in 'ret == b' needs SLL/SLR.
+; 'ret' in 'ret == b' does not need SLL/SLR as all 'ret' values
+; are assigned through 'w<reg> = <value>' alu32 operations.
+;
+; extern int helper(int);
+; int test(int a, int b, int c, int d) {
+;   int ret;
+;   if (a < b)
+;     ret = (c < d) ? -1 : 0;
+;   else
+;     ret = (c < a) ? 1 : 2;
+;   return helper(ret == b);
+; }
+
+define dso_local i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %cmp1 = icmp slt i32 %c, %d
+  %cond = sext i1 %cmp1 to i32
+  %cmp2 = icmp slt i32 %c, %a
+  %cond3 = select i1 %cmp2, i32 1, i32 2
+  %ret.0 = select i1 %cmp, i32 %cond, i32 %cond3
+  %cmp4 = icmp eq i32 %ret.0, %b
+  %conv = zext i1 %cmp4 to i32
+  %call = tail call i32 @helper(i32 %conv)
+  ret i32 %call
+}
+; CHECK: r{{[0-9]+}} >>= 32
+; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto
+
+declare dso_local i32 @helper(i32) local_unnamed_addr
diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll

new file mode 100644 (file)

index 0000000..46a1b23
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O2 -march=bpfel -mcpu=v2 -mattr=+alu32 < %s | FileCheck %s
+;
+; For the below test case, both 'ret' and 'b' at 'ret == b'
+; need SLL/SLR. For 'ret', 'ret = a' may receive the value
+; from argument with high 32-bit invalid data.
+;
+; extern int helper(int);
+; int test(int a, int b, int c, int d) {
+;   int ret;
+;   if (a < b)
+;     ret = (c < d) ? a : 0;
+;   else
+;     ret = (c < a) ? 1 : 2;
+;   return helper(ret == b);
+; }
+
+define dso_local i32 @test(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %cmp1 = icmp slt i32 %c, %d
+  %cond = select i1 %cmp1, i32 %a, i32 0
+  %cmp2 = icmp slt i32 %c, %a
+  %cond3 = select i1 %cmp2, i32 1, i32 2
+  %ret.0 = select i1 %cmp, i32 %cond, i32 %cond3
+  %cmp4 = icmp eq i32 %ret.0, %b
+  %conv = zext i1 %cmp4 to i32
+  %call = tail call i32 @helper(i32 %conv)
+  ret i32 %call
+}
+; CHECK: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} >>= 32
+; CHECK: if r{{[0-9]+}} == r{{[0-9]+}} goto
+
+declare dso_local i32 @helper(i32) local_unnamed_addr
diff --git a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll

index cf68ec5..63a7c25 100644 (file)
--- a/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
+++ b/llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll
@@ -47,8 +47,8 @@ define dso_local i64 @select_u(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_add
  entry:
    %cmp = icmp ugt i32 %a, %b
    %c.d = select i1 %cmp, i64 %c, i64 %d
-; CHECK-NOT: r{{[0-9]+}} <<= 32
-; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
  ; CHECK: if r{{[0-9]+}} {{<|>}} r{{[0-9]+}} goto
    ret i64 %c.d
  }
@@ -58,8 +58,8 @@ define dso_local i64 @select_u_2(i32 %a, i64 %b, i64 %c, i64 %d) local_unnamed_a
  ; CHECK-LABEL: select_u_2:
  entry:
    %conv = zext i32 %a to i64
-; CHECK-NOT: r{{[0-9]+}} <<= 32
-; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
    %cmp = icmp ugt i64 %conv, %b
    %c.d = select i1 %cmp, i64 %c, i64 %d
    ret i64 %c.d
@@ -100,8 +100,23 @@ define dso_local i32* @inc_p(i32* readnone %p, i32 %a) local_unnamed_addr #0 {
  ; CHECK-LABEL: inc_p:
  entry:
    %idx.ext = zext i32 %a to i64
-; CHECK-NOT: r{{[0-9]+}} <<= 32
-; CHECK-NOT: r{{[0-9]+}} >>= 32
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} >>= 32
    %add.ptr = getelementptr inbounds i32, i32* %p, i64 %idx.ext
    ret i32* %add.ptr
  }
+
+define dso_local i32 @test() local_unnamed_addr {
+; CHECK-LABEL: test:
+entry:
+  %call = tail call i32 bitcast (i32 (...)* @helper to i32 ()*)()
+  %cmp = icmp sgt i32 %call, 6
+; The shifts can't be optimized out because %call comes from function call
+; return i32 so the high bits might be invalid.
+; CHECK: r{{[0-9]+}} <<= 32
+; CHECK-NEXT: r{{[0-9]+}} s>>= 32
+  %cond = zext i1 %cmp to i32
+; CHECK: if r{{[0-9]+}} s{{<|>}} {{[0-9]+}} goto
+  ret i32 %cond
+}
+declare dso_local i32 @helper(...) local_unnamed_addr
author	Yonghong Song <yhs@fb.com>
	Wed, 20 Nov 2019 17:52:29 +0000 (09:52 -0800)
committer	Yonghong Song <yhs@fb.com>
	Wed, 20 Nov 2019 23:19:59 +0000 (15:19 -0800)
llvm/lib/Target/BPF/BPFMIPeephole.cpp		patch \| blob \| history
llvm/test/CodeGen/BPF/32-bit-subreg-cond-select.ll		patch \| blob \| history
llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-1.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/BPF/32-bit-subreg-peephole-phi-2.ll	[new file with mode: 0644]	patch \| blob
llvm/test/CodeGen/BPF/32-bit-subreg-peephole.ll		patch \| blob \| history