From: Vasileios Kalintiris <Vasileios.Kalintiris@imgtec.com>
Date: Wed, 13 Apr 2016 15:07:45 +0000 (+0000)
Subject: [mips] Sign-extend i32 values truncated from previously zero-extended i32 values.
X-Git-Tag: llvmorg-3.9.0-rc1~9219
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3751d4114c8b5c8a0e78fb3d746123afc3758030;p=platform%2Fupstream%2Fllvm.git

[mips] Sign-extend i32 values truncated from previously zero-extended i32 values.

Summary:
This is a special case for MIPS64 because the architecture requires
properly 32-bit sign-extended values in the register containers.

Additionally, we merge consecutive trunc + AssertZExt nodes in order
to avoid unnecessary sign-extensions when the extension comes from a
type smaller than i32.

Reviewers: dsanders

Subscribers: dsanders, sdardis, llvm-commits

Differential Revision: http://reviews.llvm.org/D18893

llvm-svn: 266203
---

diff --git a/llvm/lib/Target/Mips/Mips64InstrInfo.td b/llvm/lib/Target/Mips/Mips64InstrInfo.td
index 12ff9f3..93a10c6 100644
--- a/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -63,6 +63,10 @@ def PowerOf2HI : PatLeaf<(imm), [{
     return false;
 }]>;
 
+def assertzext_lt_i32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+  return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLT(MVT::i32);
+}]>;
+
 //===----------------------------------------------------------------------===//
 // Instructions specific format
 //===----------------------------------------------------------------------===//
@@ -511,7 +515,17 @@ defm : SetgeImmPats<GPR64, SLTi64, SLTiu64>;
 // truncate
 def : MipsPat<(trunc (assertsext GPR64:$src)),
               (EXTRACT_SUBREG GPR64:$src, sub_32)>;
-def : MipsPat<(trunc (assertzext GPR64:$src)),
+// The forward compatibility strategy employed by MIPS requires us to treat
+// values as being sign extended to an infinite number of bits. This allows
+// existing software to run without modification on any future MIPS
+// implementation (e.g. 128-bit, or 1024-bit). Being compatible with this
+// strategy requires that truncation acts as a sign-extension for values being
+// fed into instructions operating on 32-bit values. Such instructions have
+// undefined results if this is not true.
+// For our case, this means that we can't issue an extract_subreg for nodes
+// such as (trunc:i32 (assertzext:i64 X, i32)), because the sign-bit of the
+// lower subreg would not be replicated into the upper half.
+def : MipsPat<(trunc (assertzext_lt_i32 GPR64:$src)),
               (EXTRACT_SUBREG GPR64:$src, sub_32)>;
 def : MipsPat<(i32 (trunc GPR64:$src)),
               (SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>;
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index be5f63f..37bb429 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -428,6 +428,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::ADD);
+  setTargetDAGCombine(ISD::AssertZext);
 
   setMinFunctionAlignment(Subtarget.isGP64bit() ? 3 : 2);
 
@@ -807,6 +808,37 @@ static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo);
 }
 
+static SDValue performAssertZextCombine(SDNode *N, SelectionDAG &DAG,
+                                        TargetLowering::DAGCombinerInfo &DCI,
+                                        const MipsSubtarget &Subtarget) {
+  SDValue N0 = N->getOperand(0);
+  EVT NarrowerVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+  if (N0.getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  if (N0.getOperand(0).getOpcode() != ISD::AssertZext)
+    return SDValue();
+
+  // fold (AssertZext (trunc (AssertZext x))) -> (trunc (AssertZext x))
+  // if the type asserted by the outermost AssertZext node is narrower
+  // than the type asserted by the innermost node, eg:
+  // (AssertZext:i32 (trunc:i32 (AssertZext:i64 X, i32)), i8)
+  //   -> (trunc:i32 (AssertZext:i64 X, i8))
+  SDValue WiderAssertZext = N0.getOperand(0);
+  EVT WiderVT = cast<VTSDNode>(WiderAssertZext->getOperand(1))->getVT();
+
+  if (NarrowerVT.bitsLT(WiderVT)) {
+    SDValue NewAssertZext = DAG.getNode(
+        ISD::AssertZext, SDLoc(N), WiderAssertZext.getValueType(),
+        WiderAssertZext.getOperand(0), DAG.getValueType(NarrowerVT));
+    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0),
+                       NewAssertZext);
+  }
+
+  return SDValue();
+}
+
 SDValue  MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
   const {
   SelectionDAG &DAG = DCI.DAG;
@@ -828,6 +860,8 @@ SDValue  MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
     return performORCombine(N, DAG, DCI, Subtarget);
   case ISD::ADD:
     return performADDCombine(N, DAG, DCI, Subtarget);
+  case ISD::AssertZext:
+    return performAssertZextCombine(N, DAG, DCI, Subtarget);
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/Mips/assertzext-trunc.ll b/llvm/test/CodeGen/Mips/assertzext-trunc.ll
new file mode 100644
index 0000000..5921f7c
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/assertzext-trunc.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN:   -check-prefix=ALL -check-prefix=PRE-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN:   -check-prefix=ALL -check-prefix=PRE-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN:   -check-prefix=ALL -check-prefix=PRE-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN:   -check-prefix=ALL -check-prefix=PRE-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \
+; RUN:   -check-prefix=ALL -check-prefix=PRE-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \
+; RUN:   -check-prefix=ALL -check-prefix=PRE-R6
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN:   -check-prefix=ALL -check-prefix=R6
+
+; Check that we don't emit redundant SLLs for sequences of
+; (AssertZext:i32 (trunc:i32 (AssertZext:i64 X, i32)), i8)
+define zeroext i8 @udiv_i8(i8 zeroext %a, i8 zeroext %b) {
+entry:
+; ALL-LABEL: udiv_i8:
+
+  ; PRE-R6-NOT:   sll     {{.*}}
+  ; PRE-R6:       divu    $zero, $4, $5
+  ; PRE-R6:       teq     $5, $zero, 7
+  ; PRE-R6:       mflo    $2
+
+  ; R6-NOT:       sll     {{.*}}
+  ; R6:           divu    $2, $4, $5
+  ; R6:           teq     $5, $zero, 7
+
+  %r = udiv i8 %a, %b
+  ret i8 %r
+}
+
+; Check that we do sign-extend when we have a (trunc:i32 (AssertZext:i64 X, i32))
+define i64 @foo1(i64 zeroext %var) {
+entry:
+; ALL-LABEL: foo1:
+
+  %shr = lshr i64 %var, 32
+  %cmp = icmp eq i64 %shr, 0
+  br i1 %cmp, label %if.end6, label %if.then
+
+  ; ALL:    dsrl   $[[T0:[0-9]+]], $4, 32
+  ; ALL:    sll    $[[T1:[0-9]+]], $[[T0]], 0
+  if.then:                                          ; preds = %entry
+  %conv = trunc i64 %shr to i32
+  %cmp2 = icmp slt i32 %conv, 0
+  br i1 %cmp2, label %if.then4, label %if.else
+
+  if.then4:                                         ; preds = %if.then
+  %add = add i64 %var, 16
+  br label %if.end6
+
+  if.else:                                          ; preds = %if.then
+  %add5 = add i64 %var, 32
+  br label %if.end6
+
+  if.end6:                                          ; preds = %entry, %if.then4, %if.else
+  %var.addr.0 = phi i64 [ %add, %if.then4 ], [ %add5, %if.else ], [ %var, %entry ]
+  ret i64 %var.addr.0
+}
diff --git a/llvm/test/CodeGen/Mips/divrem.ll b/llvm/test/CodeGen/Mips/divrem.ll
index d5daed1..200c2ad 100644
--- a/llvm/test/CodeGen/Mips/divrem.ll
+++ b/llvm/test/CodeGen/Mips/divrem.ll
@@ -81,7 +81,7 @@ entry:
   ret i32 %rem
 }
 
-define i32 @udiv1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone {
+define i32 @udiv1(i32 signext %a0, i32 signext %a1) nounwind readnone {
 entry:
 ; ALL-LABEL: udiv1:
 
@@ -107,7 +107,7 @@ entry:
   ret i32 %div
 }
 
-define i32 @urem1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone {
+define i32 @urem1(i32 signext %a0, i32 signext %a1) nounwind readnone {
 entry:
 ; ALL-LABEL: urem1:
 
@@ -175,7 +175,7 @@ entry:
   ret i32 %div
 }
 
-define i32 @udivrem1(i32 zeroext %a0, i32 zeroext %a1, i32* nocapture %r) nounwind {
+define i32 @udivrem1(i32 signext %a0, i32 signext %a1, i32* nocapture %r) nounwind {
 entry:
 ; ALL-LABEL: udivrem1:
 
diff --git a/llvm/test/CodeGen/Mips/octeon_popcnt.ll b/llvm/test/CodeGen/Mips/octeon_popcnt.ll
index 3432b39..13488ed 100644
--- a/llvm/test/CodeGen/Mips/octeon_popcnt.ll
+++ b/llvm/test/CodeGen/Mips/octeon_popcnt.ll
@@ -21,7 +21,7 @@ define i16 @cnt16(i16 %x) nounwind readnone {
 ; MIPS64-NOT: pop
 }
 
-define i32 @cnt32(i32 zeroext %x) nounwind readnone {
+define i32 @cnt32(i32 signext %x) nounwind readnone {
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %cnt
 ; OCTEON-LABEL: cnt32: