From: Vasileios Kalintiris Date: Wed, 13 Apr 2016 15:07:45 +0000 (+0000) Subject: [mips] Sign-extend i32 values truncated from previously zero-extended i32 values. X-Git-Tag: llvmorg-3.9.0-rc1~9219 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3751d4114c8b5c8a0e78fb3d746123afc3758030;p=platform%2Fupstream%2Fllvm.git [mips] Sign-extend i32 values truncated from previously zero-extended i32 values. Summary: This is a special case for MIPS64 because the architecture requires properly 32-bit sign-extended values in the register containers. Additionally, we merge consecutive trunc + AssertZExt nodes in order to avoid unnecessary sign-extensions when the extension comes from a type smaller than i32. Reviewers: dsanders Subscribers: dsanders, sdardis, llvm-commits Differential Revision: http://reviews.llvm.org/D18893 llvm-svn: 266203 --- diff --git a/llvm/lib/Target/Mips/Mips64InstrInfo.td b/llvm/lib/Target/Mips/Mips64InstrInfo.td index 12ff9f3..93a10c6 100644 --- a/llvm/lib/Target/Mips/Mips64InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips64InstrInfo.td @@ -63,6 +63,10 @@ def PowerOf2HI : PatLeaf<(imm), [{ return false; }]>; +def assertzext_lt_i32 : PatFrag<(ops node:$src), (assertzext node:$src), [{ + return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLT(MVT::i32); +}]>; + //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// @@ -511,7 +515,17 @@ defm : SetgeImmPats; // truncate def : MipsPat<(trunc (assertsext GPR64:$src)), (EXTRACT_SUBREG GPR64:$src, sub_32)>; -def : MipsPat<(trunc (assertzext GPR64:$src)), +// The forward compatibility strategy employed by MIPS requires us to treat +// values as being sign extended to an infinite number of bits. This allows +// existing software to run without modification on any future MIPS +// implementation (e.g. 128-bit, or 1024-bit). 
Being compatible with this +// strategy requires that truncation acts as a sign-extension for values being +// fed into instructions operating on 32-bit values. Such instructions have +// undefined results if this is not true. +// For our case, this means that we can't issue an extract_subreg for nodes +// such as (trunc:i32 (assertzext:i64 X, i32)), because the sign-bit of the +// lower subreg would not be replicated into the upper half. +def : MipsPat<(trunc (assertzext_lt_i32 GPR64:$src)), (EXTRACT_SUBREG GPR64:$src, sub_32)>; def : MipsPat<(i32 (trunc GPR64:$src)), (SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>; diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index be5f63f..37bb429 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -428,6 +428,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::AssertZext); setMinFunctionAlignment(Subtarget.isGP64bit() ? 
3 : 2); @@ -807,6 +808,37 @@ static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::ADD, DL, ValTy, Add1, Lo); } +static SDValue performAssertZextCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + EVT NarrowerVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + + if (N0.getOpcode() != ISD::TRUNCATE) + return SDValue(); + + if (N0.getOperand(0).getOpcode() != ISD::AssertZext) + return SDValue(); + + // fold (AssertZext (trunc (AssertZext x))) -> (trunc (AssertZext x)) + // if the type of the extension of the outermost AssertZext node is + // smaller than that of the innermost node, eg: + // (AssertZext:i32 (trunc:i32 (AssertZext:i64 X, i32)), i8) + // -> (trunc:i32 (AssertZext X, i8)) + SDValue WiderAssertZext = N0.getOperand(0); + EVT WiderVT = cast<VTSDNode>(WiderAssertZext->getOperand(1))->getVT(); + + if (NarrowerVT.bitsLT(WiderVT)) { + SDValue NewAssertZext = DAG.getNode( + ISD::AssertZext, SDLoc(N), WiderAssertZext.getValueType(), + WiderAssertZext.getOperand(0), DAG.getValueType(NarrowerVT)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), + NewAssertZext); + } + + return SDValue(); +} + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -828,6 +860,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return performORCombine(N, DAG, DCI, Subtarget); case ISD::ADD: return performADDCombine(N, DAG, DCI, Subtarget); + case ISD::AssertZext: + return performAssertZextCombine(N, DAG, DCI, Subtarget); } return SDValue(); diff --git a/llvm/test/CodeGen/Mips/assertzext-trunc.ll b/llvm/test/CodeGen/Mips/assertzext-trunc.ll new file mode 100644 index 0000000..5921f7c --- /dev/null +++ b/llvm/test/CodeGen/Mips/assertzext-trunc.ll @@ -0,0 +1,62 @@ +; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=PRE-R6 
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=PRE-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=PRE-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=PRE-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r3 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=PRE-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r5 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=PRE-R6 +; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \ +; RUN: -check-prefix=ALL -check-prefix=R6 + +; Check that we don't emit redundant SLLs for sequences of +; (AssertZext:i32 (trunc:i32 (AssertZext:i64 X, i32)), i8) +define zeroext i8 @udiv_i8(i8 zeroext %a, i8 zeroext %b) { +entry: +; ALL-LABEL: udiv_i8: + + ; PRE-R6-NOT: sll {{.*}} + ; PRE-R6: divu $zero, $4, $5 + ; PRE-R6: teq $5, $zero, 7 + ; PRE-R6: mflo $2 + + ; R6-NOT: sll {{.*}} + ; R6: divu $2, $4, $5 + ; R6: teq $5, $zero, 7 + + %r = udiv i8 %a, %b + ret i8 %r +} + +; Check that we do sign-extend when we have a (trunc:i32 (AssertZext:i64 X, i32)) +define i64 @foo1(i64 zeroext %var) { +entry: +; ALL-LABEL: foo1: + + %shr = lshr i64 %var, 32 + %cmp = icmp eq i64 %shr, 0 + br i1 %cmp, label %if.end6, label %if.then + + ; ALL: dsrl $[[T0:[0-9]+]], $4, 32 + ; ALL: sll $[[T1:[0-9]+]], $[[T0]], 0 + if.then: ; preds = %entry + %conv = trunc i64 %shr to i32 + %cmp2 = icmp slt i32 %conv, 0 + br i1 %cmp2, label %if.then4, label %if.else + + if.then4: ; preds = %if.then + %add = add i64 %var, 16 + br label %if.end6 + + if.else: ; preds = %if.then + %add5 = add i64 %var, 32 + br label %if.end6 + + if.end6: ; preds = %entry, %if.then4, %if.else + %var.addr.0 = phi i64 [ %add, %if.then4 ], [ %add5, %if.else ], [ %var, %entry ] + ret i64 %var.addr.0 +} diff --git a/llvm/test/CodeGen/Mips/divrem.ll b/llvm/test/CodeGen/Mips/divrem.ll index d5daed1..200c2ad 100644 --- 
a/llvm/test/CodeGen/Mips/divrem.ll +++ b/llvm/test/CodeGen/Mips/divrem.ll @@ -81,7 +81,7 @@ entry: ret i32 %rem } -define i32 @udiv1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone { +define i32 @udiv1(i32 signext %a0, i32 signext %a1) nounwind readnone { entry: ; ALL-LABEL: udiv1: @@ -107,7 +107,7 @@ entry: ret i32 %div } -define i32 @urem1(i32 zeroext %a0, i32 zeroext %a1) nounwind readnone { +define i32 @urem1(i32 signext %a0, i32 signext %a1) nounwind readnone { entry: ; ALL-LABEL: urem1: @@ -175,7 +175,7 @@ entry: ret i32 %div } -define i32 @udivrem1(i32 zeroext %a0, i32 zeroext %a1, i32* nocapture %r) nounwind { +define i32 @udivrem1(i32 signext %a0, i32 signext %a1, i32* nocapture %r) nounwind { entry: ; ALL-LABEL: udivrem1: diff --git a/llvm/test/CodeGen/Mips/octeon_popcnt.ll b/llvm/test/CodeGen/Mips/octeon_popcnt.ll index 3432b39..13488ed 100644 --- a/llvm/test/CodeGen/Mips/octeon_popcnt.ll +++ b/llvm/test/CodeGen/Mips/octeon_popcnt.ll @@ -21,7 +21,7 @@ define i16 @cnt16(i16 %x) nounwind readnone { ; MIPS64-NOT: pop } -define i32 @cnt32(i32 zeroext %x) nounwind readnone { +define i32 @cnt32(i32 signext %x) nounwind readnone { %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt ; OCTEON-LABEL: cnt32: