From ff40fb07ad6309131c2448ca00572a078c7a2d59 Mon Sep 17 00:00:00 2001
From: Esme-Yi
Date: Thu, 14 Jan 2021 02:15:19 +0000
Subject: [PATCH] [PowerPC] Try to fold sqrt/sdiv test results with the branch.

Summary: The patch tries to fold a sqrt/sdiv test node, e.g. FTSQRT or
XVTDIVDP, into the branch, i.e. br_cc, if they meet these patterns:
(br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
(br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
(br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
(br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
(br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
(br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
(br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
(br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)

Reviewed By: steven.zhang

Differential Revision: https://reviews.llvm.org/D94054
---
 llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp |  88 +++++++++++
 llvm/test/CodeGen/PowerPC/fold_swtest_br.ll | 222 ++++++++++++++++++++++++++++
 2 files changed, 310 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/fold_swtest_br.ll

diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 07b81a4..693b0ad 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -352,6 +352,7 @@ namespace {
 
 private:
     bool trySETCC(SDNode *N);
+    bool tryFoldSWTestBRCC(SDNode *N);
     bool tryAsSingleRLDICL(SDNode *N);
     bool tryAsSingleRLDICR(SDNode *N);
     bool tryAsSingleRLWINM(SDNode *N);
@@ -4378,6 +4379,91 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
   return true;
 }
 
+// Return true if N is a software square-root/divide test node, i.e. a
+// PPCISD::FTSQRT node or a call to one of the VSX xvtdiv*/xvtsqrt* intrinsics.
+static bool isSWTestOp(SDValue N) {
+  if (N.getOpcode() == PPCISD::FTSQRT)
+    return true;
+  // Only an intrinsic node carries an intrinsic ID in operand 0. Without this
+  // check any node whose first operand is a constant that happens to equal one
+  // of the IDs below would be misidentified.
+  if (N.getOpcode() != ISD::INTRINSIC_WO_CHAIN || N.getNumOperands() < 1 ||
+      !isa<ConstantSDNode>(N.getOperand(0)))
+    return false;
+  switch (N.getConstantOperandVal(0)) {
+  case Intrinsic::ppc_vsx_xvtdivdp:
+  case Intrinsic::ppc_vsx_xvtdivsp:
+  case Intrinsic::ppc_vsx_xvtsqrtdp:
+  case Intrinsic::ppc_vsx_xvtsqrtsp:
+    return true;
+  }
+  return false;
+}
+
+// Try to fold a single-bit test of a software sqrt/div test result into the
+// conditional branch that consumes it. N must be an ISD::BR_CC node. Returns
+// true if N was selected as a PPC::BCC on the test result, false if N was left
+// untouched.
+bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
+  assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
+  // We are looking for the following patterns, where `truncate to i1` has the
+  // same semantics as `and 1`.
+  // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
+  // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
+  // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
+  // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
+  // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
+  // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
+  // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
+  // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  if (CC != ISD::SETEQ && CC != ISD::SETNE)
+    return false;
+
+  SDValue CmpRHS = N->getOperand(3);
+  if (!isa<ConstantSDNode>(CmpRHS) ||
+      cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
+    return false;
+
+  SDValue CmpLHS = N->getOperand(2);
+  if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
+    return false;
+
+  // PCC stays 0 unless one of the patterns above is matched.
+  unsigned PCC = 0;
+  bool IsCCNE = CC == ISD::SETNE;
+  if (CmpLHS.getOpcode() == ISD::AND &&
+      isa<ConstantSDNode>(CmpLHS.getOperand(1)))
+    switch (CmpLHS.getConstantOperandVal(1)) {
+    case 1:
+      PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
+      break;
+    case 2:
+      PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
+      break;
+    case 4:
+      PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
+      break;
+    case 8:
+      PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
+      break;
+    default:
+      return false;
+    }
+  else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
+           CmpLHS.getValueType() == MVT::i1)
+    PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
+
+  if (PCC) {
+    SDLoc dl(N);
+    SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
+                     N->getOperand(0)};
+    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
+    return true;
+  }
+  return false;
+}
+
 bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
   assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
   unsigned Imm;
@@ -5247,6 +5333,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
     return;
   }
   case ISD::BR_CC: {
+    if (tryFoldSWTestBRCC(N))
+      return;
     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
     unsigned PCC =
         getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
diff --git a/llvm/test/CodeGen/PowerPC/fold_swtest_br.ll b/llvm/test/CodeGen/PowerPC/fold_swtest_br.ll
new file mode 100644
index 0000000..0d5581f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fold_swtest_br.ll
@@ -0,0 +1,222 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s
+
+@val = external local_unnamed_addr global i32, align 4
+declare i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double>)
+
+define dso_local signext i32 @xvtsqrtdp_and_1_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_1_eq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    bnu cr0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB0_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 1
+  %cmp.not = icmp eq i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_2_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_2_eq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    bne cr0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB1_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 2
+  %cmp.not = icmp eq i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_4_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_4_eq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    ble cr0, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB2_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 4
+  %cmp.not = icmp eq i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_8_eq(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_8_eq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    bge cr0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB3_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 8
+  %cmp.not = icmp eq i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_1_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_1_ne:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    bun cr0, .LBB4_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB4_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 1
+  %cmp.not = icmp ne i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_2_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_2_ne:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    beq cr0, .LBB5_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB5_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 2
+  %cmp.not = icmp ne i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_4_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_4_ne:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    bgt cr0, .LBB6_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB6_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 4
+  %cmp.not = icmp ne i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
+
+define dso_local signext i32 @xvtsqrtdp_and_8_ne(<2 x double> %input) {
+; CHECK-LABEL: xvtsqrtdp_and_8_ne:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvtsqrtdp cr0, v2
+; CHECK-NEXT:    blt cr0, .LBB7_2
+; CHECK-NEXT:  # %bb.1: # %if.then
+; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT:    li r4, 100
+; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT:    stw r4, 0(r3)
+; CHECK-NEXT:  .LBB7_2: # %if.end
+; CHECK-NEXT:    li r3, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call i32 @llvm.ppc.vsx.xvtsqrtdp(<2 x double> %input)
+  %1 = and i32 %0, 8
+  %cmp.not = icmp ne i32 %1, 0
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 100, i32* @val, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 1
+}
-- 
2.7.4