From 64e5d7d3ae3d138709bca57a972bce4803982b70 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Fri, 20 Oct 2017 19:33:12 +0000 Subject: [PATCH] [Hexagon] Reorganize and update instruction patterns llvm-svn: 316228 --- llvm/lib/Target/Hexagon/CMakeLists.txt | 1 + llvm/lib/Target/Hexagon/Hexagon.td | 1 - llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp | 26 +- llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 106 +- llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 9 + llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 6 +- .../lib/Target/Hexagon/HexagonIntrinsicsDerived.td | 40 - llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp | 6 +- llvm/lib/Target/Hexagon/HexagonOperands.td | 12 - llvm/lib/Target/Hexagon/HexagonPatterns.td | 4716 +++++++++----------- llvm/test/CodeGen/Hexagon/PR33749.ll | 50 + llvm/test/CodeGen/Hexagon/addrmode-indoff.ll | 94 +- llvm/test/CodeGen/Hexagon/block-addr.ll | 3 +- llvm/test/CodeGen/Hexagon/hwloop-loop1.ll | 2 +- .../Hexagon/ifcvt-diamond-bug-2016-08-26.ll | 19 +- llvm/test/CodeGen/Hexagon/sdata-array.ll | 4 +- llvm/test/CodeGen/Hexagon/store-imm-amode.ll | 97 + .../test/CodeGen/Hexagon/store-imm-stack-object.ll | 3 +- llvm/test/CodeGen/Hexagon/store-shift.ll | 2 +- llvm/test/CodeGen/Hexagon/tfr-to-combine.ll | 35 +- llvm/test/CodeGen/Hexagon/tls_pic.ll | 8 +- llvm/test/CodeGen/Hexagon/tls_static.ll | 4 +- llvm/test/CodeGen/Hexagon/vect/vect-load-1.ll | 9 +- llvm/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll | 2 +- llvm/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll | 4 +- 25 files changed, 2524 insertions(+), 2735 deletions(-) delete mode 100644 llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td create mode 100644 llvm/test/CodeGen/Hexagon/PR33749.ll create mode 100644 llvm/test/CodeGen/Hexagon/store-imm-amode.ll diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index ef5f88c..ac6a5fc 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -68,3 +68,4 
@@ add_subdirectory(AsmParser) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) add_subdirectory(Disassembler) + diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td index 23221a9..3218f25 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.td +++ b/llvm/lib/Target/Hexagon/Hexagon.td @@ -282,7 +282,6 @@ include "HexagonPseudo.td" include "HexagonPatterns.td" include "HexagonDepMappings.td" include "HexagonIntrinsics.td" -include "HexagonIntrinsicsDerived.td" include "HexagonMapAsm2IntrinV62.gen.td" def HexagonInstrInfo : InstrInfo; diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp index d01ff01..c199851 100644 --- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp +++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp @@ -511,8 +511,8 @@ HexagonHardwareLoops::getComparisonKind(unsigned CondOpc, int64_t IVBump) const { Comparison::Kind Cmp = (Comparison::Kind)0; switch (CondOpc) { - case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqi: case Hexagon::C2_cmpeqp: Cmp = Comparison::EQ; break; @@ -520,21 +520,35 @@ HexagonHardwareLoops::getComparisonKind(unsigned CondOpc, case Hexagon::C4_cmpneqi: Cmp = Comparison::NE; break; + case Hexagon::C2_cmplt: + Cmp = Comparison::LTs; + break; + case Hexagon::C2_cmpltu: + Cmp = Comparison::LTu; + break; case Hexagon::C4_cmplte: + case Hexagon::C4_cmpltei: Cmp = Comparison::LEs; break; case Hexagon::C4_cmplteu: + case Hexagon::C4_cmplteui: Cmp = Comparison::LEu; break; - case Hexagon::C2_cmpgtui: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtp: + Cmp = Comparison::GTs; + break; case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtui: case Hexagon::C2_cmpgtup: Cmp = Comparison::GTu; break; - case Hexagon::C2_cmpgti: - case Hexagon::C2_cmpgt: - case Hexagon::C2_cmpgtp: - Cmp = Comparison::GTs; + case Hexagon::C2_cmpgei: + Cmp = Comparison::GEs; + break; + case Hexagon::C2_cmpgeui: + Cmp 
= Comparison::GEu; break; default: return (Comparison::Kind)0; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 2c40a1b..946f99c 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -43,6 +43,9 @@ cl::opt RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced")); +static cl::opt CheckSingleUse("hexagon-isel-su", cl::Hidden, + cl::init(true), cl::desc("Enable checking of SDNode's single-use status")); + //===----------------------------------------------------------------------===// // Instruction Selector Implementation //===----------------------------------------------------------------------===// @@ -82,10 +85,19 @@ public: // Complex Pattern Selectors. inline bool SelectAddrGA(SDValue &N, SDValue &R); inline bool SelectAddrGP(SDValue &N, SDValue &R); - bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP); + inline bool SelectAnyImm(SDValue &N, SDValue &R); + inline bool SelectAnyInt(SDValue &N, SDValue &R); + bool SelectAnyImmediate(SDValue &N, SDValue &R, uint32_t LogAlign); + bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP, + uint32_t LogAlign); bool SelectAddrFI(SDValue &N, SDValue &R); bool DetectUseSxtw(SDValue &N, SDValue &R); + inline bool SelectAnyImm0(SDValue &N, SDValue &R); + inline bool SelectAnyImm1(SDValue &N, SDValue &R); + inline bool SelectAnyImm2(SDValue &N, SDValue &R); + inline bool SelectAnyImm3(SDValue &N, SDValue &R); + StringRef getPassName() const override { return "Hexagon DAG->DAG Pattern Instruction Selection"; } @@ -126,6 +138,7 @@ private: bool isAlignedMemNode(const MemSDNode *N) const; bool isSmallStackStore(const StoreSDNode *N) const; bool isPositiveHalfWord(const SDNode *N) const; + bool hasOneUse(const SDNode *N) const; // DAG preprocessing functions. 
void ppSimplifyOrSelect0(std::vector &&Nodes); @@ -1250,15 +1263,88 @@ bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) { } inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) { - return SelectGlobalAddress(N, R, false); + return SelectGlobalAddress(N, R, false, 0); } inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) { - return SelectGlobalAddress(N, R, true); + return SelectGlobalAddress(N, R, true, 0); +} + +inline bool HexagonDAGToDAGISel::SelectAnyImm(SDValue &N, SDValue &R) { + return SelectAnyImmediate(N, R, 0); +} + +inline bool HexagonDAGToDAGISel::SelectAnyImm0(SDValue &N, SDValue &R) { + return SelectAnyImmediate(N, R, 0); +} +inline bool HexagonDAGToDAGISel::SelectAnyImm1(SDValue &N, SDValue &R) { + return SelectAnyImmediate(N, R, 1); +} +inline bool HexagonDAGToDAGISel::SelectAnyImm2(SDValue &N, SDValue &R) { + return SelectAnyImmediate(N, R, 2); +} +inline bool HexagonDAGToDAGISel::SelectAnyImm3(SDValue &N, SDValue &R) { + return SelectAnyImmediate(N, R, 3); +} + +inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) { + EVT T = N.getValueType(); + if (!T.isInteger() || T.getSizeInBits() != 32 || !isa(N)) + return false; + R = N; + return true; +} + +bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R, + uint32_t LogAlign) { + auto IsAligned = [LogAlign] (uint64_t V) -> bool { + return alignTo(V, 1u << LogAlign) == V; + }; + + switch (N.getOpcode()) { + case ISD::Constant: { + if (N.getValueType() != MVT::i32) + return false; + int32_t V = cast(N)->getZExtValue(); + if (!IsAligned(V)) + return false; + R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType()); + return true; + } + case HexagonISD::JT: + case HexagonISD::CP: + // These are assumed to always be aligned at at least 8-byte boundary. + if (LogAlign > 3) + return false; + R = N.getOperand(0); + return true; + case ISD::ExternalSymbol: + // Symbols may be aligned at any boundary. 
+ if (LogAlign > 0) + return false; + R = N; + return true; + case ISD::BlockAddress: + // Block address is always aligned at at least 4-byte boundary. + if (LogAlign > 2 || !IsAligned(cast(N)->getOffset())) + return false; + R = N; + return true; + } + + if (SelectGlobalAddress(N, R, false, LogAlign) || + SelectGlobalAddress(N, R, true, LogAlign)) + return true; + + return false; } bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, - bool UseGP) { + bool UseGP, uint32_t LogAlign) { + auto IsAligned = [LogAlign] (uint64_t V) -> bool { + return alignTo(V, 1u << LogAlign) == V; + }; + switch (N.getOpcode()) { case ISD::ADD: { SDValue N0 = N.getOperand(0); @@ -1270,6 +1356,9 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, return false; if (ConstantSDNode *Const = dyn_cast(N1)) { SDValue Addr = N0.getOperand(0); + // For the purpose of alignment, sextvalue and zextvalue are the same. + if (!IsAligned(Const->getZExtValue())) + return false; if (GlobalAddressSDNode *GA = dyn_cast(Addr)) { if (GA->getOpcode() == ISD::TargetGlobalAddress) { uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue(); @@ -1281,6 +1370,8 @@ bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R, } break; } + case HexagonISD::CP: + case HexagonISD::JT: case HexagonISD::CONST32: // The operand(0) of CONST32 is TargetGlobalAddress, which is what we // want in the instruction. @@ -1434,7 +1525,8 @@ bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits, bool HexagonDAGToDAGISel::isOrEquivalentToAdd(const SDNode *N) const { assert(N->getOpcode() == ISD::OR); auto *C = dyn_cast(N->getOperand(1)); - assert(C); + if (!C) + return false; // Detect when "or" is used to add an offset to a stack object. 
if (auto *FN = dyn_cast(N->getOperand(0))) { @@ -1480,6 +1572,10 @@ bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const { return false; } +bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const { + return !CheckSingleUse || N->hasOneUse(); +} + //////////////////////////////////////////////////////////////////////////////// // Rebalancing of address calculation trees diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 957fc8ca..5a1b21a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1967,6 +1967,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SRL, VT, Custom); } + // Extending loads from (native) vectors of i8 into (native) vectors of i16 + // are legal. + setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal); + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal); + setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal); + setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal); + // Types natively supported: for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32, diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index b084e04..a5381c1 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1590,10 +1590,14 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg, case Hexagon::A4_cmpbgtui: case Hexagon::A4_cmpheqi: case Hexagon::A4_cmphgti: - case Hexagon::A4_cmphgtui: + case Hexagon::A4_cmphgtui: { SrcReg2 = 0; + const MachineOperand &Op2 = MI.getOperand(2); + if (!Op2.isImm()) + return 
false; Value = MI.getOperand(2).getImm(); return true; + } } return false; diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td deleted file mode 100644 index 400c173..0000000 --- a/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td +++ /dev/null @@ -1,40 +0,0 @@ -//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Multiply 64-bit and use lower result -// -// Optimized with intrinisics accumulates -// -def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2), - (i64 - (A2_combinew - (M2_maci - (M2_maci - (i32 - (EXTRACT_SUBREG - (i64 - (M2_dpmpyuu_s0 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), - isub_lo)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - isub_lo)))), - isub_hi)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_lo)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), isub_hi))), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), isub_lo)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_hi))), - (i32 - (EXTRACT_SUBREG - (i64 - (M2_dpmpyuu_s0 - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_lo)), - (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), - isub_lo)))), isub_lo))))>; - - - diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp index 2cc8db8..f197cc4 100644 --- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -228,7 +228,11 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, // If the second operand of the compare is an imm, make sure it's in the // range specified by the arch. 
if (!secondReg) { - int64_t v = MI.getOperand(2).getImm(); + const MachineOperand &Op2 = MI.getOperand(2); + if (!Op2.isImm()) + return false; + + int64_t v = Op2.getImm(); bool Valid = false; switch (MI.getOpcode()) { diff --git a/llvm/lib/Target/Hexagon/HexagonOperands.td b/llvm/lib/Target/Hexagon/HexagonOperands.td index f80e0ef..232946e 100644 --- a/llvm/lib/Target/Hexagon/HexagonOperands.td +++ b/llvm/lib/Target/Hexagon/HexagonOperands.td @@ -29,17 +29,5 @@ def u64_0Imm : Operand { let ParserMatchClass = u64_0ImmOperand; } def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; } def n1Const : Operand { let ParserMatchClass = n1ConstOperand; } -// This complex pattern exists only to create a machine instruction operand -// of type "frame index". There doesn't seem to be a way to do that directly -// in the patterns. -def AddrFI : ComplexPattern; - -// These complex patterns are not strictly necessary, since global address -// folding will happen during DAG combining. For distinguishing between GA -// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used. -def AddrGA : ComplexPattern; -def AddrGP : ComplexPattern; - - def bblabel : Operand; def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">; diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 72d7569..89be3bd5 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -7,16 +7,105 @@ // //===----------------------------------------------------------------------===// -// Pattern fragment that combines the value type and the register class -// into a single parameter. 
+// Table of contents: +// (0) Definitions +// (1) Immediates +// (2) Type casts +// (3) Extend/truncate +// (4) Logical +// (5) Compare +// (6) Select +// (7) Insert/extract +// (8) Shift/permute +// (9) Arithmetic/bitwise +// (10) Bit +// (11) Load +// (12) Store +// (13) Memop +// (14) PIC +// (15) Call +// (16) Branch +// (17) Misc + +// Guidelines (in no particular order): +// 1. Avoid relying on pattern ordering to give preference to one pattern +// over another, prefer using AddedComplexity instead. The reason for +// this is to avoid unintended consequences (caused by altering the +// order) when making changes. The current order of patterns in this +// file obviously does play some role, but none of the ordering was +// deliberately chosen (other than to create a logical structure of +// this file). When making changes, adding AddedComplexity to existing +// patterns may be needed. +// 2. Maintain the logical structure of the file, try to put new patterns +// in designated sections. +// 3. Do not use A2_combinew instruction directly, use Combinew fragment +// instead. It uses REG_SEQUENCE, which is more amenable to optimizations. +// 4. Most selection macros are based on PatFrags. For DAGs that involve +// SDNodes, use pf1/pf2 to convert them to PatFrags. Use common frags +// whenever possible (see the Definitions section). When adding a new +// macro, try to make it general to enable reuse across sections. +// 5. Compound instructions (e.g. Rx+Rs*Rt) are generated under the condition +// that the nested operation has only one use. Having it separated in case +// of multiple uses avoids duplication of (processor) work. +// 6. The v4 vector instructions (64-bit) are treated as core instructions, +// for example, A2_vaddh is in the "arithmetic" section with A2_add. +// 7. 
When adding a pattern for an instruction with a constant-extendable +// operand, allow all possible kinds of inputs for the immediate value +// (see AnyImm/anyimm and their variants in the Definitions section). + + +// --(0) Definitions ----------------------------------------------------- +// + +// This complex pattern exists only to create a machine instruction operand +// of type "frame index". There doesn't seem to be a way to do that directly +// in the patterns. +def AddrFI: ComplexPattern; + +// These complex patterns are not strictly necessary, since global address +// folding will happen during DAG combining. For distinguishing between GA +// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used. +def AddrGA: ComplexPattern; +def AddrGP: ComplexPattern; +def AnyImm: ComplexPattern; +def AnyInt: ComplexPattern; + +// Global address or a constant being a multiple of 2^n. +def AnyImm0: ComplexPattern; +def AnyImm1: ComplexPattern; +def AnyImm2: ComplexPattern; +def AnyImm3: ComplexPattern; + + +// Type helper frags. +def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; +def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; +def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; +def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; +def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; + +def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; +def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; +def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; + +def HVI8: PatLeaf<(VecI8 HvxVR:$R)>; +def HVI16: PatLeaf<(VecI16 HvxVR:$R)>; +def HVI32: PatLeaf<(VecI32 HvxVR:$R)>; +def HVI64: PatLeaf<(VecI64 HvxVR:$R)>; + +def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; +def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; +def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; +def HWI64: PatLeaf<(VecPI64 HvxWR:$R)>; // Pattern fragments to extract the low and high subregisters from a // 64-bit value. 
def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>; def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>; -def IsOrAdd: PatFrag<(ops node:$Addr, node:$off), - (or node:$Addr, node:$off), [{ return isOrEquivalentToAdd(N); }]>; +def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ + return isOrEquivalentToAdd(N); +}]>; def IsVecOff : PatLeaf<(i32 imm), [{ int32_t V = N->getSExtValue(); @@ -28,37 +117,37 @@ def IsVecOff : PatLeaf<(i32 imm), [{ return isInt<4>(V >> L); }]>; -def IsPow2_32 : PatLeaf<(i32 imm), [{ +def IsPow2_32: PatLeaf<(i32 imm), [{ uint32_t V = N->getZExtValue(); return isPowerOf2_32(V); }]>; -def IsPow2_64 : PatLeaf<(i64 imm), [{ +def IsPow2_64: PatLeaf<(i64 imm), [{ uint64_t V = N->getZExtValue(); return isPowerOf2_64(V); }]>; -def IsNPow2_32 : PatLeaf<(i32 imm), [{ +def IsNPow2_32: PatLeaf<(i32 imm), [{ uint32_t NV = ~N->getZExtValue(); return isPowerOf2_32(NV); }]>; -def IsPow2_64L : PatLeaf<(i64 imm), [{ +def IsPow2_64L: PatLeaf<(i64 imm), [{ uint64_t V = N->getZExtValue(); return isPowerOf2_64(V) && Log2_64(V) < 32; }]>; -def IsPow2_64H : PatLeaf<(i64 imm), [{ +def IsPow2_64H: PatLeaf<(i64 imm), [{ uint64_t V = N->getZExtValue(); return isPowerOf2_64(V) && Log2_64(V) >= 32; }]>; -def IsNPow2_64L : PatLeaf<(i64 imm), [{ +def IsNPow2_64L: PatLeaf<(i64 imm), [{ uint64_t NV = ~N->getZExtValue(); return isPowerOf2_64(NV) && Log2_64(NV) < 32; }]>; -def IsNPow2_64H : PatLeaf<(i64 imm), [{ +def IsNPow2_64H: PatLeaf<(i64 imm), [{ uint64_t NV = ~N->getZExtValue(); return isPowerOf2_64(NV) && Log2_64(NV) >= 32; }]>; @@ -68,64 +157,483 @@ class IsUGT: PatLeaf<(i32 imm), "return isUInt<" # Width # ">(V) && V > " # Arg # ";" >; -def SDEC1 : SDNodeXFormgetSExtValue(); return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32); }]>; -def UDEC1 : SDNodeXFormgetZExtValue(); assert(V >= 1); return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32); }]>; -def UDEC32 : SDNodeXFormgetZExtValue(); assert(V 
>= 32); return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32); }]>; -def Log2_32 : SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); }]>; -def Log2_64 : SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant(Log2_64(V), SDLoc(N), MVT::i32); }]>; -def LogN2_32 : SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32); }]>; -def LogN2_64 : SDNodeXFormgetZExtValue(); return CurDAG->getTargetConstant(Log2_64(NV), SDLoc(N), MVT::i32); }]>; -def ToZext64: OutPatFrag<(ops node:$Rs), - (i64 (A4_combineir 0, (i32 $Rs)))>; -def ToSext64: OutPatFrag<(ops node:$Rs), - (i64 (A2_sxtw (i32 $Rs)))>; +def NegImm8: SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); +}]>; + +def NegImm16: SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); +}]>; + +def NegImm32: SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); +}]>; + + +// Helpers for type promotions/contractions. +def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>; +def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_tfrrp (i32 $Rs)))>; +def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>; +def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>; + +def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt), + (REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>; + +def addrga: PatLeaf<(i32 AddrGA:$Addr)>; +def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; +def anyimm: PatLeaf<(i32 AnyImm:$Imm)>; +def anyint: PatLeaf<(i32 AnyInt:$Imm)>; + +// Global address or an aligned constant. 
+def anyimm0: PatLeaf<(i32 AnyImm0:$Addr)>; +def anyimm1: PatLeaf<(i32 AnyImm1:$Addr)>; +def anyimm2: PatLeaf<(i32 AnyImm2:$Addr)>; +def anyimm3: PatLeaf<(i32 AnyImm3:$Addr)>; + +def f32ImmPred : PatLeaf<(f32 fpimm:$F)>; +def f64ImmPred : PatLeaf<(f64 fpimm:$F)>; + +// This complex pattern is really only to detect various forms of +// sign-extension i32->i64. The selected value will be of type i64 +// whose low word is the value being extended. The high word is +// unspecified. +def Usxtw: ComplexPattern; + +def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; +def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; +def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; + +def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off), + (PS_fi (i32 AddrFI:$Rs), imm:$off)>; + + +def alignedload: PatFrag<(ops node:$a), (load $a), [{ + return isAlignedMemNode(dyn_cast(N)); +}]>; + +def unalignedload: PatFrag<(ops node:$a), (load $a), [{ + return !isAlignedMemNode(dyn_cast(N)); +}]>; + +def alignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ + return isAlignedMemNode(dyn_cast(N)); +}]>; + +def unalignedstore: PatFrag<(ops node:$v, node:$a), (store $v, $a), [{ + return !isAlignedMemNode(dyn_cast(N)); +}]>; + + +// Converters from unary/binary SDNode to PatFrag. +class pf1 : PatFrag<(ops node:$a), (Op node:$a)>; +class pf2 : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>; + +class Not2 + : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>; + +class Su + : PatFrag; + +// Main selection macros. 
+ +class OpR_R_pat + : Pat<(ResVT (Op RegPred:$Rs)), (MI RegPred:$Rs)>; + +class OpR_RI_pat + : Pat<(ResType (Op RegPred:$Rs, ImmPred:$I)), + (MI RegPred:$Rs, imm:$I)>; + +class OpR_RR_pat + : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), + (MI RsPred:$Rs, RtPred:$Rt)>; + +class AccRRI_pat + : Pat<(AccOp RegPred:$Rx, (Op RegPred:$Rs, ImmPred:$I)), + (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>; + +class AccRRR_pat + : Pat<(AccOp RsPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)), + (MI RsPred:$Rx, RsPred:$Rs, RtPred:$Rt)>; + +multiclass SelMinMax_pats { + def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$A, Val:$B), + (InstA Val:$A, Val:$B)>; + def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$B, Val:$A), + (InstB Val:$A, Val:$B)>; +} + + +// Frags for commonly used SDNodes. +def Add: pf2; def And: pf2; def Sra: pf2; +def Sub: pf2; def Or: pf2; def Srl: pf2; +def Mul: pf2; def Xor: pf2; def Shl: pf2; + + +// --(1) Immediate ------------------------------------------------------- +// + +def SDTHexagonCONST32 + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<0>]>; + +def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; +def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; +def HexagonCONST32: SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; +def HexagonCONST32_GP: SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; + +def TruncI64ToI32: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); +}]>; + +def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>; +def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi (TruncI64ToI32 $s8))>; + +def: Pat<(HexagonCONST32 tglobaltlsaddr:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonCONST32 bbl:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonCONST32 tglobaladdr:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonCONST32_GP tblockaddress:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonCONST32_GP tglobaladdr:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonJT tjumptable:$A), (A2_tfrsi imm:$A)>; +def: Pat<(HexagonCP tconstpool:$A), (A2_tfrsi imm:$A)>; 
+ +def: Pat<(i1 0), (PS_false)>; +def: Pat<(i1 1), (PS_true)>; +def: Pat<(i64 imm:$v), (CONST64 imm:$v)>; + +def ftoi : SDNodeXFormgetValueAPF().bitcastToAPInt(); + return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N), + MVT::getIntegerVT(I.getBitWidth())); +}]>; + +def: Pat<(f32ImmPred:$f), (A2_tfrsi (ftoi $f))>; +def: Pat<(f64ImmPred:$f), (CONST64 (ftoi $f))>; + +def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>; + +// --(2) Type cast ------------------------------------------------------- +// + +let Predicates = [HasV5T] in { + def: OpR_R_pat, f64, F32>; + def: OpR_R_pat, f32, F64>; + + def: OpR_R_pat, f32, I32>; + def: OpR_R_pat, f32, I64>; + def: OpR_R_pat, f64, I32>; + def: OpR_R_pat, f64, I64>; + + def: OpR_R_pat, f32, I32>; + def: OpR_R_pat, f32, I64>; + def: OpR_R_pat, f64, I32>; + def: OpR_R_pat, f64, I64>; + + def: OpR_R_pat, i32, F32>; + def: OpR_R_pat, i32, F64>; + def: OpR_R_pat, i64, F32>; + def: OpR_R_pat, i64, F64>; + + def: OpR_R_pat, i32, F32>; + def: OpR_R_pat, i32, F64>; + def: OpR_R_pat, i64, F32>; + def: OpR_R_pat, i64, F64>; +} + +// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. +let Predicates = [HasV5T] in { + def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>; + def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; + def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>; + def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>; +} + +multiclass Cast_pat { + def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>; + def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>; +} + +// Bit convert vector types to integers. 
+defm: Cast_pat; +defm: Cast_pat; +defm: Cast_pat; +defm: Cast_pat; +defm: Cast_pat; + + +// --(3) Extend/truncate ------------------------------------------------- +// + +def: Pat<(sext_inreg I32:$Rs, i8), (A2_sxtb I32:$Rs)>; +def: Pat<(sext_inreg I32:$Rs, i16), (A2_sxth I32:$Rs)>; +def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>; +def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>; +def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>; + +def: Pat<(i64 (sext I1:$Pu)), + (Combinew (C2_muxii PredRegs:$Pu, -1, 0), + (C2_muxii PredRegs:$Pu, -1, 0))>; + +def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>; +def: Pat<(i32 (zext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; +def: Pat<(i64 (zext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; + +def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>; +def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>; +def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>; + +def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>; +def: Pat<(i1 (trunc I64:$Rs)), (C2_tfrrp (LoReg $Rs))>; + +let AddedComplexity = 20 in { + def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>; + def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>; +} + +def: Pat<(i32 (anyext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; +def: Pat<(i64 (anyext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; + +def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; +def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; +def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; + +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), + (Combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; + +def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), + (Combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; + +// Truncate: from vector B copy all 'E'ven 'B'yte elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = 
B[6]; +def: Pat<(v4i8 (trunc V4I16:$Rs)), + (S2_vtrunehb V4I16:$Rs)>; + +// Truncate: from vector B copy all 'O'dd 'B'yte elements: +// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; +// S2_vtrunohb + +// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: +// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; +// S2_vtruneh + +def: Pat<(v2i16 (trunc V2I32:$Rs)), + (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; + + +// --(4) Logical --------------------------------------------------------- +// + +def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>; +def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, I1>; +def: OpR_RR_pat, i1, I1>; + +// op(Ps, op(Pt, Pu)) +def: AccRRR_pat, I1, I1>; +def: AccRRR_pat, I1, I1>; +def: AccRRR_pat, I1, I1>; +def: AccRRR_pat, I1, I1>; + +// op(Ps, op(Pt, ~Pu)) +def: AccRRR_pat>, I1, I1>; +def: AccRRR_pat>, I1, I1>; +def: AccRRR_pat>, I1, I1>; +def: AccRRR_pat>, I1, I1>; + + +// --(5) Compare --------------------------------------------------------- +// + +// Avoid negated comparisons, i.e. those of form "Pd = !cmp(...)". +// These cannot form compounds (e.g. J4_cmpeqi_tp0_jump_nt). + +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; + +def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)), + (C2_cmpgti I32:$Rs, (SDEC1 imm:$s10))>; +def: Pat<(i1 (setuge I32:$Rs, u32_0ImmPred:$u9)), + (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9))>; + +def: Pat<(i1 (setlt I32:$Rs, s32_0ImmPred:$s10)), + (C2_not (C2_cmpgti I32:$Rs, (SDEC1 imm:$s10)))>; +def: Pat<(i1 (setult I32:$Rs, u32_0ImmPred:$u9)), + (C2_not (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9)))>; +// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones +// that reverse the order of the operands. 
+class RevCmp + : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment, F.PredicateCode, + F.OperandTransform>; -class T_CMP_pat - : Pat<(i1 (OpNode I32:$src1, ImmPred:$src2)), - (MI IntRegs:$src1, ImmPred:$src2)>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, I32>; +def: OpR_RR_pat, i1, I32>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, I64>; +def: OpR_RR_pat, i1, I64>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V8I8>; +def: OpR_RR_pat, v8i1, V8I8>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V8I8>; +def: OpR_RR_pat, v8i1, V8I8>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V4I16>; +def: OpR_RR_pat, v4i1, V4I16>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V4I16>; +def: OpR_RR_pat, v4i1, V4I16>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V2I32>; +def: OpR_RR_pat, v2i1, V2I32>; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i1, V2I32>; +def: OpR_RR_pat, v2i1, V2I32>; +def: OpR_RR_pat; +def: OpR_RR_pat; -def : T_CMP_pat ; -def : T_CMP_pat ; -def : T_CMP_pat ; +let Predicates = [HasV5T] in { + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat, i1, F32>; + def: OpR_RR_pat, i1, F32>; + def: OpR_RR_pat, i1, F32>; + def: OpR_RR_pat, i1, F32>; + def: OpR_RR_pat; + + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat; + def: OpR_RR_pat, i1, F64>; + def: OpR_RR_pat, i1, F64>; + def: OpR_RR_pat, i1, F64>; + def: OpR_RR_pat, i1, F64>; + def: OpR_RR_pat; +} + +// Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds. 
+ +def: Pat<(i1 (setne I32:$Rs, anyimm:$u5)), + (C2_not (C2_cmpeqi I32:$Rs, imm:$u5))>; +def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)), + (C2_not (C2_cmpgti I32:$Rs, imm:$u5))>; +def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)), + (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>; + +def: Pat<(i1 (setne I32:$Rs, I32:$Rt)), + (C2_not (C2_cmpeq I32:$Rs, I32:$Rt))>; +def: Pat<(i1 (setle I32:$Rs, I32:$Rt)), + (C2_not (C2_cmpgt I32:$Rs, I32:$Rt))>; +def: Pat<(i1 (setule I32:$Rs, I32:$Rt)), + (C2_not (C2_cmpgtu I32:$Rs, I32:$Rt))>; +def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), + (C2_not (C2_cmpgt I32:$Rt, I32:$Rs))>; +def: Pat<(i1 (setuge I32:$Rs, I32:$Rt)), + (C2_not (C2_cmpgtu I32:$Rt, I32:$Rs))>; + +def: Pat<(i1 (setle I64:$Rs, I64:$Rt)), + (C2_not (C2_cmpgtp I64:$Rs, I64:$Rt))>; +def: Pat<(i1 (setne I64:$Rs, I64:$Rt)), + (C2_not (C2_cmpeqp I64:$Rs, I64:$Rt))>; +def: Pat<(i1 (setge I64:$Rs, I64:$Rt)), + (C2_not (C2_cmpgtp I64:$Rt, I64:$Rs))>; +def: Pat<(i1 (setuge I64:$Rs, I64:$Rt)), + (C2_not (C2_cmpgtup I64:$Rt, I64:$Rs))>; +def: Pat<(i1 (setule I64:$Rs, I64:$Rt)), + (C2_not (C2_cmpgtup I64:$Rs, I64:$Rt))>; +let AddedComplexity = 100 in { + def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)), + (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 255), 0)), + (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; + def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 65535), 0)), + (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 65535), 0)), + (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; +} +// PatFrag for AsserZext which takes the original type as a parameter. 
def SDTAssertZext: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0,1>]>; def AssertZextSD: SDNode<"ISD::AssertZext", SDTAssertZext>; class AssertZext: PatFrag<(ops node:$A), (AssertZextSD $A, T)>; multiclass Cmpb_pat { + PatLeaf ImmPred, int Mask> { def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)), (MI I32:$Rs, imm:$I)>; def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)), @@ -159,484 +667,842 @@ let AddedComplexity = 200 in { defm: CmpbND_pat, IsUGT<32,32>, 65535>; } +def: Pat<(i32 (zext (i1 (seteq I32:$Rs, I32:$Rt)))), + (A4_rcmpeq I32:$Rs, I32:$Rt)>; +def: Pat<(i32 (zext (i1 (setne I32:$Rs, I32:$Rt)))), + (A4_rcmpneq I32:$Rs, I32:$Rt)>; +def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))), + (A4_rcmpeqi I32:$Rs, imm:$s8)>; +def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))), + (A4_rcmpneqi I32:$Rs, imm:$s8)>; -def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; - -def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; -def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; - -// Pats for instruction selection. -class BinOp32_pat - : Pat<(ResT (Op I32:$Rs, I32:$Rt)), - (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>; - -def: BinOp32_pat; -def: BinOp32_pat; -def: BinOp32_pat; -def: BinOp32_pat; -def: BinOp32_pat; +def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), + (C2_xor I1:$Ps, I1:$Pt)>; -def: BinOp32_pat; -def: BinOp32_pat; +def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), + (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), + (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>; -// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones -// that reverse the order of the operands. 
-class RevCmp : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>; +def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), + (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>; +def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), + (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>; -// Pats for compares. They use PatFrags as operands, not SDNodes, -// since seteq/setgt/etc. are defined as ParFrags. -class T_cmp32_rr_pat - : Pat<(VT (Op I32:$Rs, I32:$Rt)), - (MI IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), + (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; +// Floating-point comparisons with checks for ordered/unordered status. -def: T_cmp32_rr_pat, i1>; -def: T_cmp32_rr_pat, i1>; +class T3 + : OutPatFrag<(ops node:$Rs, node:$Rt), + (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>; -def: Pat<(select I1:$Pu, I32:$Rs, I32:$Rt), - (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>; +class OpmR_RR_pat + : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), + (Output RsPred:$Rs, RtPred:$Rt)>; -def: Pat<(add I32:$Rs, s32_0ImmPred:$s16), - (A2_addi I32:$Rs, imm:$s16)>; +class Cmpuf: T3; +class Cmpud: T3; -def: Pat<(or I32:$Rs, s32_0ImmPred:$s10), - (A2_orir IntRegs:$Rs, imm:$s10)>; -def: Pat<(and I32:$Rs, s32_0ImmPred:$s10), - (A2_andir IntRegs:$Rs, imm:$s10)>; +class Cmpufn: T3; +class Cmpudn: T3; -def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs), - (A2_subri imm:$s10, IntRegs:$Rs)>; +let Predicates = [HasV5T] in { + def: OpmR_RR_pat, setueq, i1, F32>; + def: OpmR_RR_pat, setuge, i1, F32>; + def: OpmR_RR_pat, setugt, i1, F32>; + def: OpmR_RR_pat, RevCmp, i1, F32>; + def: OpmR_RR_pat, RevCmp, i1, F32>; + def: OpmR_RR_pat, setune, i1, F32>; -// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs). 
-def: Pat<(not I32:$src1), - (A2_subri -1, IntRegs:$src1)>; + def: OpmR_RR_pat, setueq, i1, F64>; + def: OpmR_RR_pat, setuge, i1, F64>; + def: OpmR_RR_pat, setugt, i1, F64>; + def: OpmR_RR_pat, RevCmp, i1, F64>; + def: OpmR_RR_pat, RevCmp, i1, F64>; + def: OpmR_RR_pat, setune, i1, F64>; +} -def TruncI64ToI32: SDNodeXFormgetTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); -}]>; +class Outn + : OutPatFrag<(ops node:$Rs, node:$Rt), + (C2_not (MI $Rs, $Rt))>; -def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>; -def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi (TruncI64ToI32 $s8))>; +let Predicates = [HasV5T] in { + def: OpmR_RR_pat, setone, i1, F32>; + def: OpmR_RR_pat, setne, i1, F32>; -def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs), - (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>; + def: OpmR_RR_pat, setone, i1, F64>; + def: OpmR_RR_pat, setne, i1, F64>; -def : Pat<(select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8), - (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>; + def: OpmR_RR_pat, seto, i1, F32>; + def: OpmR_RR_pat, seto, i1, F64>; +} -def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8), - (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>; -def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>; -def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>; -def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>; -def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>; +// --(6) Select ---------------------------------------------------------- +// -class T_vcmp_pat - : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))), - (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>; +def: Pat<(select I1:$Pu, I32:$Rs, I32:$Rt), + (C2_mux I1:$Pu, I32:$Rs, I32:$Rt)>; +def: Pat<(select I1:$Pu, anyimm:$s8, I32:$Rs), + (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>; +def: Pat<(select I1:$Pu, I32:$Rs, anyimm:$s8), + (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>; +def: Pat<(select I1:$Pu, anyimm:$s8, s8_0ImmPred:$S8), + (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>; + +def: Pat<(select (not I1:$Pu), I32:$Rs, I32:$Rt), + (C2_mux 
I1:$Pu, I32:$Rt, I32:$Rs)>; +def: Pat<(select (not I1:$Pu), s8_0ImmPred:$S8, anyimm:$s8), + (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>; +def: Pat<(select (not I1:$Pu), anyimm:$s8, I32:$Rs), + (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>; +def: Pat<(select (not I1:$Pu), I32:$Rs, anyimm:$s8), + (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; -def: T_vcmp_pat; +// Map from a 64-bit select to an emulated 64-bit mux. +// Hexagon does not support 64-bit MUXes; so emulate with combines. +def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt), + (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), + (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; -// Add halfword. -def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16), - (A2_addh_l16_ll I32:$src1, I32:$src2)>; +let Predicates = [HasV5T] in { + def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I), + (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; + def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt), + (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; + def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt), + (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>; + def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt), + (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), + (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; + + def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt), + (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>; + def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt), + (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>; + + def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs), + (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; + def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I), + (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; +} + +def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt), + (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; +def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt), + (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; +def: 
Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt), + (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), + (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; -def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)), - (A2_addh_l16_hl I32:$src1, I32:$src2)>; +def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt), + (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; +def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt), + (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; +def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt), + (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; -def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)), - (A2_addh_h16_ll I32:$src1, I32:$src2)>; -// Subtract halfword. -def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16), - (A2_subh_l16_ll I32:$src1, I32:$src2)>; +class HvxSel_pat + : Pat<(select I1:$Pu, RegPred:$Vs, RegPred:$Vt), + (MI I1:$Pu, RegPred:$Vs, RegPred:$Vt)>; -def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)), - (A2_subh_h16_ll I32:$src1, I32:$src2)>; +let Predicates = [HasV60T,UseHVX] in { + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; + def: HvxSel_pat; +} -// Here, depending on the operand being selected, we'll either generate a -// min or max instruction. -// Ex: -// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected -// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'. -// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value -// is selected and the corresponding HexagonInst is passed in 'SwapInst'. +// From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw). 
+def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw), + (C2_or (C2_and I1:$Pu, I1:$Pv), + (C2_andn I1:$Pw, I1:$Pu))>; -multiclass T_MinMax_pats { - def: Pat<(select (i1 (Op Val:$src1, Val:$src2)), Val:$src1, Val:$src2), - (Inst Val:$src1, Val:$src2)>; - def: Pat<(select (i1 (Op Val:$src1, Val:$src2)), Val:$src2, Val:$src1), - (SwapInst Val:$src1, Val:$src2)>; -} def IsPosHalf : PatLeaf<(i32 IntRegs:$a), [{ return isPositiveHalfWord(N); }]>; -multiclass MinMax_pats { - defm: T_MinMax_pats; - - def: Pat<(sext_inreg (select (i1 (Op IsPosHalf:$src1, IsPosHalf:$src2)), - IsPosHalf:$src1, IsPosHalf:$src2), - i16), - (Inst IntRegs:$src1, IntRegs:$src2)>; +multiclass SelMinMax16_pats { + def: Pat<(sext_inreg (select (i1 (CmpOp IsPosHalf:$Rs, IsPosHalf:$Rt)), + IsPosHalf:$Rs, IsPosHalf:$Rt), i16), + (InstA IntRegs:$Rs, IntRegs:$Rt)>; + def: Pat<(sext_inreg (select (i1 (CmpOp IsPosHalf:$Rs, IsPosHalf:$Rt)), + IsPosHalf:$Rt, IsPosHalf:$Rs), i16), + (InstB IntRegs:$Rs, IntRegs:$Rt)>; +} - def: Pat<(sext_inreg (select (i1 (Op IsPosHalf:$src1, IsPosHalf:$src2)), - IsPosHalf:$src2, IsPosHalf:$src1), - i16), - (SwapInst IntRegs:$src1, IntRegs:$src2)>; +let AddedComplexity = 200 in { + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; + defm: SelMinMax16_pats; } let AddedComplexity = 200 in { - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; - defm: MinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; } -class T_cmp64_rr_pat - : 
Pat<(i1 (CmpOp I64:$Rs, I64:$Rt)), - (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>; +let AddedComplexity = 100, Predicates = [HasV5T] in { + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; + defm: SelMinMax_pats; +} -def: T_cmp64_rr_pat; -def: T_cmp64_rr_pat; -def: T_cmp64_rr_pat; -def: T_cmp64_rr_pat>; -def: T_cmp64_rr_pat>; -def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>; +// --(7) Insert/extract -------------------------------------------------- +// -def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>; -def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>; +def SDTHexagonINSERT: + SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; +def SDTHexagonINSERTRP: + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisVT<3, i64>]>; -def: Pat<(i1 (not I1:$Ps)), (C2_not PredRegs:$Ps)>; +def HexagonINSERT: SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; +def HexagonINSERTRP: SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; -def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>; -def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>; +def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), + (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>; +def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), + (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>; +def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), + (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; +def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), + (S2_insertp_rp I64:$Rs, 
I64:$Rt, I64:$Ru)>; -def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; +def SDTHexagonEXTRACTU + : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDTHexagonEXTRACTURP + : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i64>]>; -def: Pat<(br bb:$dst), (J2_jump b30_2Imm:$dst)>; -def: Pat<(brcond I1:$src1, bb:$block), (J2_jumpt PredRegs:$src1, bb:$block)>; -def: Pat<(brind I32:$dst), (J2_jumpr IntRegs:$dst)>; +def HexagonEXTRACTU: SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; +def HexagonEXTRACTURP: SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; -def: Pat<(retflag), (PS_jmpret (i32 R31))>; -def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>; +def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5), + (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>; +def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6), + (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>; +def: Pat<(HexagonEXTRACTURP I32:$Rs, I64:$Rt), + (S2_extractu_rp I32:$Rs, I64:$Rt)>; +def: Pat<(HexagonEXTRACTURP I64:$Rs, I64:$Rt), + (S2_extractup_rp I64:$Rs, I64:$Rt)>; -// Patterns to select load-indexed (i.e. load from base+offset). 
-multiclass Loadx_pat { - def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; - def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), - (VT (MI AddrFI:$fi, imm:$Off))>; - def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))), - (VT (MI AddrFI:$fi, imm:$Off))>; - def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))), - (VT (MI IntRegs:$Rs, imm:$Off))>; - def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>; -} +def SDTHexagonVSPLAT: + SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; -let AddedComplexity = 20 in { - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - defm: Loadx_pat; - // No sextloadi1. -} +def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>; + +def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>; +def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>; +def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), + (A2_combineii imm:$s8, imm:$s8)>; +def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>; + + +// --(8) Shift/permute --------------------------------------------------- +// + +def SDTHexagonI64I32I32: SDTypeProfile<1, 2, + [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; +def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>; + +def HexagonPACKHL: SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>; +def HexagonCOMBINE: SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; +def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>; +def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>; + +def: OpR_RR_pat, i64, I32>; + +def: Pat<(HexagonCOMBINE I32:$Rs, I32:$Rt), 
(Combinew $Rs, $Rt)>; + +// The complexity of the combines involving immediates should be greater +// than the complexity of the combine with two registers. +let AddedComplexity = 50 in { + def: Pat<(HexagonCOMBINE I32:$Rs, anyimm:$s8), + (A4_combineri IntRegs:$Rs, imm:$s8)>; + def: Pat<(HexagonCOMBINE anyimm:$s8, I32:$Rs), + (A4_combineir imm:$s8, IntRegs:$Rs)>; +} + +// The complexity of the combine with two immediates should be greater than +// the complexity of a combine involving a register. +let AddedComplexity = 75 in { + def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, anyimm:$u6), + (A4_combineii imm:$s8, imm:$u6)>; + def: Pat<(HexagonCOMBINE anyimm:$s8, s8_0ImmPred:$S8), + (A2_combineii imm:$s8, imm:$S8)>; +} + +let Predicates = [UseHVX] in { + def: OpR_RR_pat, VecPI32, HVI32>; + def: OpR_RR_pat, VecI8, HVI8>; + def: OpR_RR_pat, VecI8, HVI8>; + def: OpR_RR_pat, VecI16, HVI16>; + def: OpR_RR_pat, VecI16, HVI16>; +} + +def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>; +def: Pat<(bswap I64:$Rss), (Combinew (A2_swiz (LoReg $Rss)), + (A2_swiz (HiReg $Rss)))>; + +def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt), (S4_lsli imm:$s6, I32:$Rt)>; +def: Pat<(shl I32:$Rs, (i32 16)), (A2_aslh I32:$Rs)>; +def: Pat<(sra I32:$Rs, (i32 16)), (A2_asrh I32:$Rs)>; + +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; +def: OpR_RI_pat; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; + + +def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)), + (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>; +def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)), + (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>, Requires<[HasV5T]>; + +// Prefer S2_addasl_rrri over S2_asl_i_r_acc. 
+let AddedComplexity = 120 in +def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)), + (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>; + +let AddedComplexity = 100 in { + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + def: AccRRI_pat, I32, u5_0ImmPred>; + + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; + def: AccRRI_pat, I64, u6_0ImmPred>; +} + +let AddedComplexity = 100 in { + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + def: AccRRR_pat, I32, I32>; + + def: AccRRR_pat, I64, I32>; + def: 
AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; + def: AccRRR_pat, I64, I32>; +} + + +class OpshIRI_pat + : Pat<(Op anyimm:$u8, (ShOp RegPred:$Rs, ImmPred:$U5)), + (MI anyimm:$u8, RegPred:$Rs, imm:$U5)>; + +let AddedComplexity = 200 in { + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; + def: OpshIRI_pat, I32, u5_0ImmPred>; +} + +// Prefer this pattern to S2_asl_i_p_or for the special case of joining +// two 32-bit words into a 64-bit word. +let AddedComplexity = 200 in +def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)), + (Combinew I32:$a, I32:$b)>; + +def: Pat<(or (or (or (shl (Zext64 (and I32:$b, (i32 65535))), (i32 16)), + (Zext64 (and I32:$a, (i32 65535)))), + (shl (Aext64 (and I32:$c, (i32 65535))), (i32 32))), + (shl (Aext64 I32:$d), (i32 48))), + (Combinew (A2_combine_ll I32:$d, I32:$c), + (A2_combine_ll I32:$b, I32:$a))>; + +def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))), + (i32 8)), + (i32 (zextloadi8 (add I32:$b, 2)))), + (i32 16)), + (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), + (zextloadi8 I32:$b)), + (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; + + +def SDTHexagonVShift + : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>; + +def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>; +def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>; +def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>; + +def: OpR_RI_pat, v2i32, V2I32, u5_0ImmPred>; +def: OpR_RI_pat, v4i16, V4I16, u4_0ImmPred>; +def: OpR_RI_pat, v2i32, V2I32, u5_0ImmPred>; +def: OpR_RI_pat, v4i16, V4I16, u4_0ImmPred>; +def: OpR_RI_pat, v2i32, V2I32, u5_0ImmPred>; +def: OpR_RI_pat, v4i16, V4I16, u4_0ImmPred>; + +def: OpR_RR_pat, v2i32, V2I32, I32>; +def: 
OpR_RR_pat, v4i16, V4I16, I32>; +def: OpR_RR_pat, v2i32, V2I32, I32>; +def: OpR_RR_pat, v4i16, V4I16, I32>; +def: OpR_RR_pat, v2i32, V2I32, I32>; +def: OpR_RR_pat, v4i16, V4I16, I32>; + +def: Pat<(sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))), + (S2_asr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))), + (S2_lsr_i_vw V2I32:$b, imm:$c)>; +def: Pat<(shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))), + (S2_asl_i_vw V2I32:$b, imm:$c)>; +def: Pat<(sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), + (S2_asr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), + (S2_lsr_i_vh V4I16:$b, imm:$c)>; +def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), + (S2_asl_i_vh V4I16:$b, imm:$c)>; -// Sign-extending loads of i1 need to replicate the lowest bit throughout -// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should -// do the trick. -let AddedComplexity = 20 in -def: Pat<(i32 (sextloadi1 I32:$Rs)), - (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; -def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>; -def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>; -def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>; +// --(9) Arithmetic/bitwise ---------------------------------------------- +// + +def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>; +def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>; +def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>; + +let Predicates = [HasV5T] in { + def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>; + def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>; + + def: Pat<(fabs F64:$Rs), + (Combinew (S2_clrbit_i (HiReg $Rs), 31), + (i32 (LoReg $Rs)))>; + def: Pat<(fneg F64:$Rs), + (Combinew (S2_togglebit_i (HiReg $Rs), 31), + (i32 (LoReg $Rs)))>; +} + +let AddedComplexity = 50 in +def: Pat<(xor (add (sra I32:$Rs, (i32 31)), + I32:$Rs), + (sra I32:$Rs, (i32 31))), + 
(A2_abs I32:$Rs)>; + + +def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>; +def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>; +def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>; +def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat, i64, I64>; +def: OpR_RR_pat, i64, I64>; + +def: OpR_RR_pat; +def: OpR_RR_pat; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; + +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; + +def: OpR_RR_pat; +def: OpR_RR_pat, i32, I32>; +def: OpR_RR_pat, i32, I32>; +def: OpR_RI_pat; +def: OpR_RI_pat; + +// Arithmetic on predicates. +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; +def: OpR_RR_pat; -def: Pat<(mul IntRegs:$Rs, u32_0ImmPred:$u8), - (M2_mpysip IntRegs:$Rs, imm:$u8)>; -def: Pat<(ineg (mul IntRegs:$Rs, u8_0ImmPred:$u8)), +let Predicates = [HasV5T] in { + def: OpR_RR_pat, f32, F32>; + def: OpR_RR_pat, f32, F32>; + def: OpR_RR_pat, f32, F32>; + def: OpR_RR_pat, f32, F32>; + def: OpR_RR_pat, f32, F32>; +} + +// In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add, +// over add-add with individual multiplies as inputs. 
+let AddedComplexity = 10 in { + def: AccRRI_pat, I32, u32_0ImmPred>; + def: AccRRI_pat, I32, u32_0ImmPred>; + def: AccRRR_pat, I32, I32>; +} + +def: AccRRI_pat, I32, s32_0ImmPred>; +def: AccRRI_pat, I32, s32_0ImmPred>; +def: AccRRR_pat, I32, I32>; + + +def: Pat<(ineg (mul I32:$Rs, u8_0ImmPred:$u8)), (M2_mpysin IntRegs:$Rs, imm:$u8)>; -def: Pat<(mul IntRegs:$src1, s32_0ImmPred:$src2), - (M2_mpysmi IntRegs:$src1, imm:$src2)>; -def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1), - (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>; -def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1), - (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; -def: Pat<(add (add IntRegs:$src2, s32_0ImmPred:$src3), IntRegs:$src1), - (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>; -def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1), - (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -class T_MType_acc_pat1 - : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)), - (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>; - -class T_MType_acc_pat2 - : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))), - (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def : T_MType_acc_pat2 ; -def : T_MType_acc_pat1 ; - -def : T_MType_acc_pat1 ; -def : T_MType_acc_pat2 ; - -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; -def: T_MType_acc_pat2 ; - -class T_MType_acc_pat3 - : Pat <(secOp I32:$src1, (firstOp I32:$src2, (not I32:$src3))), - (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>; - -def: T_MType_acc_pat3 ; -def: T_MType_acc_pat3 ; -def: T_MType_acc_pat3 ; -// This complex pattern is really only to detect various forms of -// sign-extension i32->i64. The selected value will be of type i64 -// whose low word is the value being extended. The high word is -// unspecified. 
-def Usxtw : ComplexPattern; +def n8_0ImmPred: PatLeaf<(i32 imm), [{ + int64_t V = N->getSExtValue(); + return -255 <= V && V <= 0; +}]>; -def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; -def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; -def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; +// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) +def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8), + (M2_mpysin I32:$Rs, (NegImm8 imm:$n8))>; + +def: Pat<(add Sext64:$Rs, I64:$Rt), + (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>; + +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I32, I32>; +def: AccRRR_pat, I64, I64>; + +def: AccRRR_pat>, I32, I32>; +def: AccRRR_pat>, I32, I32>; +def: AccRRR_pat>, I32, I32>; + +// S4_addaddi and S4_subaddi don't have tied operands, so give them +// a bit of preference. 
+let AddedComplexity = 30 in { + def: Pat<(add I32:$Rs, (Su I32:$Ru, anyimm:$s6)), + (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>; + def: Pat<(add I32:$Rs, (Su anyimm:$s6, I32:$Ru)), + (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>; + def: Pat<(sub (Su I32:$Rs, anyimm:$s6), I32:$Ru), + (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>; + def: Pat<(add (Su I32:$Rs, I32:$Ru), anyimm:$s6), + (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>; +} + +def: Pat<(or I32:$Ru, (Su I32:$Rx, anyimm:$s10)), + (S4_or_andix IntRegs:$Ru, IntRegs:$Rx, imm:$s10)>; +def: Pat<(or I32:$Rx, (Su I32:$Rs, anyimm:$s10)), + (S4_or_andi IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>; +def: Pat<(or I32:$Rx, (Su I32:$Rs, anyimm:$s10)), + (S4_or_ori IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>; -def: Pat<(i32 (trunc (sra (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), + +def: Pat<(i32 (trunc (sra (Su Sext64:$Rs, Sext64:$Rt), (i32 32)))), (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -def: Pat<(i32 (trunc (srl (mul Sext64:$Rs, Sext64:$Rt), (i32 32)))), +def: Pat<(i32 (trunc (srl (Su Sext64:$Rs, Sext64:$Rt), (i32 32)))), (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; +def: Pat<(mul (Zext64 I32:$Rs), (Zext64 I32:$Rt)), + (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)), (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; - def: Pat<(mul Sext64:$Rs, Sext64:$Rt), (M2_dpmpyss_s0 (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; -// Multiply and accumulate, use full result. 
-// Rxx[+-]=mpy(Rs,Rt) - -def: Pat<(add I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)), +def: Pat<(add I64:$Rx, (Su Sext64:$Rs, Sext64:$Rt)), (M2_dpmpyss_acc_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; - -def: Pat<(sub I64:$Rx, (mul Sext64:$Rs, Sext64:$Rt)), +def: Pat<(sub I64:$Rx, (Su Sext64:$Rs, Sext64:$Rt)), (M2_dpmpyss_nac_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; - -def: Pat<(add I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))), +def: Pat<(add I64:$Rx, (Su (Aext64 I32:$Rs), (Aext64 I32:$Rt))), (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; - -def: Pat<(add I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))), +def: Pat<(add I64:$Rx, (Su (Zext64 I32:$Rs), (Zext64 I32:$Rt))), (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; - -def: Pat<(sub I64:$Rx, (mul (Aext64 I32:$Rs), (Aext64 I32:$Rt))), +def: Pat<(sub I64:$Rx, (Su (Aext64 I32:$Rs), (Aext64 I32:$Rt))), (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; - -def: Pat<(sub I64:$Rx, (mul (Zext64 I32:$Rs), (Zext64 I32:$Rt))), +def: Pat<(sub I64:$Rx, (Su (Zext64 I32:$Rs), (Zext64 I32:$Rt))), (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; -class Storepi_pat - : Pat<(Store Value:$src1, I32:$src2, Offset:$offset), - (MI I32:$src2, imm:$offset, Value:$src1)>; - -def: Storepi_pat; -def: Storepi_pat; -def: Storepi_pat; -def: Storepi_pat; +// Add halfword. +def: Pat<(sext_inreg (add I32:$Rt, I32:$Rs), i16), + (A2_addh_l16_ll I32:$Rt, I32:$Rs)>; +def: Pat<(sra (add (shl I32:$Rt, (i32 16)), I32:$Rs), (i32 16)), + (A2_addh_l16_hl I32:$Rt, I32:$Rs)>; +def: Pat<(shl (add I32:$Rt, I32:$Rs), (i32 16)), + (A2_addh_h16_ll I32:$Rt, I32:$Rs)>; -// Patterns for generating stores, where the address takes different forms: -// - frameindex, -// - frameindex + offset, -// - base + offset, -// - simple (base address without offset). -// These would usually be used together (via Storex_pat defined below), but -// in some cases one may want to apply different properties (such as -// AddedComplexity) to the individual patterns. 
-class Storex_fi_pat - : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; -multiclass Storex_fi_add_pat { - def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; - def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; -} -multiclass Storex_add_pat { - def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; - def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; -} -class Storex_simple_pat - : Pat<(Store Value:$Rt, I32:$Rs), - (MI IntRegs:$Rs, 0, Value:$Rt)>; +// Subtract halfword. +def: Pat<(sext_inreg (sub I32:$Rt, I32:$Rs), i16), + (A2_subh_l16_ll I32:$Rt, I32:$Rs)>; +def: Pat<(sra (add (shl I32:$Rt, (i32 16)), I32:$Rs), (i32 16)), + (A2_addh_l16_hl I32:$Rt, I32:$Rs)>; +def: Pat<(shl (sub I32:$Rt, I32:$Rs), (i32 16)), + (A2_subh_h16_ll I32:$Rt, I32:$Rs)>; -// Patterns for generating stores, where the address takes different forms, -// and where the value being stored is transformed through the value modifier -// ValueMod. The address forms are same as above. 
-class Storexm_fi_pat - : Pat<(Store Value:$Rs, AddrFI:$fi), - (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; -multiclass Storexm_fi_add_pat { - def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; - def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), - (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; -} -multiclass Storexm_add_pat { - def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; - def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)), - (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; -} -class Storexm_simple_pat - : Pat<(Store Value:$Rt, I32:$Rs), - (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; +def: Pat<(mul I64:$Rss, I64:$Rtt), + (Combinew + (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))), + (LoReg $Rss), + (HiReg $Rtt)), + (LoReg $Rtt), + (HiReg $Rss)), + (i32 (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)))))>; -multiclass Storex_pat { - def: Storex_fi_pat ; - defm: Storex_fi_add_pat ; - defm: Storex_add_pat ; -} +def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt), + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p + (A2_addp + (M2_dpmpyuu_acc_s0 + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32), + (HiReg $Rss), + (LoReg $Rtt)), + (A4_combineir 0, (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))), + 32), + (HiReg $Rss), + (HiReg $Rtt)), + (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>; -multiclass Storexm_pat { - def: Storexm_fi_pat ; - defm: Storexm_fi_add_pat ; - defm: Storexm_add_pat ; -} +// Multiply 64-bit unsigned and use upper result. +def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>; -// Regular stores in the DAG have two operands: value and address. -// Atomic stores also have two, but they are reversed: address, value. -// To use atomic stores with the patterns, they need to have their operands -// swapped. 
This relies on the knowledge that the F.Fragment uses names -// "ptr" and "val". -class SwapSt - : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, - F.OperandTransform>; +// Multiply 64-bit signed and use upper result. +// +// For two signed 64-bit integers A and B, let A' and B' denote A and B +// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the +// sign bit of A (and identically for B). With this notation, the signed +// product A*B can be written as: +// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B') +// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B' +// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A'] +// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A'] -let AddedComplexity = 20 in { - defm: Storex_pat; - defm: Storex_pat; - defm: Storex_pat; - defm: Storex_pat; +// Clear the sign bit in a 64-bit register. +def ClearSign : OutPatFrag<(ops node:$Rss), + (Combinew (S2_clrbit_i (HiReg $Rss), 31), (i32 (LoReg $Rss)))>; - defm: Storex_pat, I32, s32_0ImmPred, S2_storerb_io>; - defm: Storex_pat, I32, s31_1ImmPred, S2_storerh_io>; - defm: Storex_pat, I32, s30_2ImmPred, S2_storeri_io>; - defm: Storex_pat, I64, s29_3ImmPred, S2_storerd_io>; -} +def : Pat <(mulhs I64:$Rss, I64:$Rtt), + (A2_subp + (MulHU $Rss, $Rtt), + (A2_addp + (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)), + (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>; -// Simple patterns should be tried with the least priority. 
-def: Storex_simple_pat; -def: Storex_simple_pat; -def: Storex_simple_pat; -def: Storex_simple_pat; +def: Pat<(add (Su I32:$Rs, u6_0ImmPred:$U6), anyimm:$u6), + (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; +def: Pat<(add (Su I32:$Rs, I32:$Rt), anyimm:$u6), + (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; +def: Pat<(add I32:$Ru, (Su I32:$Rs, u6_2ImmPred:$u6_2)), + (M4_mpyri_addr_u2 IntRegs:$Ru, imm:$u6_2, IntRegs:$Rs)>; +def: Pat<(add I32:$Ru, (Su I32:$Rs, anyimm:$u6)), + (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>; +def: Pat<(add I32:$Ru, (Su I32:$Ry, I32:$Rs)), + (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>; -def: Storex_simple_pat, I32, S2_storerb_io>; -def: Storex_simple_pat, I32, S2_storerh_io>; -def: Storex_simple_pat, I32, S2_storeri_io>; -def: Storex_simple_pat, I64, S2_storerd_io>; -let AddedComplexity = 20 in { - defm: Storexm_pat; - defm: Storexm_pat; - defm: Storexm_pat; +let Predicates = [HasV5T] in { + def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx), + (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>; + def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx), + (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; + def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx), + (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; } -def: Storexm_simple_pat; -def: Storexm_simple_pat; -def: Storexm_simple_pat; -def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>; -def: Pat <(i64 (sext_inreg I64:$src, i32)), (A2_sxtw (LoReg I64:$src))>; +def: Pat<(mul V2I32:$Rs, V2I32:$Rt), + (PS_vmulw V2I32:$Rs, V2I32:$Rt)>; +def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)), + (PS_vmulw_acc V2I32:$Rx, V2I32:$Rs, V2I32:$Rt)>; + +// Add/subtract two v4i8: Hexagon does not have an insn for this one, so +// we use the double add v8i8, and use only the low part of the result. 
+def: Pat<(add V4I8:$Rs, V4I8:$Rt), + (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>; +def: Pat<(sub V4I8:$Rs, V4I8:$Rt), + (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>; -def: Pat<(select (i1 (setlt I32:$src, 0)), (sub 0, I32:$src), I32:$src), - (A2_abs IntRegs:$src)>; +// Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two +// half-words, and saturates the result to a 32-bit value, except the +// saturation never happens (it can only occur with scaling). +def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), + (LoReg (S2_vtrunewh (A2_combineii 0, 0), + (M2_vmpy2s_s0 V2I16:$Rs, V2I16:$Rt)))>; +def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), + (S2_vtrunewh (M2_vmpy2s_s0 (HiReg $Rs), (HiReg $Rt)), + (M2_vmpy2s_s0 (LoReg $Rs), (LoReg $Rt)))>; -let AddedComplexity = 50 in -def: Pat<(xor (add (sra I32:$src, (i32 31)), - I32:$src), - (sra I32:$src, (i32 31))), - (A2_abs IntRegs:$src)>; +// Multiplies two v4i8 vectors. +def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), + (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>, + Requires<[HasV5T]>; -def: Pat<(sra I32:$src, u5_0ImmPred:$u5), - (S2_asr_i_r IntRegs:$src, imm:$u5)>; -def: Pat<(srl I32:$src, u5_0ImmPred:$u5), - (S2_lsr_i_r IntRegs:$src, imm:$u5)>; -def: Pat<(shl I32:$src, u5_0ImmPred:$u5), - (S2_asl_i_r IntRegs:$src, imm:$u5)>; +// Multiplies two v8i8 vectors. +def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), + (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))), + (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>, + Requires<[HasV5T]>; -def: Pat<(sra (add (sra I32:$src1, u5_0ImmPred:$src2), 1), (i32 1)), - (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>; -def : Pat<(not I64:$src1), - (A2_notp DoubleRegs:$src1)>; +// --(10) Bit ------------------------------------------------------------ +// // Count leading zeros. 
-def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>; -def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; +def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>; +def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; -// Count trailing zeros: 32-bit. -def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>; +// Count trailing zeros. +def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>; +def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; // Count leading ones. -def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>; +def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>; def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; -// Count trailing ones: 32-bit. -def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>; +// Count trailing ones. +def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>; +def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; + +// Define leading/trailing patterns that require zero-extensions to 64 bits. +def: Pat<(i64 (ctlz I64:$Rss)), (ToZext64 (S2_cl0p I64:$Rss))>; +def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>; +def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>; +def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>; + +def: Pat<(i64 (ctpop I64:$Rss)), (ToZext64 (S5_popcountp I64:$Rss))>; +def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>; + +def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>; +def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>; + let AddedComplexity = 20 in { // Complexity greater than and/or/xor def: Pat<(and I32:$Rs, IsNPow2_32:$V), @@ -657,39 +1523,30 @@ let AddedComplexity = 20 in { // Complexity greater than and/or/xor // Clr/set/toggle bit for 64-bit values with immediate bit index. 
let AddedComplexity = 20 in { // Complexity greater than and/or/xor def: Pat<(and I64:$Rss, IsNPow2_64L:$V), - (REG_SEQUENCE DoubleRegs, - (i32 (HiReg $Rss)), isub_hi, - (S2_clrbit_i (LoReg $Rss), (LogN2_64 $V)), isub_lo)>; + (Combinew (i32 (HiReg $Rss)), + (S2_clrbit_i (LoReg $Rss), (LogN2_64 $V)))>; def: Pat<(and I64:$Rss, IsNPow2_64H:$V), - (REG_SEQUENCE DoubleRegs, - (S2_clrbit_i (HiReg $Rss), (UDEC32 (i32 (LogN2_64 $V)))), - isub_hi, - (i32 (LoReg $Rss)), isub_lo)>; + (Combinew (S2_clrbit_i (HiReg $Rss), (UDEC32 (i32 (LogN2_64 $V)))), + (i32 (LoReg $Rss)))>; def: Pat<(or I64:$Rss, IsPow2_64L:$V), - (REG_SEQUENCE DoubleRegs, - (i32 (HiReg $Rss)), isub_hi, - (S2_setbit_i (LoReg $Rss), (Log2_64 $V)), isub_lo)>; + (Combinew (i32 (HiReg $Rss)), + (S2_setbit_i (LoReg $Rss), (Log2_64 $V)))>; def: Pat<(or I64:$Rss, IsPow2_64H:$V), - (REG_SEQUENCE DoubleRegs, - (S2_setbit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), - isub_hi, - (i32 (LoReg $Rss)), isub_lo)>; + (Combinew (S2_setbit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), + (i32 (LoReg $Rss)))>; def: Pat<(xor I64:$Rss, IsPow2_64L:$V), - (REG_SEQUENCE DoubleRegs, - (i32 (HiReg $Rss)), isub_hi, - (S2_togglebit_i (LoReg $Rss), (Log2_64 $V)), isub_lo)>; + (Combinew (i32 (HiReg $Rss)), + (S2_togglebit_i (LoReg $Rss), (Log2_64 $V)))>; def: Pat<(xor I64:$Rss, IsPow2_64H:$V), - (REG_SEQUENCE DoubleRegs, - (S2_togglebit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), - isub_hi, - (i32 (LoReg $Rss)), isub_lo)>; + (Combinew (S2_togglebit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), + (i32 (LoReg $Rss)))>; } let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. 
def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), - (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>; + (S2_tstbit_i IntRegs:$Rs, imm:$u5)>; def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)), (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; def: Pat<(i1 (trunc I32:$Rs)), @@ -700,7 +1557,7 @@ let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)), - (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>; + (C2_bitsclri IntRegs:$Rs, imm:$u6)>; def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), 0)), (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; } @@ -709,743 +1566,569 @@ let AddedComplexity = 10 in // Complexity greater than compare reg-reg. def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)), (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))), - (i32 8)), - (i32 (zextloadi8 (add I32:$b, 2)))), - (i32 16)), - (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), - (zextloadi8 I32:$b)), - (A2_swiz (L2_loadri_io IntRegs:$b, 0))>; - -// Patterns for loads of i1: -def: Pat<(i1 (load AddrFI:$fi)), - (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; -def: Pat<(i1 (load (add I32:$Rs, s32_0ImmPred:$Off))), - (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; -def: Pat<(i1 (load I32:$Rs)), - (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; - -def I1toI32: OutPatFrag<(ops node:$Rs), - (C2_muxii (i1 $Rs), 1, 0)>; - -def I32toI1: OutPatFrag<(ops node:$Rs), - (i1 (C2_tfrrp (i32 $Rs)))>; - -defm: Storexm_pat; -def: Storexm_simple_pat; +let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. 
+ def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), + (S4_ntstbit_i I32:$Rs, imm:$u5)>; + def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)), + (S4_ntstbit_r I32:$Rs, I32:$Rt)>; +} -def: Pat<(sra (add (sra I64:$src, u6_0ImmPred:$u6), 1), (i32 1)), - (S2_asr_i_p_rnd DoubleRegs:$src, imm:$u6)>, Requires<[HasV5T]>; -def: Pat<(sra I64:$src, u6_0ImmPred:$u6), - (S2_asr_i_p DoubleRegs:$src, imm:$u6)>; -def: Pat<(srl I64:$src, u6_0ImmPred:$u6), - (S2_lsr_i_p DoubleRegs:$src, imm:$u6)>; -def: Pat<(shl I64:$src, u6_0ImmPred:$u6), - (S2_asl_i_p DoubleRegs:$src, imm:$u6)>; +// Add extra complexity to prefer these instructions over bitsset/bitsclr. +// The reason is that tstbit/ntstbit can be folded into a compound instruction: +// if ([!]tstbit(...)) jump ... +let AddedComplexity = 100 in +def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), + (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; let AddedComplexity = 100 in -def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)), - (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>; - -def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; -def: Pat<(HexagonBARRIER), (Y2_barrier)>; - -def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off), - (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>; - +def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), + (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; -// Support for generating global address. -// Taken from X86InstrInfo.td. -def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, i32>, - SDTCisPtrTy<0>]>; -def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; -def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; +// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be +// represented as a compare against "value & 0xFF", which is an exact match +// for cmpb (same for cmph). 
The patterns below do not contain any additional +// complexity that would make them preferable, and if they were actually used +// instead of cmpb/cmph, they would result in a compare against register that +// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). +def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)), + (C4_nbitsclri I32:$Rs, imm:$u6)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), + (C4_nbitsclr I32:$Rs, I32:$Rt)>; +def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), + (C4_nbitsset I32:$Rs, I32:$Rt)>; -// Map TLS addressses to A2_tfrsi. -def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s32_0Imm:$addr)>; -def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s32_0Imm:$label)>; -def: Pat<(i64 imm:$v), (CONST64 imm:$v)>; -def: Pat<(i1 0), (PS_false)>; -def: Pat<(i1 1), (PS_true)>; +// --(11) Load ----------------------------------------------------------- +// -// Pseudo instructions. -def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; -def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; +def extloadv2i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2i8; +}]>; +def extloadv4i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4i8; +}]>; -def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2i8; +}]>; +def zextloadv4i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4i8; +}]>; -def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v2i8; +}]>; +def sextloadv4i8: PatFrag<(ops 
node:$ptr), (sextload node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::v4i8; +}]>; -// For tailcalls a HexagonTCRet SDNode has 3 SDNode Properties - a chain, -// Optional Flag and Variable Arguments. -// Its 1 Operand has pointer type. -def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// Patterns to select load-indexed: Rs + Off. +// - frameindex [+ imm], +multiclass Loadxfi_pat { + def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (MI AddrFI:$fi, imm:$Off))>; + def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; +} +// Patterns to select load-indexed: Rs + Off. +// - base reg [+ imm] +multiclass Loadxgi_pat { + def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))), + (VT (MI IntRegs:$Rs, imm:$Off))>; + def: Pat<(VT (Load (IsOrAdd I32:$Rs, ImmPred:$Off))), + (VT (MI IntRegs:$Rs, imm:$Off))>; + def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>; +} -def: Pat<(callseq_start timm:$amt, timm:$amt2), - (ADJCALLSTACKDOWN imm:$amt, imm:$amt2)>; -def: Pat<(callseq_end timm:$amt1, timm:$amt2), - (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>; +// Patterns to select load-indexed: Rs + Off. Combines Loadxfi + Loadxgi. +multiclass Loadxi_pat { + defm: Loadxfi_pat; + defm: Loadxgi_pat; +} -//Tail calls. -def: Pat<(HexagonTCRet tglobaladdr:$dst), - (PS_tailcall_i tglobaladdr:$dst)>; -def: Pat<(HexagonTCRet texternalsym:$dst), - (PS_tailcall_i texternalsym:$dst)>; -def: Pat<(HexagonTCRet I32:$dst), - (PS_tailcall_r I32:$dst)>; - -// Map from r0 = and(r1, 65535) to r0 = zxth(r1) -def: Pat<(and I32:$src1, 65535), - (A2_zxth IntRegs:$src1)>; - -// Map from r0 = and(r1, 255) to r0 = zxtb(r1). -def: Pat<(and I32:$src1, 255), - (A2_zxtb IntRegs:$src1)>; - -// Map Add(p1, true) to p1 = not(p1). -// Add(p1, false) should never be produced, -// if it does, it got to be mapped to NOOP. 
-def: Pat<(add I1:$src1, -1), - (C2_not PredRegs:$src1)>; - -// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i). -def: Pat<(select (not I1:$src1), s8_0ImmPred:$src2, s32_0ImmPred:$src3), - (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>; - -// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = C2_muxir(p0, r1, #i) -def: Pat<(select (not I1:$src1), s32_0ImmPred:$src2, - I32:$src3), - (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>; - -// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = C2_muxri (p0, #i, r1) -def: Pat<(select (not I1:$src1), IntRegs:$src2, s32_0ImmPred:$src3), - (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>; - -// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump. -def: Pat<(brcond (not I1:$src1), bb:$offset), - (J2_jumpf PredRegs:$src1, bb:$offset)>; - -// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo). -def: Pat<(i64 (sext_inreg I64:$src1, i32)), - (A2_sxtw (LoReg DoubleRegs:$src1))>; - -// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)). -def: Pat<(i64 (sext_inreg I64:$src1, i16)), - (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>; - -// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)). 
-def: Pat<(i64 (sext_inreg I64:$src1, i8)), - (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>; - -def: Pat<(brcond (i1 (setne I32:$Rs, I32:$Rt)), bb:$offset), - (J2_jumpf (C2_cmpeq I32:$Rs, I32:$Rt), bb:$offset)>; -def: Pat<(brcond (i1 (setne I32:$Rs, s10_0ImmPred:$s10)), bb:$offset), - (J2_jumpf (C2_cmpeqi I32:$Rs, imm:$s10), bb:$offset)>; -def: Pat<(brcond (i1 (setne I1:$Pu, (i1 -1))), bb:$offset), - (J2_jumpf PredRegs:$Pu, bb:$offset)>; -def: Pat<(brcond (i1 (setne I1:$Pu, (i1 0))), bb:$offset), - (J2_jumpt PredRegs:$Pu, bb:$offset)>; - -// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1) -def: Pat<(brcond (i1 (setlt I32:$Rs, s8_0ImmPred:$s8)), bb:$offset), - (J2_jumpf (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s8)), bb:$offset)>; +// Patterns to select load reg indexed: Rs + Off with a value modifier. +// - frameindex [+ imm] +multiclass Loadxfim_pat { + def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; + def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))), + (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; + def: Pat<(VT (Load AddrFI:$fi)), (VT (ValueMod (MI AddrFI:$fi, 0)))>; +} +// Patterns to select load reg indexed: Rs + Off with a value modifier. +// - base reg [+ imm] +multiclass Loadxgim_pat { + def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))), + (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; + def: Pat<(VT (Load (IsOrAdd I32:$Rs, ImmPred:$Off))), + (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; + def: Pat<(VT (Load I32:$Rs)), (VT (ValueMod (MI IntRegs:$Rs, 0)))>; +} -// Map from a 64-bit select to an emulated 64-bit mux. -// Hexagon does not support 64-bit MUXes; so emulate with combines. -def: Pat<(select I1:$src1, I64:$src2, - I64:$src3), - (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2), - (HiReg DoubleRegs:$src3)), - (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2), - (LoReg DoubleRegs:$src3)))>; - -// Map from a 1-bit select to logical ops. -// From LegalizeDAG.cpp: (B1 ? 
B2 : B3) <=> (B1 & B2)|(!B1&B3). -def: Pat<(select I1:$src1, I1:$src2, I1:$src3), - (C2_or (C2_and PredRegs:$src1, PredRegs:$src2), - (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>; - -// Map for truncating from 64 immediates to 32 bit immediates. -def: Pat<(i32 (trunc I64:$src)), - (LoReg DoubleRegs:$src)>; - -// Map for truncating from i64 immediates to i1 bit immediates. -def: Pat<(i1 (trunc I64:$src)), - (C2_tfrrp (LoReg DoubleRegs:$src))>; - -// rs <= rt -> !(rs > rt). -let AddedComplexity = 30 in -def: Pat<(i1 (setle I32:$src1, s32_0ImmPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>; - -// rs <= rt -> !(rs > rt). -def : Pat<(i1 (setle I32:$src1, I32:$src2)), - (i1 (C2_not (C2_cmpgt I32:$src1, I32:$src2)))>; - -// Rss <= Rtt -> !(Rss > Rtt). -def: Pat<(i1 (setle I64:$src1, I64:$src2)), - (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Map cmpne -> cmpeq. -// Hexagon_TODO: We should improve on this. -// rs != rt -> !(rs == rt). -let AddedComplexity = 30 in -def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)), - (C2_not (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2))>; - -// Convert setne back to xor for hexagon since we compute w/ pred registers. -def: Pat<(i1 (setne I1:$src1, I1:$src2)), - (C2_xor PredRegs:$src1, PredRegs:$src2)>; - -// Map cmpne(Rss) -> !cmpew(Rss). -// rs != rt -> !(rs == rt). -def: Pat<(i1 (setne I64:$src1, I64:$src2)), - (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>; - -// rs >= rt -> rt <= rs -def: Pat<(i1 (setge I32:$Rs, I32:$Rt)), - (C4_cmplte I32:$Rt, I32:$Rs)>; +// Patterns to select load reg indexed: Rs + Off with a value modifier. +// Combines Loadxfim + Loadxgim. +multiclass Loadxim_pat { + defm: Loadxfim_pat; + defm: Loadxgim_pat; +} -let AddedComplexity = 30 in -def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)), - (C2_cmpgti IntRegs:$Rs, (SDEC1 imm:$s10))>; - -// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss). -// rss >= rtt -> !(rtt > rss). 
-def: Pat<(i1 (setge I64:$src1, I64:$src2)), - (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>; - -// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm). -// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1). -// rs < rt -> !(rs >= rt). -let AddedComplexity = 30 in -def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)), - (C2_not (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2)))>; - -// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs) -def: Pat<(i1 (setuge I32:$src1, 0)), - (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>; - -// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1) -def: Pat<(i1 (setuge I32:$src1, u32_0ImmPred:$src2)), - (C2_cmpgtui IntRegs:$src1, (UDEC1 u32_0ImmPred:$src2))>; - -// Generate cmpgtu(Rs, #u9) -def: Pat<(i1 (setugt I32:$src1, u32_0ImmPred:$src2)), - (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>; - -// Map from Rs >= Rt -> !(Rt > Rs). -// rs >= rt -> !(rt > rs). -def: Pat<(i1 (setuge I64:$src1, I64:$src2)), - (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>; - -// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1). -// Map from (Rs <= Rt) -> !(Rs > Rt). -def: Pat<(i1 (setule I64:$src1, I64:$src2)), - (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>; - -// Sign extends. -// sext i1->i32 -def: Pat<(i32 (sext I1:$Pu)), - (C2_muxii I1:$Pu, -1, 0)>; - -// sext i1->i64 -def: Pat<(i64 (sext I1:$Pu)), - (A2_combinew (C2_muxii PredRegs:$Pu, -1, 0), - (C2_muxii PredRegs:$Pu, -1, 0))>; +// Patterns to select load reg reg-indexed: Rs + Rt< { + let AddedComplexity = 40 in + def: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; -// Zero extends. 
-// zext i1->i32 -def: Pat<(i32 (zext I1:$Pu)), - (C2_muxii PredRegs:$Pu, 1, 0)>; + let AddedComplexity = 20 in + def: Pat<(VT (Load (add I32:$Rs, I32:$Rt))), + (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; +} -// zext i1->i64 -def: Pat<(i64 (zext I1:$Pu)), - (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>; +// Patterns to select load reg reg-indexed: Rs + Rt< { + let AddedComplexity = 40 in + def: Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), + (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>; -// zext i32->i64 -def: Pat<(Zext64 I32:$Rs), - (ToZext64 IntRegs:$Rs)>; + let AddedComplexity = 20 in + def: Pat<(VT (Load (add I32:$Rs, I32:$Rt))), + (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>; +} -// Map from Rs = Pd to Pd = mux(Pd, #1, #0) -def: Pat<(i32 (anyext I1:$Pu)), - (C2_muxii PredRegs:$Pu, 1, 0)>; +// Pattern to select load long-offset reg-indexed: Addr + Rt< + : Pat<(VT (Load (add (shl IntRegs:$Rt, u2_0ImmPred:$u2), ImmPred:$Addr))), + (VT (MI IntRegs:$Rt, imm:$u2, ImmPred:$Addr))>; -// Map from Rss = Pd to Rdd = combine(#0, (mux(Pd, #1, #0))) -def: Pat<(i64 (anyext I1:$Pu)), - (ToZext64 (C2_muxii PredRegs:$Pu, 1, 0))>; +class Loadxum_pat + : Pat<(VT (Load (add (shl IntRegs:$Rt, u2_0ImmPred:$u2), ImmPred:$Addr))), + (VT (ValueMod (MI IntRegs:$Rt, imm:$u2, ImmPred:$Addr)))>; -// Clear the sign bit in a 64-bit register. -def ClearSign : OutPatFrag<(ops node:$Rss), - (A2_combinew (S2_clrbit_i (HiReg $Rss), 31), (LoReg $Rss))>; +// Pattern to select load absolute. 
+class Loada_pat + : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; -def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt), - (A2_addp - (M2_dpmpyuu_acc_s0 - (S2_lsr_i_p - (A2_addp - (M2_dpmpyuu_acc_s0 - (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32), - (HiReg $Rss), - (LoReg $Rtt)), - (A2_combinew (A2_tfrsi 0), - (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))), - 32), - (HiReg $Rss), - (HiReg $Rtt)), - (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>; +// Pattern to select load absolute with value modifier. +class Loadam_pat + : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; -// Multiply 64-bit unsigned and use upper result. -def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>; -// Multiply 64-bit signed and use upper result. -// -// For two signed 64-bit integers A and B, let A' and B' denote A and B -// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the -// sign bit of A (and identically for B). With this notation, the signed -// product A*B can be written as: -// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B') -// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B' -// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A'] -// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A'] +let AddedComplexity = 20 in { + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + // No sextloadi1. -def : Pat <(mulhs I64:$Rss, I64:$Rtt), - (A2_subp - (MulHU $Rss, $Rtt), - (A2_addp - (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)), - (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; + defm: Loadxi_pat; +} -// Hexagon specific ISD nodes. 
-def SDTHexagonALLOCA : SDTypeProfile<1, 2, - [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, - [SDNPHasChain]>; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +defm: Loadxim_pat; +let AddedComplexity = 60 in { + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + def: Loadxu_pat; + + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; + def: Loadxum_pat; +} + +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; +defm: Loadxr_pat; + +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; +defm: Loadxrm_pat; + +// Absolute address -def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)), - (PS_alloca IntRegs:$Rs, imm:$A)>; +let AddedComplexity = 60 in { + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; +} -def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; -def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; +let AddedComplexity = 30 in { + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + 
def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; +} + +// GP-relative address + +let AddedComplexity = 100 in { + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; + def: Loada_pat; +} + +let AddedComplexity = 70 in { + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + def: Loadam_pat; + + def: Loadam_pat; + def: Loadam_pat; +} -def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi imm:$dst)>; -def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi imm:$dst)>; -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(sub I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(and I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(or I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +// Sign-extending loads of i1 need to replicate the lowest bit throughout +// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should +// do the trick. 
+let AddedComplexity = 20 in +def: Pat<(i32 (sextloadi1 I32:$Rs)), + (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(sub I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(and I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(or I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +// Patterns for loads of i1: +def: Pat<(i1 (load AddrFI:$fi)), + (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; +def: Pat<(i1 (load (add I32:$Rs, anyimm0:$Off))), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; +def: Pat<(i1 (load I32:$Rs)), + (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(sub I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(and I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(or I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -let AddedComplexity = 100 in -def: Pat<(xor I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; +// HVX loads -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(sub I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(and I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_and 
DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(or I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -let AddedComplexity = 100 in -def: Pat<(xor I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +multiclass HvxLd_pat { + def: Pat<(VT (Load I32:$Rt)), (MI I32:$Rt, 0)>; + def: Pat<(VT (Load (add I32:$Rt, ImmPred:$s))), (MI I32:$Rt, imm:$s)>; +} -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(sub I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(and I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -def: Pat<(or I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -let AddedComplexity = 100 in -def: Pat<(xor I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(sub I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(and I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -def: Pat<(or I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; -let AddedComplexity = 100 in -def: Pat<(xor I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>; +let Predicates = [UseHVX] in { + multiclass HvxLdVs_pat { + defm: HvxLd_pat; + defm: HvxLd_pat; + defm: HvxLd_pat; + defm: HvxLd_pat; + } + 
defm: HvxLdVs_pat; + defm: HvxLdVs_pat; + defm: HvxLdVs_pat; + + multiclass HvxLdWs_pat { + defm: HvxLd_pat; + defm: HvxLd_pat; + defm: HvxLd_pat; + defm: HvxLd_pat; + } + defm: HvxLdWs_pat; + defm: HvxLdWs_pat; + defm: HvxLdWs_pat; +} -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_nac 
DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(xor I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; +// --(12) Store ---------------------------------------------------------- +// -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(xor I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_or IntRegs:$src1, 
IntRegs:$Rs, IntRegs:$Rt)>; -let AddedComplexity = 100 in -def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>; - -def: Pat<(sra I64:$src1, I32:$src2), (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>; -def: Pat<(srl I64:$src1, I32:$src2), (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>; -def: Pat<(shl I64:$src1, I32:$src2), (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>; -def: Pat<(shl I64:$src1, I32:$src2), (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>; - -def: Pat<(sra I32:$src1, I32:$src2), (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>; -def: Pat<(srl I32:$src1, I32:$src2), (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>; -def: Pat<(shl I32:$src1, I32:$src2), (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>; -def: Pat<(shl I32:$src1, I32:$src2), (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>; +class Storepi_pat + : Pat<(Store Value:$Rt, I32:$Rx, Offset:$s4), + (MI I32:$Rx, imm:$s4, Value:$Rt)>; -def SDTHexagonINSERT: - SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; -def SDTHexagonINSERTRP: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<0>, SDTCisVT<3, i64>]>; +def: Storepi_pat; +def: Storepi_pat; +def: Storepi_pat; +def: Storepi_pat; -def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; -def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>; +// Patterns for generating stores, where the address takes different forms: +// - frameindex, +// - 
frameindex + offset, +// - base + offset, +// - simple (base address without offset). +// These would usually be used together (via Storexi_pat defined below), but +// in some cases one may want to apply different properties (such as +// AddedComplexity) to the individual patterns. +class Storexi_fi_pat + : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; -def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), - (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>; -def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), - (S2_insertp I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>; -def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru), - (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>; -def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru), - (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>; +multiclass Storexi_fi_add_pat { + def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; + def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; +} -let AddedComplexity = 100 in -def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), - (i32 (extloadi8 (add I32:$b, 3))), - 24, 8), - (i32 16)), - (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), - (zextloadi8 I32:$b)), - (A2_swiz (L2_loadri_io I32:$b, 0))>; +multiclass Storexi_add_pat { + def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; + def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; +} -def SDTHexagonEXTRACTU: - SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; -def SDTHexagonEXTRACTURP: - SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, - SDTCisVT<2, i64>]>; - -def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; -def 
HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>; - -def: Pat<(HexagonEXTRACTU I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3), - (S2_extractu I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>; -def: Pat<(HexagonEXTRACTU I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3), - (S2_extractup I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>; -def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2), - (S2_extractu_rp I32:$src1, I64:$src2)>; -def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2), - (S2_extractup_rp I64:$src1, I64:$src2)>; +class Storexi_base_pat + : Pat<(Store Value:$Rt, I32:$Rs), + (MI IntRegs:$Rs, 0, Value:$Rt)>; -def n8_0ImmPred: PatLeaf<(i32 imm), [{ - int64_t V = N->getSExtValue(); - return -255 <= V && V <= 0; -}]>; +// Patterns for generating stores, where the address takes different forms, +// and where the value being stored is transformed through the value modifier +// ValueMod. The address forms are same as above. +class Storexim_fi_pat + : Pat<(Store Value:$Rs, AddrFI:$fi), + (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; -// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) -def: Pat<(mul I32:$src1, (ineg n8_0ImmPred:$src2)), - (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>; +multiclass Storexim_fi_add_pat { + def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; + def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), + (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; +} -multiclass MinMax_pats_p { - defm: T_MinMax_pats; +multiclass Storexim_add_pat { + def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; + def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)), + (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; } -def: Pat<(add Sext64:$Rs, I64:$Rt), - (A2_addsp (LoReg Sext64:$Rs), DoubleRegs:$Rt)>; +class Storexim_base_pat + : Pat<(Store Value:$Rt, I32:$Rs), + (MI IntRegs:$Rs, 0, 
(ValueMod Value:$Rt))>; -let AddedComplexity = 200 in { - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; - defm: MinMax_pats_p; +multiclass Storexi_pat { + defm: Storexi_fi_add_pat ; + def: Storexi_fi_pat ; + defm: Storexi_add_pat ; } -def callv3 : SDNode<"HexagonISD::CALL", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +multiclass Storexim_pat { + defm: Storexim_fi_add_pat ; + def: Storexim_fi_pat ; + defm: Storexim_add_pat ; +} -def callv3nr : SDNode<"HexagonISD::CALLnr", SDT_SPCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Reg< + : Pat<(Store Value:$Rt, (add (shl I32:$Ru, u2_0ImmPred:$u2), ImmPred:$A)), + (MI IntRegs:$Ru, imm:$u2, ImmPred:$A, Value:$Rt)>; +// Reg< + : Pat<(Store Value:$Ru, (add I32:$Rs, (shl I32:$Rt, u2_0ImmPred:$u2))), + (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; -// Map call instruction -def : Pat<(callv3 I32:$dst), - (J2_callr I32:$dst)>; -def : Pat<(callv3 tglobaladdr:$dst), - (J2_call tglobaladdr:$dst)>; -def : Pat<(callv3 texternalsym:$dst), - (J2_call texternalsym:$dst)>; -def : Pat<(callv3 tglobaltlsaddr:$dst), - (J2_call tglobaltlsaddr:$dst)>; +// Reg + Reg +class Storexr_add_pat + : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)), + (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>; -def : Pat<(callv3nr I32:$dst), - (PS_callr_nr I32:$dst)>; -def : Pat<(callv3nr tglobaladdr:$dst), - (PS_call_nr tglobaladdr:$dst)>; -def : Pat<(callv3nr texternalsym:$dst), - (PS_call_nr texternalsym:$dst)>; +class Storea_pat + : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; +class Stoream_pat + : Pat<(Store Value:$val, Addr:$addr), + (MI Addr:$addr, (ValueMod Value:$val))>; -def addrga: PatLeaf<(i32 AddrGA:$Addr)>; -def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; +// Regular stores in the DAG have two operands: value and address. 
+// Atomic stores also have two, but they are reversed: address, value. +// To use atomic stores with the patterns, they need to have their operands +// swapped. This relies on the knowledge that the F.Fragment uses names +// "ptr" and "val". +class SwapSt + : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode, + F.OperandTransform>; +def IMM_BYTE : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; -// Pats for instruction selection. +def IMM_HALF : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; -// A class to embed the usual comparison patfrags within a zext to i32. -// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same -// names, or else the frag's "body" won't match the operands. -class CmpInReg - : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>; +def IMM_WORD : SDNodeXFormgetSExtValue(); + return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); +}]>; -def: T_cmp32_rr_pat, i32>; -def: T_cmp32_rr_pat, i32>; - -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; -def: T_cmp32_rr_pat; - -def: T_cmp32_rr_pat, i1>; -def: T_cmp32_rr_pat, i1>; - -let AddedComplexity = 100 in { - def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), - 255), 0)), - (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), - 255), 0)), - (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; - def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), - 65535), 0)), - (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; - def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), - 65535), 0)), - (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; -} - -def: Pat<(i32 (zext (i1 (seteq I32:$Rs, s32_0ImmPred:$s8)))), - (A4_rcmpeqi IntRegs:$Rs, s32_0ImmPred:$s8)>; -def: Pat<(i32 (zext (i1 (setne I32:$Rs, s32_0ImmPred:$s8)))), - (A4_rcmpneqi IntRegs:$Rs, s32_0ImmPred:$s8)>; - -// Preserve the S2_tstbit_r generation -def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, 
I32:$src2)), - I32:$src1)), 0)))), - (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>; - -// The complexity of the combines involving immediates should be greater -// than the complexity of the combine with two registers. -let AddedComplexity = 50 in { -def: Pat<(HexagonCOMBINE IntRegs:$r, s32_0ImmPred:$i), - (A4_combineri IntRegs:$r, s32_0ImmPred:$i)>; - -def: Pat<(HexagonCOMBINE s32_0ImmPred:$i, IntRegs:$r), - (A4_combineir s32_0ImmPred:$i, IntRegs:$r)>; -} - -// The complexity of the combine with two immediates should be greater than -// the complexity of a combine involving a register. -let AddedComplexity = 75 in { -def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u6), - (A4_combineii imm:$s8, imm:$u6)>; -def: Pat<(HexagonCOMBINE s32_0ImmPred:$s8, s8_0ImmPred:$S8), - (A2_combineii imm:$s8, imm:$S8)>; -} - - -// Patterns to generate indexed loads with different forms of the address: -// - frameindex, -// - base + offset, -// - base (without offset). -multiclass Loadxm_pat { - def: Pat<(VT (Load AddrFI:$fi)), - (VT (ValueMod (MI AddrFI:$fi, 0)))>; - def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))), - (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; - def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))), - (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; - def: Pat<(VT (Load I32:$Rs)), - (VT (ValueMod (MI IntRegs:$Rs, 0)))>; -} - -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; -defm: Loadxm_pat; - -// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs). 
-def: Pat<(Aext64 I32:$src1), (ToZext64 IntRegs:$src1)>; - -multiclass T_LoadAbsReg_Pat { - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3)))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tglobaladdr:$src2)))), - (MI IntRegs:$src1, 0, tglobaladdr:$src2)>; - - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tconstpool:$src3)))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tconstpool:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tconstpool:$src2)))), - (MI IntRegs:$src1, 0, tconstpool:$src2)>; - - def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tjumptable:$src3)))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tjumptable:$src3)>; - def : Pat <(VT (ldOp (add IntRegs:$src1, - (HexagonCONST32 tjumptable:$src2)))), - (MI IntRegs:$src1, 0, tjumptable:$src2)>; -} - -let AddedComplexity = 60 in { -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; - -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; +def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; +def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; +def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; -defm : T_LoadAbsReg_Pat ; -defm : T_LoadAbsReg_Pat ; -} +// Even though the offset is not extendable in the store-immediate, we +// can still generate the fi# in the base address. If the final offset +// is not valid for the instruction, we will replace it with a scratch +// register. +class SmallStackStore + : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ + return isSmallStackStore(cast(N)); +}]>; -// 'def pats' for load instructions with base + register offset and non-zero -// immediate value. Immediate value is used to left-shift the second -// register operand. 
-class Loadxs_pat - : Pat<(VT (Load (add I32:$Rs, - (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; +// This is the complement of SmallStackStore. +class LargeStackStore + : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ + return !isSmallStackStore(cast(N)); +}]>; -let AddedComplexity = 40 in { - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; - def: Loadxs_pat; -} - -// 'def pats' for load instruction base + register offset and -// zero immediate value. -class Loadxs_simple_pat - : Pat<(VT (Load (add I32:$Rs, I32:$Rt))), - (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; +// Preferred addressing modes for various combinations of stored value +// and address computation. +// For stores where the address and value are both immediates, prefer +// store-immediate. The reason is that the constant-extender optimization +// can replace store-immediate with a store-register, but there is nothing +// to generate a store-immediate out of a store-register. 
+// +// C R F F+C R+C R+R R<; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; -} - -let AddedComplexity = 40 in -multiclass T_StoreAbsReg_Pats { - def : Pat<(stOp (VT RC:$src4), - (add (shl I32:$src1, u2_0ImmPred:$src2), - u32_0ImmPred:$src3)), - (MI IntRegs:$src1, u2_0ImmPred:$src2, u32_0ImmPred:$src3, RC:$src4)>; - - def : Pat<(stOp (VT RC:$src4), - (add (shl IntRegs:$src1, u2_0ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3))), - (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; - - def : Pat<(stOp (VT RC:$src4), - (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))), - (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; -} - -defm : T_StoreAbsReg_Pats ; -defm : T_StoreAbsReg_Pats ; -defm : T_StoreAbsReg_Pats ; -defm : T_StoreAbsReg_Pats ; - -class Storexs_pat - : Pat<(Store Value:$Ru, (add I32:$Rs, - (i32 (shl I32:$Rt, u2_0ImmPred:$u2)))), - (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; -let AddedComplexity = 40 in { - def: Storexs_pat; - def: Storexs_pat; - def: Storexs_pat; - def: Storexs_pat; -} +// First, match the unusual case of doubleword store into Reg+Imm4, i.e. +// a store where the offset Imm4 is a multiple of 4, but not of 8. This +// implies that Reg is also a proper multiple of 4. To still generate a +// doubleword store, add 4 to Reg, and subtract 4 from the offset. 
def s30_2ProperPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); @@ -1456,301 +2139,273 @@ def RoundTo8 : SDNodeXFormgetTargetConstant(Imm & -8, SDLoc(N), MVT::i32); }]>; -let AddedComplexity = 40 in +let AddedComplexity = 150 in def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)), (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>; -class Store_rr_pat - : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)), - (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>; - -let AddedComplexity = 20 in { - def: Store_rr_pat; - def: Store_rr_pat; - def: Store_rr_pat; - def: Store_rr_pat; -} - - -def IMM_BYTE : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); -}]>; - -def IMM_HALF : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); -}]>; - -def IMM_WORD : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); -}]>; - -def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; -def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; -def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; - -// Emit store-immediate, but only when the stored value will not be constant- -// extended. The reason for that is that there is no pass that can optimize -// constant extenders in store-immediate instructions. In some cases we can -// end up will a number of such stores, all of which store the same extended -// value (e.g. after unrolling a loop that initializes floating point array). - -// Predicates to determine if the 16-bit immediate is expressible as a sign- -// extended 8-bit immediate. Store-immediate-halfword will ignore any bits -// beyond 0..15, so we don't care what is in there. - -def i16in8ImmPred: PatLeaf<(i32 imm), [{ - int64_t v = (int16_t)N->getSExtValue(); - return v == (int64_t)(int8_t)v; -}]>; - -// Predicates to determine if the 32-bit immediate is expressible as a sign- -// extended 8-bit immediate. 
-def i32in8ImmPred: PatLeaf<(i32 imm), [{ - int64_t v = (int32_t)N->getSExtValue(); - return v == (int64_t)(int8_t)v; -}]>; - -class SmallStackStore - : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ - return isSmallStackStore(cast(N)); -}]>; - -let AddedComplexity = 40 in { - // Even though the offset is not extendable in the store-immediate, we - // can still generate the fi# in the base address. If the final offset - // is not valid for the instruction, we will replace it with a scratch - // register. - def: Storexm_fi_pat , s32_0ImmPred, - ToImmByte, S4_storeirb_io>; - def: Storexm_fi_pat , i16in8ImmPred, - ToImmHalf, S4_storeirh_io>; - def: Storexm_fi_pat , i32in8ImmPred, - ToImmWord, S4_storeiri_io>; - -// defm: Storexm_fi_add_pat ; -// defm: Storexm_fi_add_pat ; -// defm: Storexm_fi_add_pat ; - - defm: Storexm_add_pat; - defm: Storexm_add_pat; - defm: Storexm_add_pat; -} - -def: Storexm_simple_pat; -def: Storexm_simple_pat; -def: Storexm_simple_pat; - -// op(Ps, op(Pt, Pu)) -class LogLog_pat - : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))), - (MI I1:$Ps, I1:$Pt, I1:$Pu)>; - -// op(Ps, op(Pt, ~Pu)) -class LogLogNot_pat - : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))), - (MI I1:$Ps, I1:$Pt, I1:$Pu)>; - -def: LogLog_pat; -def: LogLog_pat; -def: LogLog_pat; -def: LogLog_pat; - -def: LogLogNot_pat; -def: LogLogNot_pat; -def: LogLogNot_pat; -def: LogLogNot_pat; - -//===----------------------------------------------------------------------===// -// PIC: Support for PIC compilations. 
The patterns and SD nodes defined -// below are needed to support code generation for PIC -//===----------------------------------------------------------------------===// - -def SDT_HexagonAtGot - : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; -def SDT_HexagonAtPcrel - : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; - -// AT_GOT address-of-GOT, address-of-global, offset-in-global -def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; -// AT_PCREL address-of-global -def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; - -def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), - (L2_loadri_io I32:$got, imm:$addr)>; -def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), - (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; -def: Pat<(HexagonAtPcrel I32:$addr), - (C4_addipc imm:$addr)>; - -def: Pat<(i64 (and I64:$Rs, (i64 (not I64:$Rt)))), - (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>; -def: Pat<(i64 (or I64:$Rs, (i64 (not I64:$Rt)))), - (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>; - -def: Pat<(add I32:$Rs, (add I32:$Ru, s32_0ImmPred:$s6)), - (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>; - -// Rd=add(Rs,sub(#s6,Ru)) -def: Pat<(add I32:$src1, (sub s32_0ImmPred:$src2, - I32:$src3)), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; +class Storexi_abs_pat + : Pat<(Store Value:$val, anyimm:$addr), + (MI (ToI32 $addr), 0, Value:$val)>; +class Storexim_abs_pat + : Pat<(Store Value:$val, anyimm:$addr), + (MI (ToI32 $addr), 0, (ValueMod Value:$val))>; -// Rd=sub(add(Rs,#s6),Ru) -def: Pat<(sub (add I32:$src1, s32_0ImmPred:$src2), - I32:$src3), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; +let AddedComplexity = 140 in { + def: Storexim_abs_pat; + def: Storexim_abs_pat; + def: Storexim_abs_pat; -// Rd=add(sub(Rs,Ru),#s6) -def: Pat<(add (sub I32:$src1, I32:$src3), - (s32_0ImmPred:$src2)), - (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>; 
+ def: Storexi_abs_pat; + def: Storexi_abs_pat; + def: Storexi_abs_pat; +} -def: Pat<(xor I64:$dst2, - (xor I64:$Rss, I64:$Rtt)), - (M4_xor_xacc DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt)>; -def: Pat<(or I32:$Ru, (and (i32 IntRegs:$_src_), s32_0ImmPred:$s10)), - (S4_or_andix IntRegs:$Ru, IntRegs:$_src_, imm:$s10)>; +// GP-relative address +let AddedComplexity = 120 in { + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat, I32, addrgp, S2_storerbgp>; + def: Storea_pat, I32, addrgp, S2_storerhgp>; + def: Storea_pat, I32, addrgp, S2_storerigp>; + def: Storea_pat, I64, addrgp, S2_storerdgp>; + + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; +} + +// Absolute address +let AddedComplexity = 110 in { + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat; + def: Storea_pat, I32, anyimm0, PS_storerbabs>; + def: Storea_pat, I32, anyimm1, PS_storerhabs>; + def: Storea_pat, I32, anyimm2, PS_storeriabs>; + def: Storea_pat, I64, anyimm3, PS_storerdabs>; + + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; + def: Stoream_pat; +} + +// Reg<; + def: Storexu_shl_pat; + def: Storexu_shl_pat; + def: Storexu_shl_pat; + def: Storexu_shl_pat; + def: Storexu_shl_pat; -def: Pat<(or I32:$src1, (and I32:$Rs, s32_0ImmPred:$s10)), - (S4_or_andi IntRegs:$src1, IntRegs:$Rs, imm:$s10)>; + def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)), + (S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>; +} -def: Pat<(or I32:$src1, (or I32:$Rs, s32_0ImmPred:$s10)), - (S4_or_ori IntRegs:$src1, IntRegs:$Rs, imm:$s10)>; +// Reg<; + def: Storexr_shl_pat; + def: Storexr_shl_pat; + def: Storexr_shl_pat; + def: Storexr_shl_pat; + def: Storexr_shl_pat; + def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)), + (S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>; 
+} +class SS_ : SmallStackStore; +class LS_ : LargeStackStore; -// Count trailing zeros: 64-bit. -def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; +multiclass IMFA_ { + defm: Storexim_fi_add_pat; +} +multiclass IFA_ { + defm: Storexi_fi_add_pat; +} -// Count trailing ones: 64-bit. -def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; +// Fi+Imm, store-immediate +let AddedComplexity = 80 in { + defm: IMFA_, anyint, u6_0ImmPred, ToImmByte, S4_storeirb_io>; + defm: IMFA_, anyint, u6_1ImmPred, ToImmHalf, S4_storeirh_io>; + defm: IMFA_, anyint, u6_2ImmPred, ToImmWord, S4_storeiri_io>; -// Define leading/trailing patterns that require zero-extensions to 64 bits. -def: Pat<(i64 (ctlz I64:$Rss)), (ToZext64 (S2_cl0p I64:$Rss))>; -def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>; -def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>; -def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>; + defm: IFA_, anyimm, u6_0ImmPred, S4_storeirb_io>; + defm: IFA_, anyimm, u6_1ImmPred, S4_storeirh_io>; + defm: IFA_, anyimm, u6_2ImmPred, S4_storeiri_io>; -def: Pat<(i64 (ctpop I64:$Rss)), (ToZext64 (S5_popcountp I64:$Rss))>; -def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>; + // For large-stack stores, generate store-register (prefer explicit Fi + // in the address). 
+ defm: IMFA_, anyimm, u6_0ImmPred, ToI32, S2_storerb_io>; + defm: IMFA_, anyimm, u6_1ImmPred, ToI32, S2_storerh_io>; + defm: IMFA_, anyimm, u6_2ImmPred, ToI32, S2_storeri_io>; +} -def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>; -def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>; +// Fi, store-immediate +let AddedComplexity = 70 in { + def: Storexim_fi_pat, anyint, ToImmByte, S4_storeirb_io>; + def: Storexim_fi_pat, anyint, ToImmHalf, S4_storeirh_io>; + def: Storexim_fi_pat, anyint, ToImmWord, S4_storeiri_io>; -def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>; -def: Pat<(bswap I64:$Rss), (A2_combinew (A2_swiz (LoReg $Rss)), - (A2_swiz (HiReg $Rss)))>; + def: Storexi_fi_pat, anyimm, S4_storeirb_io>; + def: Storexi_fi_pat, anyimm, S4_storeirh_io>; + def: Storexi_fi_pat, anyimm, S4_storeiri_io>; -let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. - def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), - (S4_ntstbit_i I32:$Rs, u5_0ImmPred:$u5)>; - def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)), - (S4_ntstbit_r I32:$Rs, I32:$Rt)>; + // For large-stack stores, generate store-register (prefer explicit Fi + // in the address). + def: Storexim_fi_pat, anyimm, ToI32, S2_storerb_io>; + def: Storexim_fi_pat, anyimm, ToI32, S2_storerh_io>; + def: Storexim_fi_pat, anyimm, ToI32, S2_storeri_io>; } -// Add extra complexity to prefer these instructions over bitsset/bitsclr. -// The reason is that tstbit/ntstbit can be folded into a compound instruction: -// if ([!]tstbit(...)) jump ... 
-let AddedComplexity = 100 in -def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), - (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; +// Fi+Imm, Fi, store-register +let AddedComplexity = 60 in { + defm: Storexi_fi_add_pat; + defm: Storexi_fi_add_pat; + defm: Storexi_fi_add_pat; + defm: Storexi_fi_add_pat; + defm: Storexi_fi_add_pat; + defm: Storexi_fi_add_pat; + defm: Storexim_fi_add_pat; -let AddedComplexity = 100 in -def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))), - (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; + def: Storexi_fi_pat; + def: Storexi_fi_pat; + def: Storexi_fi_pat; + def: Storexi_fi_pat; + def: Storexi_fi_pat; + def: Storexi_fi_pat; + def: Storexim_fi_pat; +} -// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be -// represented as a compare against "value & 0xFF", which is an exact match -// for cmpb (same for cmph). The patterns below do not contain any additional -// complexity that would make them preferable, and if they were actually used -// instead of cmpb/cmph, they would result in a compare against register that -// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). 
-def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)), - (C4_nbitsclri I32:$Rs, u6_0ImmPred:$u6)>; -def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), - (C4_nbitsclr I32:$Rs, I32:$Rt)>; -def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), - (C4_nbitsset I32:$Rs, I32:$Rt)>; +multiclass IMRA_ { + defm: Storexim_add_pat; +} +multiclass IRA_ { + defm: Storexi_add_pat; +} -def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6), - (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; -def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), - (HexagonCONST32 tglobaladdr:$global)), - (M4_mpyri_addi tglobaladdr:$global, IntRegs:$Rs, imm:$U6)>; -def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6), - (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(add (mul I32:$Rs, I32:$Rt), - (HexagonCONST32 tglobaladdr:$global)), - (M4_mpyrr_addi tglobaladdr:$global, IntRegs:$Rs, IntRegs:$Rt)>; -def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)), - (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>; -def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)), - (M4_mpyri_addr IntRegs:$src1, IntRegs:$src3, imm:$src2)>; +// Reg+Imm, store-immediate +let AddedComplexity = 50 in { + defm: IMRA_; + defm: IMRA_; + defm: IMRA_; -def: Pat<(add I32:$Ru, (mul (i32 IntRegs:$_src_), I32:$Rs)), - (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs)>; + defm: IRA_; + defm: IRA_; + defm: IRA_; +} -def: T_vcmp_pat; +// Reg+Imm, store-register +let AddedComplexity = 40 in { + defm: Storexi_pat; + defm: Storexi_pat; + defm: Storexi_pat; + defm: Storexi_pat; + defm: Storexi_pat; + defm: Storexi_pat; -class T_Shift_CommOp_pat - : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$U5), u32_0ImmPred:$u8), - (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>; + defm: Storexim_pat; + defm: Storexim_pat; + defm: Storexim_pat; + defm: Storexim_pat; -let AddedComplexity = 200 in { - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; - def : 
T_Shift_CommOp_pat ; + defm: Storexi_pat, I32, anyimm0, S2_storerb_io>; + defm: Storexi_pat, I32, anyimm1, S2_storerh_io>; + defm: Storexi_pat, I32, anyimm2, S2_storeri_io>; + defm: Storexi_pat, I64, anyimm3, S2_storerd_io>; } +// Reg+Reg let AddedComplexity = 30 in { - def : T_Shift_CommOp_pat ; - def : T_Shift_CommOp_pat ; -} - -class T_Shift_Op_pat - : Pat<(Op u32_0ImmPred:$u8, (ShOp IntRegs:$Rx, u5_0ImmPred:$U5)), - (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>; - -def : T_Shift_Op_pat ; -def : T_Shift_Op_pat ; + def: Storexr_add_pat; + def: Storexr_add_pat; + def: Storexr_add_pat; + def: Storexr_add_pat; + def: Storexr_add_pat; + def: Storexr_add_pat; -let AddedComplexity = 200 in { - def: Pat<(add addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)), - (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; - def: Pat<(add addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)), - (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; - def: Pat<(sub addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)), - (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; - def: Pat<(sub addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)), - (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>; + def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)), + (S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>; } -def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt), - (S4_lsli imm:$s6, IntRegs:$Rt)>; +// Reg, store-immediate +let AddedComplexity = 20 in { + def: Storexim_base_pat; + def: Storexim_base_pat; + def: Storexim_base_pat; + + def: Storexi_base_pat; + def: Storexi_base_pat; + def: Storexi_base_pat; +} + +// Reg, store-register +let AddedComplexity = 10 in { + def: Storexi_base_pat; + def: Storexi_base_pat; + def: Storexi_base_pat; + def: Storexi_base_pat; + def: Storexi_base_pat; + def: Storexi_base_pat; + + def: Storexim_base_pat; + def: Storexim_base_pat; + def: Storexim_base_pat; + def: Storexim_base_pat; + + def: Storexi_base_pat, I32, 
S2_storerb_io>; + def: Storexi_base_pat, I32, S2_storerh_io>; + def: Storexi_base_pat, I32, S2_storeri_io>; + def: Storexi_base_pat, I64, S2_storerd_io>; +} + +// HVX stores + +multiclass HvxSt_pat { + def: Pat<(Store Value:$Vs, I32:$Rt), + (MI I32:$Rt, 0, Value:$Vs)>; + def: Pat<(Store Value:$Vs, (add I32:$Rt, ImmPred:$s)), + (MI I32:$Rt, imm:$s, Value:$Vs)>; +} + +let Predicates = [UseHVX] in { + multiclass HvxStVs_pat { + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: HvxSt_pat; + } + defm: HvxStVs_pat; + defm: HvxStVs_pat; + defm: HvxStVs_pat; + + multiclass HvxStWs_pat { + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: HvxSt_pat; + defm: HvxSt_pat; + } + defm: HvxStWs_pat; + defm: HvxStWs_pat; + defm: HvxStWs_pat; +} -//===----------------------------------------------------------------------===// -// MEMOP -//===----------------------------------------------------------------------===// +// --(13) Memop ---------------------------------------------------------- +// def m5_0Imm8Pred : PatLeaf<(i32 imm), [{ int8_t V = N->getSExtValue(); @@ -1797,25 +2452,10 @@ def LogN2_16 : SDNodeXFormgetTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32); }]>; -def NegImm8 : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); -}]>; - -def NegImm16 : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); -}]>; - -def NegImm32 : SDNodeXFormgetSExtValue(); - return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); -}]>; - def IdImm : SDNodeXForm; -multiclass Memopxr_simple_pat { +multiclass Memopxr_base_pat { // Addr: i32 def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs), (MI I32:$Rs, 0, I32:$A)>; @@ -1844,11 +2484,11 @@ multiclass Memopxr_add_pat { - defm: Memopxr_simple_pat ; - defm: Memopxr_add_pat ; + defm: Memopxr_base_pat ; + defm: Memopxr_add_pat ; } -let AddedComplexity = 180 in { +let AddedComplexity = 200 in { // add reg defm: Memopxr_pat; @@ -1911,9 +2551,8 @@ let AddedComplexity = 180 
in { } -multiclass Memopxi_simple_pat { +multiclass Memopxi_base_pat { // Addr: i32 def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs), (MI I32:$Rs, 0, (ArgMod Arg:$A))>; @@ -1944,12 +2583,11 @@ multiclass Memopxi_add_pat { - defm: Memopxi_simple_pat ; - defm: Memopxi_add_pat ; + defm: Memopxi_base_pat ; + defm: Memopxi_add_pat ; } - -let AddedComplexity = 200 in { +let AddedComplexity = 220 in { // add imm defm: Memopxi_pat; @@ -2043,1244 +2681,152 @@ let AddedComplexity = 200 in { Log2_32, L4_ior_memopw_io>; } -def : T_CMP_pat ; -def : T_CMP_pat ; -def : T_CMP_pat ; - -// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1). -def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)), - (C4_cmpltei IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>; -// rs != rt -> !(rs == rt). -def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)), - (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>; +// --(14) PIC ------------------------------------------------------------ +// -// For the sequence -// zext( setult ( and(Rs, 255), u8)) -// Use the isdigit transformation below +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel + : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; -def u7_0PosImmPred : ImmLeaf 0 && isUInt<7>(Imm); -}]>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + (L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; -// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' -// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;. 
-// The isdigit transformation relies on two 'clever' aspects: -// 1) The data type is unsigned which allows us to eliminate a zero test after -// biasing the expression by 48. We are depending on the representation of -// the unsigned types, and semantics. -// 2) The front end has converted <= 9 into < 10 on entry to LLVM +// --(15) Call ----------------------------------------------------------- // -// For the C code: -// retval = ((c>='0') & (c<='9')) ? 1 : 0; -// The code is transformed upstream of llvm into -// retval = (c-48) < 10 ? 1 : 0; -let AddedComplexity = 139 in -def: Pat<(i32 (zext (i1 (setult (and I32:$src1, 255), u7_0PosImmPred:$src2)))), - (C2_muxii (A4_cmpbgtui IntRegs:$src1, (UDEC1 imm:$src2)), 0, 1)>; - -class Loada_pat - : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; +// Pseudo instructions. +def SDT_SPCallSeqStart + : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def SDT_SPCallSeqEnd + : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -class Loadam_pat - : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; +def callseq_start: SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end: SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -class Storea_pat - : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; +def SDT_SPCall: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -class Stoream_pat - : Pat<(Store Value:$val, Addr:$addr), - (MI Addr:$addr, (ValueMod Value:$val))>; +def HexagonTCRet: SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def callv3: SDNode<"HexagonISD::CALL", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def callv3nr: SDNode<"HexagonISD::CALLnr", SDT_SPCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -let AddedComplexity = 30 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; +def: Pat<(callseq_start 
timm:$amt, timm:$amt2), + (ADJCALLSTACKDOWN imm:$amt, imm:$amt2)>; +def: Pat<(callseq_end timm:$amt1, timm:$amt2), + (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>; - def: Stoream_pat; - def: Stoream_pat; - def: Stoream_pat; -} +def: Pat<(HexagonTCRet tglobaladdr:$dst), (PS_tailcall_i tglobaladdr:$dst)>; +def: Pat<(HexagonTCRet texternalsym:$dst), (PS_tailcall_i texternalsym:$dst)>; +def: Pat<(HexagonTCRet I32:$dst), (PS_tailcall_r I32:$dst)>; -def: Storea_pat, I32, addrgp, S2_storerbgp>; -def: Storea_pat, I32, addrgp, S2_storerhgp>; -def: Storea_pat, I32, addrgp, S2_storerigp>; -def: Storea_pat, I64, addrgp, S2_storerdgp>; +def: Pat<(callv3 I32:$dst), (J2_callr I32:$dst)>; +def: Pat<(callv3 tglobaladdr:$dst), (J2_call tglobaladdr:$dst)>; +def: Pat<(callv3 texternalsym:$dst), (J2_call texternalsym:$dst)>; +def: Pat<(callv3 tglobaltlsaddr:$dst), (J2_call tglobaltlsaddr:$dst)>; -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; +def: Pat<(callv3nr I32:$dst), (PS_callr_nr I32:$dst)>; +def: Pat<(callv3nr tglobaladdr:$dst), (PS_call_nr tglobaladdr:$dst)>; +def: Pat<(callv3nr texternalsym:$dst), (PS_call_nr texternalsym:$dst)>; - // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1" - // to "r0 = 1; memw(#foo) = r0" - let AddedComplexity = 100 in - def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)), - (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>; -} +def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; -class LoadAbs_pats - : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))), - (VT (MI tglobaladdr:$absaddr))>; +def: Pat<(retflag), (PS_jmpret (i32 R31))>; +def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>; -let AddedComplexity = 30 in { - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; - def: 
LoadAbs_pats ; - def: LoadAbs_pats ; - def: LoadAbs_pats ; -} -let AddedComplexity = 30 in -def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))), - (ToZext64 (PS_loadrubabs tglobaladdr:$absaddr))>; +// --(16) Branch --------------------------------------------------------- +// -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; +def: Pat<(br bb:$dst), (J2_jump b30_2Imm:$dst)>; +def: Pat<(brind I32:$dst), (J2_jumpr I32:$dst)>; -def: Loadam_pat; -def: Loadam_pat; +def: Pat<(brcond I1:$Pu, bb:$dst), + (J2_jumpt I1:$Pu, bb:$dst)>; +def: Pat<(brcond (not I1:$Pu), bb:$dst), + (J2_jumpf I1:$Pu, bb:$dst)>; +def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst), + (J2_jumpf I1:$Pu, bb:$dst)>; +def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst), + (J2_jumpt I1:$Pu, bb:$dst)>; -def: Stoream_pat; -def: Stoream_pat; -// Map from load(globaladdress) -> mem[u][bhwd](#foo) -class LoadGP_pats - : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))), - (VT (MI tglobaladdr:$global))>; +// --(17) Misc ----------------------------------------------------------- -let AddedComplexity = 100 in { - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; - def: LoadGP_pats ; -} - -// When the Interprocedural Global Variable optimizer realizes that a certain -// global variable takes only two constant values, it shrinks the global to -// a boolean. Catch those loads here in the following 3 patterns. 
-let AddedComplexity = 100 in { - def: LoadGP_pats ; - def: LoadGP_pats ; -} -// Transfer global address into a register -def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>; -def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi imm:$Rs)>; -def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>; - -let AddedComplexity = 30 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - - def: Stoream_pat; - def: Stoream_pat; - def: Stoream_pat; -} - -let AddedComplexity = 30 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; +// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' +// for C code of the form r = (c>='0' && c<='9') ? 1 : 0. +// The isdigit transformation relies on two 'clever' aspects: +// 1) The data type is unsigned which allows us to eliminate a zero test after +// biasing the expression by 48. We are depending on the representation of +// the unsigned types, and semantics. +// 2) The front end has converted <= 9 into < 10 on entry to LLVM. +// +// For the C code: +// retval = (c >= '0' && c <= '9') ? 1 : 0; +// The code is transformed upstream of llvm into +// retval = (c-48) < 10 ? 1 : 0; - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; +def u7_0PosImmPred : ImmLeaf 0 && isUInt<7>(Imm); +}]>; - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; -} +let AddedComplexity = 139 in +def: Pat<(i32 (zext (i1 (setult (and I32:$Rs, 255), u7_0PosImmPred:$u7)))), + (C2_muxii (A4_cmpbgtui IntRegs:$Rs, (UDEC1 imm:$u7)), 0, 1)>; -// Indexed store word - global address. -// memw(Rs+#u6:2)=#S8 let AddedComplexity = 100 in -defm: Storex_add_pat; - -// Load from a global address that has only one use in the current basic block. 
-let AddedComplexity = 100 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; -} - -// Store to a global address that has only one use in the current basic block. -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - - def: Stoream_pat; -} - -// i8/i16/i32 -> i64 loads -// We need a complexity of 120 here to override preceding handling of -// zextload. -let AddedComplexity = 120 in { - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; - - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; - - def: Loadam_pat; - def: Loadam_pat; - def: Loadam_pat; -} - -let AddedComplexity = 100 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; - - def: Loada_pat; - def: Loada_pat; -} - -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; - def: Storea_pat; -} - -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; -def: Loada_pat; - -def: Storea_pat, I32, addrgp, PS_storerbabs>; -def: Storea_pat, I32, addrgp, PS_storerhabs>; -def: Storea_pat, I32, addrgp, PS_storeriabs>; -def: Storea_pat, I64, addrgp, PS_storerdabs>; - -// Prefer this pattern to S2_asl_i_p_or for the special case of joining -// two 32-bit words into a 64-bit word. 
-let AddedComplexity = 200 in -def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)), - (A2_combinew I32:$a, I32:$b)>; +def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), + (i32 (extloadi8 (add I32:$b, 3))), + 24, 8), + (i32 16)), + (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), + (zextloadi8 I32:$b)), + (A2_swiz (L2_loadri_io I32:$b, 0))>; -def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)), - (i64 (zext (i32 (and I32:$a, (i32 65535)))))), - (shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))), - (shl (Aext64 I32:$d), (i32 48))), - (A2_combinew (A2_combine_ll I32:$d, I32:$c), - (A2_combine_ll I32:$b, I32:$a))>; // We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH // because the SDNode ISD::PREFETCH has properties MayLoad and MayStore. // We don't really want either one here. -def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; -def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, - [SDNPHasChain]>; +def SDTHexagonDCFETCH: SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; +def HexagonDCFETCH: SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, + [SDNPHasChain]>; def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3), (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>; def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)), (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>; -def f32ImmPred : PatLeaf<(f32 fpimm:$F)>; -def f64ImmPred : PatLeaf<(f64 fpimm:$F)>; - -def ftoi : SDNodeXFormgetValueAPF().bitcastToAPInt(); - return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N), - MVT::getIntegerVT(I.getBitWidth())); -}]>; - - -def: Pat<(sra (i64 (add (sra I64:$src1, u6_0ImmPred:$src2), 1)), (i32 1)), - (S2_asr_i_p_rnd I64:$src1, imm:$src2)>; - -let AddedComplexity = 20 in { - defm: Loadx_pat; - defm: Loadx_pat; -} - -let AddedComplexity = 60 in { - defm : T_LoadAbsReg_Pat ; - defm : T_LoadAbsReg_Pat ; -} - -let AddedComplexity = 40 in { - 
def: Loadxs_pat; - def: Loadxs_pat; -} - -let AddedComplexity = 20 in { - def: Loadxs_simple_pat; - def: Loadxs_simple_pat; -} - -let AddedComplexity = 80 in { - def: Loada_pat; - def: Loada_pat; - def: Loada_pat; -} - -let AddedComplexity = 100 in { - def: LoadGP_pats ; - def: LoadGP_pats ; -} - -let AddedComplexity = 20 in { - defm: Storex_pat; - defm: Storex_pat; -} - -// Simple patterns should be tried with the least priority. -def: Storex_simple_pat; -def: Storex_simple_pat; - -let AddedComplexity = 60 in { - defm : T_StoreAbsReg_Pats ; - defm : T_StoreAbsReg_Pats ; -} - -let AddedComplexity = 40 in { - def: Storexs_pat; - def: Storexs_pat; -} - -let AddedComplexity = 20 in { - def: Store_rr_pat; - def: Store_rr_pat; -} - -let AddedComplexity = 80 in { - def: Storea_pat; - def: Storea_pat; -} - -let AddedComplexity = 100 in { - def: Storea_pat; - def: Storea_pat; -} - -defm: Storex_pat; -defm: Storex_pat; -def: Storex_simple_pat; -def: Storex_simple_pat; - -def: Pat<(fadd F32:$src1, F32:$src2), - (F2_sfadd F32:$src1, F32:$src2)>; - -def: Pat<(fsub F32:$src1, F32:$src2), - (F2_sfsub F32:$src1, F32:$src2)>; - -def: Pat<(fmul F32:$src1, F32:$src2), - (F2_sfmpy F32:$src1, F32:$src2)>; - -let Predicates = [HasV5T] in { - def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)), (F2_sfmin F32:$Rs, F32:$Rt)>; - def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)), (F2_sfmax F32:$Rs, F32:$Rt)>; -} - -let AddedComplexity = 100, Predicates = [HasV5T] in { - class SfSel12 - : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt), - (MI F32:$Rs, F32:$Rt)>; - class SfSel21 - : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rt, F32:$Rs), - (MI F32:$Rs, F32:$Rt)>; - - def: SfSel12; - def: SfSel12; - def: SfSel12; - def: SfSel12; - def: SfSel21; - def: SfSel21; - def: SfSel21; - def: SfSel21; -} - -class T_fcmp32_pat - : Pat<(i1 (OpNode F32:$src1, F32:$src2)), - (MI F32:$src1, F32:$src2)>; -class T_fcmp64_pat - : Pat<(i1 (OpNode F64:$src1, F64:$src2)), - (MI F64:$src1, F64:$src2)>; - -def: 
T_fcmp32_pat; -def: T_fcmp32_pat; -def: T_fcmp32_pat; -def: T_fcmp32_pat; - -def: T_fcmp64_pat; -def: T_fcmp64_pat; -def: T_fcmp64_pat; -def: T_fcmp64_pat; - -let Predicates = [HasV5T] in -multiclass T_fcmp_pats { - // IntRegs - def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), - (IntMI F32:$src1, F32:$src2)>; - // DoubleRegs - def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), - (DoubleMI F64:$src1, F64:$src2)>; -} - -defm : T_fcmp_pats ; -defm : T_fcmp_pats ; -defm : T_fcmp_pats ; - -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations. -//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass unord_Pats { - // IntRegs - def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (IntMI F32:$src1, F32:$src2))>; - - // DoubleRegs - def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (DoubleMI F64:$src1, F64:$src2))>; -} - -defm : unord_Pats ; -defm : unord_Pats ; -defm : unord_Pats ; - -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for the following dags: -// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2)) -// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2) -// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2) -// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2)) -//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass eq_ordgePats { - // IntRegs - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (C2_not (IntMI F32:$src1, F32:$src2))>; - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (IntMI F32:$src1, F32:$src2)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (IntMI F32:$src1, F32:$src2)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (C2_not 
(IntMI F32:$src1, F32:$src2))>; - - // DoubleRegs - def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (C2_not (DoubleMI F64:$src1, F64:$src2))>; - def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (DoubleMI F64:$src1, F64:$src2)>; - def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (DoubleMI F64:$src1, F64:$src2)>; - def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (C2_not (DoubleMI F64:$src1, F64:$src2))>; -} - -defm : eq_ordgePats; -defm : eq_ordgePats; -defm : eq_ordgePats; +def SDTHexagonALLOCA + : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; +def HexagonALLOCA + : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, [SDNPHasChain]>; -//===----------------------------------------------------------------------===// -// Multiclass to define 'Def Pats' for the following dags: -// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1)) -// seteq(setolt(op1, op2), 1) -> setogt(op2, op1) -// setne(setolt(op1, op2), 0) -> setogt(op2, op1) -// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1)) -//===----------------------------------------------------------------------===// -let Predicates = [HasV5T] in -multiclass eq_ordltPats { - // IntRegs - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (C2_not (IntMI F32:$src2, F32:$src1))>; - def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (IntMI F32:$src2, F32:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)), - (IntMI F32:$src2, F32:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)), - (C2_not (IntMI F32:$src2, F32:$src1))>; - - // DoubleRegs - def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (C2_not (DoubleMI F64:$src2, F64:$src1))>; - def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)), - (DoubleMI F64:$src2, F64:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - (DoubleMI F64:$src2, F64:$src1)>; - def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)), - 
(C2_not (DoubleMI F64:$src2, F64:$src1))>; -} - -defm : eq_ordltPats; -defm : eq_ordltPats; - - -// o. seto inverse of setuo. http://llvm.org/docs/LangRef.html#i_fcmp -let Predicates = [HasV5T] in { - def: Pat<(i1 (seto F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>; - def: Pat<(i1 (seto F32:$src1, f32ImmPred:$src2)), - (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; - def: Pat<(i1 (seto F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>; - def: Pat<(i1 (seto F64:$src1, f64ImmPred:$src2)), - (C2_not (F2_dfcmpuo (CONST64 (ftoi $src2)), F64:$src1))>; -} - -// Ordered lt. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setolt F32:$src1, F32:$src2)), - (F2_sfcmpgt F32:$src2, F32:$src1)>; - def: Pat<(i1 (setolt F32:$src1, f32ImmPred:$src2)), - (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; - def: Pat<(i1 (setolt F64:$src1, F64:$src2)), - (F2_dfcmpgt F64:$src2, F64:$src1)>; - def: Pat<(i1 (setolt F64:$src1, f64ImmPred:$src2)), - (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>; -} - -// Unordered lt. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setult F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (F2_sfcmpgt F32:$src2, F32:$src1))>; - def: Pat<(i1 (setult F32:$src1, f32ImmPred:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), - (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; - def: Pat<(i1 (setult F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (F2_dfcmpgt F64:$src2, F64:$src1))>; - def: Pat<(i1 (setult F64:$src1, f64ImmPred:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), - (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1))>; -} - -// Ordered le. -let Predicates = [HasV5T] in { - // rs <= rt -> rt >= rs. 
- def: Pat<(i1 (setole F32:$src1, F32:$src2)), - (F2_sfcmpge F32:$src2, F32:$src1)>; - def: Pat<(i1 (setole F32:$src1, f32ImmPred:$src2)), - (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; - - // Rss <= Rtt -> Rtt >= Rss. - def: Pat<(i1 (setole F64:$src1, F64:$src2)), - (F2_dfcmpge F64:$src2, F64:$src1)>; - def: Pat<(i1 (setole F64:$src1, f64ImmPred:$src2)), - (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>; -} - -// Unordered le. -let Predicates = [HasV5T] in { -// rs <= rt -> rt >= rs. - def: Pat<(i1 (setule F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (F2_sfcmpge F32:$src2, F32:$src1))>; - def: Pat<(i1 (setule F32:$src1, f32ImmPred:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), - (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>; - def: Pat<(i1 (setule F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (F2_dfcmpge F64:$src2, F64:$src1))>; - def: Pat<(i1 (setule F64:$src1, f64ImmPred:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), - (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1))>; -} - -// Ordered ne. -let Predicates = [HasV5T] in { - def: Pat<(i1 (setone F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; - def: Pat<(i1 (setone F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; - def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>; - def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>; -} - -// Unordered ne. 
-let Predicates = [HasV5T] in { - def: Pat<(i1 (setune F32:$src1, F32:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, F32:$src2), - (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>; - def: Pat<(i1 (setune F64:$src1, F64:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, F64:$src2), - (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>; - def: Pat<(i1 (setune F32:$src1, f32ImmPred:$src2)), - (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))), - (C2_not (F2_sfcmpeq F32:$src1, - (f32 (A2_tfrsi (ftoi $src2))))))>; - def: Pat<(i1 (setune F64:$src1, f64ImmPred:$src2)), - (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))), - (C2_not (F2_dfcmpeq F64:$src1, - (CONST64 (ftoi $src2)))))>; -} - -// Besides set[o|u][comparions], we also need set[comparisons]. -let Predicates = [HasV5T] in { - // lt. - def: Pat<(i1 (setlt F32:$src1, F32:$src2)), - (F2_sfcmpgt F32:$src2, F32:$src1)>; - def: Pat<(i1 (setlt F32:$src1, f32ImmPred:$src2)), - (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; - def: Pat<(i1 (setlt F64:$src1, F64:$src2)), - (F2_dfcmpgt F64:$src2, F64:$src1)>; - def: Pat<(i1 (setlt F64:$src1, f64ImmPred:$src2)), - (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>; - - // le. - // rs <= rt -> rt >= rs. - def: Pat<(i1 (setle F32:$src1, F32:$src2)), - (F2_sfcmpge F32:$src2, F32:$src1)>; - def: Pat<(i1 (setle F32:$src1, f32ImmPred:$src2)), - (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>; - - // Rss <= Rtt -> Rtt >= Rss. - def: Pat<(i1 (setle F64:$src1, F64:$src2)), - (F2_dfcmpge F64:$src2, F64:$src1)>; - def: Pat<(i1 (setle F64:$src1, f64ImmPred:$src2)), - (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>; - - // ne. 
- def: Pat<(i1 (setne F32:$src1, F32:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>; - def: Pat<(i1 (setne F64:$src1, F64:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>; - def: Pat<(i1 (setne F32:$src1, f32ImmPred:$src2)), - (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>; - def: Pat<(i1 (setne F64:$src1, f64ImmPred:$src2)), - (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>; -} - - -def: Pat<(f64 (fpextend F32:$Rs)), (F2_conv_sf2df F32:$Rs)>; -def: Pat<(f32 (fpround F64:$Rs)), (F2_conv_df2sf F64:$Rs)>; - -def: Pat<(f32 (sint_to_fp I32:$Rs)), (F2_conv_w2sf I32:$Rs)>; -def: Pat<(f32 (sint_to_fp I64:$Rs)), (F2_conv_d2sf I64:$Rs)>; -def: Pat<(f64 (sint_to_fp I32:$Rs)), (F2_conv_w2df I32:$Rs)>; -def: Pat<(f64 (sint_to_fp I64:$Rs)), (F2_conv_d2df I64:$Rs)>; - -def: Pat<(f32 (uint_to_fp I32:$Rs)), (F2_conv_uw2sf I32:$Rs)>; -def: Pat<(f32 (uint_to_fp I64:$Rs)), (F2_conv_ud2sf I64:$Rs)>; -def: Pat<(f64 (uint_to_fp I32:$Rs)), (F2_conv_uw2df I32:$Rs)>; -def: Pat<(f64 (uint_to_fp I64:$Rs)), (F2_conv_ud2df I64:$Rs)>; - -def: Pat<(i32 (fp_to_sint F32:$Rs)), (F2_conv_sf2w_chop F32:$Rs)>; -def: Pat<(i32 (fp_to_sint F64:$Rs)), (F2_conv_df2w_chop F64:$Rs)>; -def: Pat<(i64 (fp_to_sint F32:$Rs)), (F2_conv_sf2d_chop F32:$Rs)>; -def: Pat<(i64 (fp_to_sint F64:$Rs)), (F2_conv_df2d_chop F64:$Rs)>; - -def: Pat<(i32 (fp_to_uint F32:$Rs)), (F2_conv_sf2uw_chop F32:$Rs)>; -def: Pat<(i32 (fp_to_uint F64:$Rs)), (F2_conv_df2uw_chop F64:$Rs)>; -def: Pat<(i64 (fp_to_uint F32:$Rs)), (F2_conv_sf2ud_chop F32:$Rs)>; -def: Pat<(i64 (fp_to_uint F64:$Rs)), (F2_conv_df2ud_chop F64:$Rs)>; - -// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. 
-let Predicates = [HasV5T] in { - def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>; - def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>; - def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>; - def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>; -} - -def : Pat <(fma F32:$src2, F32:$src3, F32:$src1), - (F2_sffma F32:$src1, F32:$src2, F32:$src3)>; - -def : Pat <(fma (fneg F32:$src2), F32:$src3, F32:$src1), - (F2_sffms F32:$src1, F32:$src2, F32:$src3)>; - -def : Pat <(fma F32:$src2, (fneg F32:$src3), F32:$src1), - (F2_sffms F32:$src1, F32:$src2, F32:$src3)>; - -def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$imm), - (C2_muxir I1:$Pu, F32:$Rs, (ftoi $imm))>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$Pu, f32ImmPred:$imm, F32:$Rt), - (C2_muxri I1:$Pu, (ftoi $imm), F32:$Rt)>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$src1, F32:$src2, F32:$src3), - (C2_mux I1:$src1, F32:$src2, F32:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4), - (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select I1:$src1, F64:$src2, F64:$src3), - (C2_vmux I1:$src1, F64:$src2, F64:$src3)>, - Requires<[HasV5T]>; - -def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4), - (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>, - Requires<[HasV5T]>; - -// Map from p0 = pnot(p0); r0 = select(p0, #i, r1) -// => r0 = mux(p0, #i, r1) -def: Pat<(select (not I1:$src1), f32ImmPred:$src2, F32:$src3), - (C2_muxir I1:$src1, F32:$src3, (ftoi $src2))>, - Requires<[HasV5T]>; - -// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i) -// => r0 = mux(p0, r1, #i) -def: Pat<(select (not I1:$src1), F32:$src2, f32ImmPred:$src3), - (C2_muxri I1:$src1, (ftoi $src3), F32:$src2)>, - Requires<[HasV5T]>; - -def: Pat<(i32 (fp_to_sint F64:$src1)), - (LoReg (F2_conv_df2d_chop F64:$src1))>, - Requires<[HasV5T]>; - -def : Pat <(fabs F32:$src1), - (S2_clrbit_i F32:$src1, 31)>, - 
Requires<[HasV5T]>; - -def : Pat <(fneg F32:$src1), - (S2_togglebit_i F32:$src1, 31)>, - Requires<[HasV5T]>; - -def: Pat<(fabs F64:$Rs), - (REG_SEQUENCE DoubleRegs, - (S2_clrbit_i (HiReg $Rs), 31), isub_hi, - (i32 (LoReg $Rs)), isub_lo)>; - -def: Pat<(fneg F64:$Rs), - (REG_SEQUENCE DoubleRegs, - (S2_togglebit_i (HiReg $Rs), 31), isub_hi, - (i32 (LoReg $Rs)), isub_lo)>; - -def: Pat<(mul I64:$Rss, I64:$Rtt), - (A2_combinew - (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))), - (LoReg $Rss), - (HiReg $Rtt)), - (LoReg $Rtt), - (HiReg $Rss)), - (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))))>; - -def alignedload : PatFrag<(ops node:$addr), (load $addr), [{ - return isAlignedMemNode(dyn_cast(N)); -}]>; - -def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{ - return !isAlignedMemNode(dyn_cast(N)); -}]>; - -def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ - return isAlignedMemNode(dyn_cast(N)); -}]>; - -def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{ - return !isAlignedMemNode(dyn_cast(N)); -}]>; - - -multiclass vS32b_ai_pats { - // Aligned stores - def : Pat<(alignednontemporalstore (VTSgl HvxVR:$src1), IntRegs:$addr), - (V6_vS32b_nt_ai IntRegs:$addr, 0, (VTSgl HvxVR:$src1))>; - def : Pat<(alignedstore (VTSgl HvxVR:$src1), IntRegs:$addr), - (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl HvxVR:$src1))>; - def : Pat<(unalignedstore (VTSgl HvxVR:$src1), IntRegs:$addr), - (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl HvxVR:$src1))>; - - // Fold Add R+OFF into vector store. 
- let AddedComplexity = 10 in { - def : Pat<(alignednontemporalstore (VTSgl HvxVR:$src1), - (add IntRegs:$src2, IsVecOff:$offset)), - (V6_vS32b_nt_ai IntRegs:$src2, imm:$offset, - (VTSgl HvxVR:$src1))>; - def : Pat<(alignedstore (VTSgl HvxVR:$src1), - (add IntRegs:$src2, IsVecOff:$offset)), - (V6_vS32b_ai IntRegs:$src2, imm:$offset, - (VTSgl HvxVR:$src1))>; - def : Pat<(unalignedstore (VTSgl HvxVR:$src1), - (add IntRegs:$src2, IsVecOff:$offset)), - (V6_vS32Ub_ai IntRegs:$src2, imm:$offset, - (VTSgl HvxVR:$src1))>; - } -} - -defm : vS32b_ai_pats ; -defm : vS32b_ai_pats ; -defm : vS32b_ai_pats ; -defm : vS32b_ai_pats ; - - -multiclass vL32b_ai_pats { - // Aligned loads - def : Pat < (VTSgl (alignednontemporalload IntRegs:$addr)), - (V6_vL32b_nt_ai IntRegs:$addr, 0) >; - def : Pat < (VTSgl (alignedload IntRegs:$addr)), - (V6_vL32b_ai IntRegs:$addr, 0) >; - def : Pat < (VTSgl (unalignedload IntRegs:$addr)), - (V6_vL32Ub_ai IntRegs:$addr, 0) >; - - // Fold Add R+OFF into vector load. - let AddedComplexity = 10 in { - def : Pat<(VTSgl (alignednontemporalload (add IntRegs:$src2, IsVecOff:$offset))), - (V6_vL32b_nt_ai IntRegs:$src2, imm:$offset)>; - def : Pat<(VTSgl (alignedload (add IntRegs:$src2, IsVecOff:$offset))), - (V6_vL32b_ai IntRegs:$src2, imm:$offset)>; - def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, IsVecOff:$offset))), - (V6_vL32Ub_ai IntRegs:$src2, imm:$offset)>; - } -} - -defm : vL32b_ai_pats ; -defm : vL32b_ai_pats ; -defm : vL32b_ai_pats ; -defm : vL32b_ai_pats ; - -multiclass STrivv_pats { - def : Pat<(alignednontemporalstore (VTSgl HvxWR:$src1), IntRegs:$addr), - (PS_vstorerw_nt_ai IntRegs:$addr, 0, (VTSgl HvxWR:$src1))>; - def : Pat<(alignedstore (VTSgl HvxWR:$src1), IntRegs:$addr), - (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl HvxWR:$src1))>; - def : Pat<(unalignedstore (VTSgl HvxWR:$src1), IntRegs:$addr), - (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl HvxWR:$src1))>; -} - -defm : STrivv_pats ; -defm : STrivv_pats ; -defm : STrivv_pats ; -defm : 
STrivv_pats ; - -multiclass LDrivv_pats { - def : Pat<(VTSgl (alignednontemporalload I32:$addr)), - (PS_vloadrw_nt_ai I32:$addr, 0)>; - def : Pat<(VTSgl (alignedload I32:$addr)), - (PS_vloadrw_ai I32:$addr, 0)>; - def : Pat<(VTSgl (unalignedload I32:$addr)), - (PS_vloadrwu_ai I32:$addr, 0)>; -} - -defm : LDrivv_pats ; -defm : LDrivv_pats ; -defm : LDrivv_pats ; -defm : LDrivv_pats ; - -let Predicates = [HasV60T] in { - def: Pat<(select I1:$Pu, (VecI32 HvxVR:$Vs), HvxVR:$Vt), - (PS_vselect I1:$Pu, HvxVR:$Vs, HvxVR:$Vt)>; - def: Pat<(select I1:$Pu, (VecPI32 HvxWR:$Vs), HvxWR:$Vt), - (PS_wselect I1:$Pu, HvxWR:$Vs, HvxWR:$Vt)>; -} - - -def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, - SDTCisSubVecOfVec<1, 0>]>; - -def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; - -def: Pat<(VecPI32 (HexagonVCOMBINE (VecI32 HvxVR:$Vs), (VecI32 HvxVR:$Vt))), - (V6_vcombine HvxVR:$Vs, HvxVR:$Vt)>; - -def SDTHexagonVPACK: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVec<1>]>; - -def HexagonVPACKE: SDNode<"HexagonISD::VPACKE", SDTHexagonVPACK>; -def HexagonVPACKO: SDNode<"HexagonISD::VPACKO", SDTHexagonVPACK>; - -def: Pat<(VecI8 (HexagonVPACKE (VecI8 HvxVR:$Vs), (VecI8 HvxVR:$Vt))), - (V6_vpackeb HvxVR:$Vs, HvxVR:$Vt)>; -def: Pat<(VecI8 (HexagonVPACKO (VecI8 HvxVR:$Vs), (VecI8 HvxVR:$Vt))), - (V6_vpackob HvxVR:$Vs, HvxVR:$Vt)>; -def: Pat<(VecI16 (HexagonVPACKE (VecI16 HvxVR:$Vs), (VecI16 HvxVR:$Vt))), - (V6_vpackeh HvxVR:$Vs, HvxVR:$Vt)>; -def: Pat<(VecI16 (HexagonVPACKO (VecI16 HvxVR:$Vs), (VecI16 HvxVR:$Vt))), - (V6_vpackoh HvxVR:$Vs, HvxVR:$Vt)>; - -def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; -def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; -def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; -def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; -def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; -def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; -def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; -def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; - - -multiclass bitconvert_32 { - def : Pat <(b (bitconvert (a 
IntRegs:$src))), - (b IntRegs:$src)>; - def : Pat <(a (bitconvert (b IntRegs:$src))), - (a IntRegs:$src)>; -} - -multiclass bitconvert_64 { - def : Pat <(b (bitconvert (a DoubleRegs:$src))), - (b DoubleRegs:$src)>; - def : Pat <(a (bitconvert (b DoubleRegs:$src))), - (a DoubleRegs:$src)>; -} - -// Bit convert vector types to integers. -defm : bitconvert_32; -defm : bitconvert_32; -defm : bitconvert_64; -defm : bitconvert_64; -defm : bitconvert_64; - -def: Pat<(sra (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), - (S2_asr_i_vh DoubleRegs:$src1, imm:$src2)>; -def: Pat<(srl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), - (S2_lsr_i_vh DoubleRegs:$src1, imm:$src2)>; -def: Pat<(shl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2), - (S2_asl_i_vh DoubleRegs:$src1, imm:$src2)>; - -def: Pat<(sra (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), - (S2_asr_i_vw DoubleRegs:$src1, imm:$src2)>; -def: Pat<(srl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), - (S2_lsr_i_vw DoubleRegs:$src1, imm:$src2)>; -def: Pat<(shl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2), - (S2_asl_i_vw DoubleRegs:$src1, imm:$src2)>; - -def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), - (A2_svaddh IntRegs:$src1, IntRegs:$src2)>; - -def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))), - (A2_svsubh IntRegs:$src1, IntRegs:$src2)>; - -def SDTHexagonVSPLAT: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; -def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>; - -// Replicate the low 8-bits from 32-bits input register into each of the -// four bytes of 32-bits destination register. -def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>; - -// Replicate the low 16-bits from 32-bits input register into each of the -// four halfwords of 64-bits destination register. 
-def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>; - -def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), - (A2_combineii imm:$s8, imm:$s8)>; -def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (A2_combinew I32:$Rs, I32:$Rs)>; - - -class VArith_pat - : Pat <(Op Type:$Rss, Type:$Rtt), - (MI Type:$Rss, Type:$Rtt)>; - -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; - -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; - -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; -def: VArith_pat ; - -def: Pat<(v2i32 (sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), - (S2_asr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), - (S2_lsr_i_vw V2I32:$b, imm:$c)>; -def: Pat<(v2i32 (shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c)))), - (S2_asl_i_vw V2I32:$b, imm:$c)>; - -def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), - (S2_asr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), - (S2_lsr_i_vh V4I16:$b, imm:$c)>; -def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c)))), - (S2_asl_i_vh V4I16:$b, imm:$c)>; - - -def SDTHexagonVShift - : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>; - -def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>; -def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>; -def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>; - -def: Pat<(v2i32 (HexagonVASL V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_asl_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVASL V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_asl_i_vh V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVASR V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_asr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVASR V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_asr_i_vh 
V4I16:$Rs, imm:$u4)>; -def: Pat<(v2i32 (HexagonVLSR V2I32:$Rs, u5_0ImmPred:$u5)), - (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>; -def: Pat<(v4i16 (HexagonVLSR V4I16:$Rs, u4_0ImmPred:$u4)), - (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>; - -class vshift_rr_pat - : Pat <(Op Value:$Rs, I32:$Rt), - (MI Value:$Rs, I32:$Rt)>; - -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; -def: vshift_rr_pat ; - - -class vcmp_vi1_pat - : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)), - (MI InVal:$Rs, InVal:$Rt)>; - -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; - -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; -def: vcmp_vi1_pat; - -def: Pat<(mul V2I32:$Rs, V2I32:$Rt), - (PS_vmulw DoubleRegs:$Rs, DoubleRegs:$Rt)>; -def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)), - (PS_vmulw_acc DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt)>; - - -// Adds two v4i8: Hexagon does not have an insn for this one, so we -// use the double add v8i8, and use only the low part of the result. -def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), - (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>; - -// Subtract two v4i8: Hexagon does not have an insn for this one, so we -// use the double sub v8i8, and use only the low part of the result. -def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))), - (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>; - -// -// No 32 bit vector mux. -// -def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)), - (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; -def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)), - (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>; - -// -// 64-bit vector mux. 
-// -def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)), - (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; -def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)), - (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; -def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)), - (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; - -// -// No 32 bit vector compare. -// -def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)), - (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)), - (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>; - -def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)), - (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>; -def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)), - (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>; - - -class InvertCmp_pat - : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)), - (InvMI Value:$Rt, Value:$Rs)>; - -// Map from a compare operation to the corresponding instruction with the -// order of operands reversed, e.g. x > y --> cmp.lt(y,x). -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; - -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; -def: InvertCmp_pat; - -// Map from vcmpne(Rss) -> !vcmpew(Rss). -// rs != rt -> !(rs == rt). 
-def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)), - (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>; - - -// Truncate: from vector B copy all 'E'ven 'B'yte elements: -// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; -def: Pat<(v4i8 (trunc V4I16:$Rs)), - (S2_vtrunehb V4I16:$Rs)>; - -// Truncate: from vector B copy all 'O'dd 'B'yte elements: -// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; -// S2_vtrunohb - -// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: -// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; -// S2_vtruneh - -def: Pat<(v2i16 (trunc V2I32:$Rs)), - (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>; - -def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; -def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; -def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; -def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; - -// Sign extends a v2i8 into a v2i32. -def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), - (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; - -// Sign extends a v2i16 into a v2i32. -def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), - (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; - - -// Multiplies two v2i16 and returns a v2i32. We are using here the -// saturating multiply, as hexagon does not provide a non saturating -// vector multiply, and saturation does not impact the result that is -// in double precision of the operands. - -// Multiplies two v2i16 vectors: as Hexagon does not have a multiply -// with the C semantics for this one, this pattern uses the half word -// multiply vmpyh that takes two v2i16 and returns a v2i32. This is -// then truncated to fit this back into a v2i16 and to simulate the -// wrap around semantics for unsigned in C. 
-def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt), - (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>; - -def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), - (LoReg (S2_vtrunewh (A2_combineii 0, 0), - (vmpyh V2I16:$Rs, V2I16:$Rt)))>; - -// Multiplies two v4i16 vectors. -def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), - (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)), - (vmpyh (LoReg $Rs), (LoReg $Rt)))>; - -def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt), - (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))), - (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>; - -// Multiplies two v4i8 vectors. -def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), - (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>, - Requires<[HasV5T]>; - -def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), - (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>; - -// Multiplies two v8i8 vectors. -def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), - (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))), - (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>, - Requires<[HasV5T]>; - -def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), - (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))), - (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>; - -// Truncated store from v4i16 to v4i8. -def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v4i8; }]>; - -// Truncated store from v2i32 to v2i16. -def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr), - (truncstore node:$val, node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v2i16; }]>; - -def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt), - (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs), - (LoReg $Rs))))>; - -def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt), - (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>; - - -// Zero and sign extended load from v2i8 into v2i16. 
-def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v2i8; }]>; - -def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), - [{ return cast(N)->getMemoryVT() == MVT::v2i8; }]>; - -def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)), - (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>; - -def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)), - (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>; - -def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)), - (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>; - -def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)), - (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>; +def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)), + (PS_alloca IntRegs:$Rs, imm:$A)>; +def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; +def: Pat<(HexagonBARRIER), (Y2_barrier)>; // Read cycle counter. -// def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, [SDNPHasChain]>; diff --git a/llvm/test/CodeGen/Hexagon/PR33749.ll b/llvm/test/CodeGen/Hexagon/PR33749.ll new file mode 100644 index 0000000..7f85330 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/PR33749.ll @@ -0,0 +1,50 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s +; This testcase used to fail with "cannot select 'i1 = add x, y'". 
+; Check for some sane output: +; CHECK: xor(p{{[0-3]}},p{{[0-3]}}) + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define void @foo(i32* nocapture %a0) local_unnamed_addr #0 { +b1: + %v2 = getelementptr inbounds i32, i32* %a0, i32 26 + %v3 = load i32, i32* %v2, align 4 + %v4 = add nsw i32 %v3, 1 + %v5 = load i32, i32* %a0, align 4 + br label %b6 + +b6: ; preds = %b28, %b1 + %v7 = phi i32 [ %v29, %b28 ], [ %v5, %b1 ] + %v8 = mul nsw i32 %v4, %v7 + %v9 = add nsw i32 %v8, %v7 + %v10 = mul i32 %v7, %v7 + %v11 = mul i32 %v10, %v9 + %v12 = add nsw i32 %v11, 1 + %v13 = mul nsw i32 %v12, %v7 + %v14 = add nsw i32 %v13, %v7 + %v15 = mul i32 %v10, %v14 + %v16 = and i32 %v15, 1 + %v17 = add nsw i32 %v16, -1 + %v18 = mul i32 %v10, %v7 + %v19 = mul i32 %v18, %v11 + %v20 = mul i32 %v19, %v17 + %v21 = and i32 %v20, 1 + %v22 = add nsw i32 %v21, -1 + %v23 = mul nsw i32 %v22, %v3 + %v24 = sub nsw i32 %v7, %v23 + %v25 = mul i32 %v10, %v24 + %v26 = sub i32 0, %v7 + %v27 = icmp eq i32 %v25, %v26 + br i1 %v27, label %b30, label %b28 + +b28: ; preds = %b6 + %v29 = add nsw i32 %v3, %v7 + store i32 %v29, i32* %a0, align 4 + br label %b6 + +b30: ; preds = %b6 + ret void +} + +attributes #0 = { norecurse nounwind "target-cpu"="hexagonv60" } diff --git a/llvm/test/CodeGen/Hexagon/addrmode-indoff.ll b/llvm/test/CodeGen/Hexagon/addrmode-indoff.ll index 6ea2b3d..274add3 100644 --- a/llvm/test/CodeGen/Hexagon/addrmode-indoff.ll +++ b/llvm/test/CodeGen/Hexagon/addrmode-indoff.ll @@ -3,72 +3,90 @@ ; Bug 6840. Use absolute+index addressing. 
@ga = common global [1024 x i8] zeroinitializer, align 8 -@gb = common global [1024 x i8] zeroinitializer, align 8 -; CHECK: memub(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##ga) -define zeroext i8 @lf2(i32 %i) nounwind readonly { +; CHECK-LABEL: test0 +; CHECK: memub(r{{[0-9]+}}+##ga) +define zeroext i8 @test0(i32 %i) nounwind readonly { entry: - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i - %0 = load i8, i8* %arrayidx, align 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i + %0 = load i8, i8* %t, align 1 ret i8 %0 } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##gb) -define signext i8 @lf2s(i32 %i) nounwind readonly { +; CHECK-LABEL: test1 +; CHECK: memb(r{{[0-9]+}}+##ga) +define signext i8 @test1(i32 %i) nounwind readonly { entry: - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %i - %0 = load i8, i8* %arrayidx, align 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i + %0 = load i8, i8* %t, align 1 ret i8 %0 } -; CHECK: memub(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##ga) -define zeroext i8 @lf3(i32 %i) nounwind readonly { +; CHECK-LABEL: test2 +; CHECK: memub(r{{[0-9]+}}<<#1+##ga) +define zeroext i8 @test2(i32 %i) nounwind readonly { entry: - %mul = shl nsw i32 %i, 2 - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %mul - %0 = load i8, i8* %arrayidx, align 1 + %j = shl nsw i32 %i, 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + %0 = load i8, i8* %t, align 1 ret i8 %0 } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##gb) -define signext i8 @lf3s(i32 %i) nounwind readonly { +; CHECK-LABEL: test3 +; CHECK: memb(r{{[0-9]+}}<<#1+##ga) +define signext i8 @test3(i32 %i) nounwind readonly { entry: - %mul = shl nsw i32 %i, 2 - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %mul - %0 = load i8, i8* %arrayidx, align 1 + %j = shl nsw i32 %i, 1 + %t 
= getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + %0 = load i8, i8* %t, align 1 ret i8 %0 } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##ga) -define void @sf4(i32 %i, i8 zeroext %j) nounwind { +; CHECK-LABEL: test4 +; CHECK: memub(r{{[0-9]+}}<<#2+##ga) +define zeroext i8 @test4(i32 %i) nounwind readonly { entry: - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i - store i8 %j, i8* %arrayidx, align 1 - ret void + %j = shl nsw i32 %i, 2 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + %0 = load i8, i8* %t, align 1 + ret i8 %0 +} + +; CHECK-LABEL: test5 +; CHECK: memb(r{{[0-9]+}}<<#2+##ga) +define signext i8 @test5(i32 %i) nounwind readonly { +entry: + %j = shl nsw i32 %i, 2 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + %0 = load i8, i8* %t, align 1 + ret i8 %0 } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#0{{ *}}+{{ *}}##gb) -define void @sf4s(i32 %i, i8 signext %j) nounwind { +; CHECK-LABEL: test10 +; CHECK: memb(r{{[0-9]+}}+##ga) +define void @test10(i32 %i, i8 zeroext %v) nounwind { entry: - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %i - store i8 %j, i8* %arrayidx, align 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %i + store i8 %v, i8* %t, align 1 ret void } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##ga) -define void @sf5(i32 %i, i8 zeroext %j) nounwind { +; CHECK-LABEL: test11 +; CHECK: memb(r{{[0-9]+}}<<#1+##ga) +define void @test11(i32 %i, i8 signext %v) nounwind { entry: - %mul = shl nsw i32 %i, 2 - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %mul - store i8 %j, i8* %arrayidx, align 1 + %j = shl nsw i32 %i, 1 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + store i8 %v, i8* %t, align 1 ret void } -; CHECK: memb(r{{[0-9]+}}{{ *}}<<{{ *}}#2{{ *}}+{{ *}}##gb) -define void @sf5s(i32 %i, i8 signext %j) 
nounwind { +; CHECK-LABEL: test12 +; CHECK: memb(r{{[0-9]+}}<<#2+##ga) +define void @test12(i32 %i, i8 zeroext %v) nounwind { entry: - %mul = shl nsw i32 %i, 2 - %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @gb, i32 0, i32 %mul - store i8 %j, i8* %arrayidx, align 1 + %j = shl nsw i32 %i, 2 + %t = getelementptr inbounds [1024 x i8], [1024 x i8]* @ga, i32 0, i32 %j + store i8 %v, i8* %t, align 1 ret void } diff --git a/llvm/test/CodeGen/Hexagon/block-addr.ll b/llvm/test/CodeGen/Hexagon/block-addr.ll index 5af3a69..bd59e59 100644 --- a/llvm/test/CodeGen/Hexagon/block-addr.ll +++ b/llvm/test/CodeGen/Hexagon/block-addr.ll @@ -1,7 +1,6 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s -; CHECK: .LJTI -; CHECK-DAG: r[[REG:[0-9]+]] = memw(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+<<#[0-9]+}}) +; CHECK-DAG: r[[REG:[0-9]+]] = memw(r{{[0-9]+<<#[0-9]+}}+##.LJTI{{.*}}) ; CHECK-DAG: jumpr r[[REG]] define void @main() #0 { diff --git a/llvm/test/CodeGen/Hexagon/hwloop-loop1.ll b/llvm/test/CodeGen/Hexagon/hwloop-loop1.ll index 427efdc..af908b6 100644 --- a/llvm/test/CodeGen/Hexagon/hwloop-loop1.ll +++ b/llvm/test/CodeGen/Hexagon/hwloop-loop1.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s +; RUN: llc -march=hexagon -mcpu=hexagonv5 -enable-pipeliner=0 < %s | FileCheck %s ; ; Generate loop1 instruction for double loop sequence. 
diff --git a/llvm/test/CodeGen/Hexagon/ifcvt-diamond-bug-2016-08-26.ll b/llvm/test/CodeGen/Hexagon/ifcvt-diamond-bug-2016-08-26.ll index 91b9aaa..19eb2d1 100644 --- a/llvm/test/CodeGen/Hexagon/ifcvt-diamond-bug-2016-08-26.ll +++ b/llvm/test/CodeGen/Hexagon/ifcvt-diamond-bug-2016-08-26.ll @@ -1,31 +1,34 @@ -; RUN: llc -march=hexagon -hexagon-eif=0 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-eif=0 -disable-machine-sink < %s | FileCheck %s target triple = "hexagon" %struct.0 = type { i16, i16 } @t = external local_unnamed_addr global %struct.0, align 2 -define void @foo(i32 %p) local_unnamed_addr #0 { +define void @foo(i32 %p, i16 %x, i16 %y, i16 %z) local_unnamed_addr #0 { entry: %conv90 = trunc i32 %p to i16 %call105 = call signext i16 @bar(i16 signext 16384, i16 signext undef) #0 %call175 = call signext i16 @bar(i16 signext %conv90, i16 signext 4) #0 %call197 = call signext i16 @bar(i16 signext %conv90, i16 signext 4) #0 + %x1 = add i16 %x, 1 + %z1 = add i16 %z, 1 %cmp199 = icmp eq i16 %call197, 0 br i1 %cmp199, label %if.then200, label %if.else201 -; CHECK-DAG: [[R4:r[0-9]+]] = #4 +; CHECK-DAG: [[R4:r[0-9]+]] = add ; CHECK: p0 = cmp.eq(r0,#0) -; CHECK: if (!p0.new) [[R3:r[0-9]+]] = #3 +; CHECK: if (!p0) [[R3:r[0-9]+]] = add(r{{[0-9]+}},#3) ; CHECK-DAG: if (!p0) memh(##t) = [[R3]] ; CHECK-DAG: if (p0) memh(##t) = [[R4]] if.then200: ; preds = %entry - store i16 4, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2 - store i16 0, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 1), align 2 + store i16 %x1, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2 + store i16 %z1, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 1), align 2 br label %if.end202 if.else201: ; preds = %entry - store i16 3, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 0, i32 0), align 2 + %y1 = add i16 %y, 3 + store i16 %y1, i16* getelementptr inbounds (%struct.0, %struct.0* @t, i32 
0, i32 0), align 2 br label %if.end202 if.end202: ; preds = %if.else201, %if.then200 @@ -34,4 +37,4 @@ if.end202: ; preds = %if.else201, %if.the declare signext i16 @bar(i16 signext, i16 signext) local_unnamed_addr #0 -attributes #0 = { optsize "target-cpu"="hexagonv55" } +attributes #0 = { "target-cpu"="hexagonv55" } diff --git a/llvm/test/CodeGen/Hexagon/sdata-array.ll b/llvm/test/CodeGen/Hexagon/sdata-array.ll index 89ef460..cea86bd 100644 --- a/llvm/test/CodeGen/Hexagon/sdata-array.ll +++ b/llvm/test/CodeGen/Hexagon/sdata-array.ll @@ -5,9 +5,9 @@ @foo = common global [4 x i8] zeroinitializer, align 1 -define void @set() nounwind { +define void @set(i8 %x) nounwind { entry: - store i8 0, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @foo, i32 0, i32 0), align 1 + store i8 %x, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @foo, i32 0, i32 0), align 1 ret void } diff --git a/llvm/test/CodeGen/Hexagon/store-imm-amode.ll b/llvm/test/CodeGen/Hexagon/store-imm-amode.ll new file mode 100644 index 0000000..463559a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/store-imm-amode.ll @@ -0,0 +1,97 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that a store with a proper addressing mode is selected for various +; cases of storing an immediate value. 
+ + +@var_i8 = global [10 x i8] zeroinitializer, align 8 + +; CHECK-LABEL: store_imm_i8: +; CHECK: memb(r0+#0) = #-1 +define void @store_imm_i8(i8* %p) nounwind { + store i8 255, i8* %p, align 4 + ret void +} + +; CHECK-LABEL: store_rr_i8: +; CHECK: [[RV:r[0-9]+]] = #255 +; CHECK: memb(r0+r1<<#0) = [[RV]] +define void @store_rr_i8(i8* %p, i32 %x) nounwind { + %t0 = getelementptr i8, i8* %p, i32 %x + store i8 255, i8* %t0, align 4 + ret void +} + +; CHECK-LABEL: store_io_i8: +; CHECK: [[RV:r[0-9]+]] = #255 +; CHECK: memb(r0+##var_i8) = [[RV]] +define void @store_io_i8(i32 %x) nounwind { + %t0 = getelementptr [10 x i8], [10 x i8]* @var_i8, i32 0, i32 %x + store i8 255, i8* %t0, align 4 + ret void +} + +; CHECK-LABEL: store_ur_i8: +; CHECK: [[RV:r[0-9]+]] = #255 +; CHECK: memb(r0<<#2+##var_i8) = [[RV]] +define void @store_ur_i8(i32 %x) nounwind { + %t0 = shl i32 %x, 2 + %t1 = getelementptr [10 x i8], [10 x i8]* @var_i8, i32 0, i32 %t0 + store i8 255, i8* %t1, align 4 + ret void +} + +@var_i16 = global [10 x i16] zeroinitializer, align 8 + +; CHECK-LABEL: store_imm_i16: +; CHECK: memh(r0+#0) = #-1 +define void @store_imm_i16(i16* %p) nounwind { + store i16 65535, i16* %p, align 4 + ret void +} + +; CHECK-LABEL: store_rr_i16: +; CHECK: [[RV:r[0-9]+]] = ##65535 +; CHECK: memh(r0+r1<<#1) = [[RV]] +define void @store_rr_i16(i16* %p, i32 %x) nounwind { + %t0 = getelementptr i16, i16* %p, i32 %x + store i16 65535, i16* %t0, align 4 + ret void +} + +; CHECK-LABEL: store_ur_i16: +; CHECK: [[RV:r[0-9]+]] = ##65535 +; CHECK: memh(r0<<#1+##var_i16) = [[RV]] +define void @store_ur_i16(i32 %x) nounwind { + %t0 = getelementptr [10 x i16], [10 x i16]* @var_i16, i32 0, i32 %x + store i16 65535, i16* %t0, align 4 + ret void +} + +@var_i32 = global [10 x i32] zeroinitializer, align 8 + +; CHECK-LABEL: store_imm_i32: +; CHECK: memw(r0+#0) = #-1 +define void @store_imm_i32(i32* %p) nounwind { + store i32 4294967295, i32* %p, align 4 + ret void +} + +; CHECK-LABEL: store_rr_i32: +; CHECK: 
[[RV:r[0-9]+]] = #-1 +; CHECK: memw(r0+r1<<#2) = [[RV]] +define void @store_rr_i32(i32* %p, i32 %x) nounwind { + %t0 = getelementptr i32, i32* %p, i32 %x + store i32 4294967295, i32* %t0, align 4 + ret void +} + +; CHECK-LABEL: store_ur_i32: +; CHECK: [[RV:r[0-9]+]] = #-1 +; CHECK: memw(r0<<#2+##var_i32) = [[RV]] +define void @store_ur_i32(i32 %x) nounwind { + %t0 = getelementptr [10 x i32], [10 x i32]* @var_i32, i32 0, i32 %x + store i32 4294967295, i32* %t0, align 4 + ret void +} + diff --git a/llvm/test/CodeGen/Hexagon/store-imm-stack-object.ll b/llvm/test/CodeGen/Hexagon/store-imm-stack-object.ll index 8de3109..c0eaea2 100644 --- a/llvm/test/CodeGen/Hexagon/store-imm-stack-object.ll +++ b/llvm/test/CodeGen/Hexagon/store-imm-stack-object.ll @@ -3,8 +3,7 @@ target triple = "hexagon" ; CHECK-LABEL: test1: -; CHECK: [[REG1:(r[0-9]+)]] = ##875770417 -; CHECK-DAG: memw(r29+#4) = [[REG1]] +; CHECK-DAG: memw(r29+#4) = ##875770417 ; CHECK-DAG: memw(r29+#8) = #51 ; CHECK-DAG: memh(r29+#12) = #50 ; CHECK-DAG: memb(r29+#15) = #49 diff --git a/llvm/test/CodeGen/Hexagon/store-shift.ll b/llvm/test/CodeGen/Hexagon/store-shift.ll index f7bed98..f92e23f 100644 --- a/llvm/test/CodeGen/Hexagon/store-shift.ll +++ b/llvm/test/CodeGen/Hexagon/store-shift.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s -; CHECK-DAG: r[[BASE:[0-9]+]] += add +; CHECK-DAG: r[[BASE:[0-9]+]] = add(r1,#1000) ; CHECK-DAG: r[[IDX0:[0-9]+]] = add(r2,#5) ; CHECK-DAG: r[[IDX1:[0-9]+]] = add(r2,#6) ; CHECK-DAG: memw(r0+r[[IDX0]]<<#2) = r3 diff --git a/llvm/test/CodeGen/Hexagon/tfr-to-combine.ll b/llvm/test/CodeGen/Hexagon/tfr-to-combine.ll index 50879ff..86801db 100644 --- a/llvm/test/CodeGen/Hexagon/tfr-to-combine.ll +++ b/llvm/test/CodeGen/Hexagon/tfr-to-combine.ll @@ -6,30 +6,33 @@ @b = external global i16 @c = external global i16 -; Function Attrs: nounwind -define i64 @test1() #0 { +declare void @test0a(i32, i32) #0 +declare void @test0b(i32, i32, i32, i32) #0 + +; CHECK-LABEL: test1: ; 
CHECK: combine(#10,#0) +define i32 @test1() #0 { entry: - store i16 0, i16* @a, align 2 - store i16 10, i16* @b, align 2 - ret i64 10 + call void @test0a(i32 0, i32 10) #0 + ret i32 10 } -; Function Attrs: nounwind -define i64 @test2() #0 { +; CHECK-LABEL: test2: ; CHECK: combine(#0,r{{[0-9]+}}) +define i32 @test2() #0 { entry: - store i16 0, i16* @a, align 2 - %0 = load i16, i16* @c, align 2 - %conv2 = zext i16 %0 to i64 - ret i64 %conv2 + %t0 = load i16, i16* @c, align 2 + %t1 = zext i16 %t0 to i32 + call void @test0b(i32 %t1, i32 0, i32 %t1, i32 0) + ret i32 0 } -; Function Attrs: nounwind -define i64 @test4() #0 { +; CHECK-LABEL: test3: ; CHECK: combine(#0,#100) +define i32 @test3() #0 { entry: - store i16 100, i16* @b, align 2 - store i16 0, i16* @a, align 2 - ret i64 0 + call void @test0a(i32 100, i32 0) + ret i32 0 } + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/Hexagon/tls_pic.ll b/llvm/test/CodeGen/Hexagon/tls_pic.ll index 2c2be0d..c6e5f5a 100644 --- a/llvm/test/CodeGen/Hexagon/tls_pic.ll +++ b/llvm/test/CodeGen/Hexagon/tls_pic.ll @@ -5,8 +5,8 @@ ; CHECK-LABEL: test_initial_exec ; CHECK-DAG: = add(pc,##_GLOBAL_OFFSET_TABLE_@PCREL) -; CHECK-DAG: = ##src_ie@IEGOT -; CHECK-DAG: = ##dst_ie@IEGOT +; CHECK-DAG: ##src_ie@IEGOT +; CHECK-DAG: ##dst_ie@IEGOT ; CHECK-NOT: call define i32 @test_initial_exec() nounwind { entry: @@ -23,8 +23,8 @@ entry: ; CHECK-LABEL: test_dynamic ; CHECK-DAG: = add(pc,##_GLOBAL_OFFSET_TABLE_@PCREL) -; CHECK-DAG: = ##src_gd@GDGOT -; CHECK-DAG: = ##dst_gd@GDGOT +; CHECK-DAG: ##src_gd@GDGOT +; CHECK-DAG: ##dst_gd@GDGOT ; CHECK-DAG: call src_gd@GDPLT ; CHECK-DAG: call dst_gd@GDPLT diff --git a/llvm/test/CodeGen/Hexagon/tls_static.ll b/llvm/test/CodeGen/Hexagon/tls_static.ll index dbd3bd7..f4e882b 100644 --- a/llvm/test/CodeGen/Hexagon/tls_static.ll +++ b/llvm/test/CodeGen/Hexagon/tls_static.ll @@ -4,8 +4,8 @@ @src_le = thread_local global i32 0, align 4 ; CHECK-LABEL: test_local_exec -; CHECK-DAG: = ##src_le@TPREL -; 
CHECK-DAG: = ##dst_le@TPREL +; CHECK-DAG: ##src_le@TPREL +; CHECK-DAG: ##dst_le@TPREL define i32 @test_local_exec() nounwind { entry: %0 = load i32, i32* @src_le, align 4 diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-load-1.ll b/llvm/test/CodeGen/Hexagon/vect/vect-load-1.ll index fbaf61d..0c3aaef 100644 --- a/llvm/test/CodeGen/Hexagon/vect/vect-load-1.ll +++ b/llvm/test/CodeGen/Hexagon/vect/vect-load-1.ll @@ -1,11 +1,10 @@ ; RUN: llc -march=hexagon < %s -; Used to fail with "Cannot select: v2i32,ch = load 0x16c5890, 0x16f76e0, 0x16f76e0", 0x16c5890, 0x16f76e0, 0x16f76e0" +; +; Used to fail with "Cannot select: v2i32,ch = load 0x16c5890, 0x16f76e0, 0x16f76e0" -; ModuleID = 'bugpoint-reduced-simplified.bc' -target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-a0:0-n32" target triple = "hexagon-unknown-linux-gnu" -define void @foo() nounwind { +define void @foo(<2 x i8>* %p) nounwind { entry: br label %polly.loop_header @@ -17,7 +16,7 @@ polly.loop_header: ; preds = %polly.loop_body, %e br i1 %0, label %polly.loop_body, label %polly.loop_after polly.loop_body: ; preds = %polly.loop_header - %_p_vec_full = load <2 x i8>, <2 x i8>* undef, align 8 + %_p_vec_full = load <2 x i8>, <2 x i8>* %p, align 8 %1 = sext <2 x i8> %_p_vec_full to <2 x i32> %p_vec = mul <2 x i32> %1, %mulp_vec = add <2 x i32> %p_vec, diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll b/llvm/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll index d60d014..5ebc3372 100644 --- a/llvm/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll +++ b/llvm/test/CodeGen/Hexagon/vect/vect-mul-v4i8.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s -; CHECK: vmpybsu +; CHECK: vmpybu ; CHECK: vtrunehb define <4 x i8> @t_i4x8(<4 x i8> %a, <4 x i8> %b) nounwind { diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll b/llvm/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll index a84cd00..aee0437 100644 --- a/llvm/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll 
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-mul-v8i8.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s -; CHECK: vmpybsu -; CHECK: vmpybsu +; CHECK: vmpybu +; CHECK: vmpybu define <8 x i8> @t_i8x8(<8 x i8> %a, <8 x i8> %b) nounwind { entry: -- 2.7.4