From 982b8e0bbb38d7d411716192fc398500606ecedd Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 14 Dec 2022 12:10:03 +0000 Subject: [PATCH] [WebAssembly][NFC] Add ComplexPattern for loads This refactors out the offset and address operand pattern matching into a ComplexPattern, so that one pattern fragment can match the dynamic and static (offset) addresses in all possible positions. Split out from D139530, which also contained an improvement to global address folding. Differential Revision: https://reviews.llvm.org/D139631 --- .../Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp | 107 ++++++++++- .../Target/WebAssembly/WebAssemblyInstrAtomics.td | 76 ++------ .../Target/WebAssembly/WebAssemblyInstrMemory.td | 208 ++++++--------------- .../lib/Target/WebAssembly/WebAssemblyInstrSIMD.td | 30 +-- 4 files changed, 174 insertions(+), 247 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 7e75989..777c520 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -43,8 +43,7 @@ class WebAssemblyDAGToDAGISel final : public SelectionDAGISel { public: WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &TM, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr) { - } + : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr) {} StringRef getPassName() const override { return "WebAssembly Instruction Selection"; @@ -67,11 +66,19 @@ public: bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; + bool SelectLoadOperands32(SDValue Op, SDValue &Offset, SDValue &Addr); + bool SelectLoadOperands64(SDValue Op, SDValue &Offset, SDValue &Addr); + // Include the pieces autogenerated from the target description. #include "WebAssemblyGenDAGISel.inc" private: // add select functions here... 
+ + bool SelectLoadOperands(MVT AddrType, unsigned ConstOpc, SDValue Op, + SDValue &Offset, SDValue &Addr); + bool SelectLoadAddOperands(MVT OffsetType, SDValue N, SDValue &Offset, + SDValue &Addr); }; } // end anonymous namespace @@ -281,6 +288,102 @@ bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand( return true; } +bool WebAssemblyDAGToDAGISel::SelectLoadAddOperands(MVT OffsetType, SDValue N, + SDValue &Offset, + SDValue &Addr) { + assert(N.getNumOperands() == 2 && "Attempting to fold in a non-binary op"); + + // WebAssembly constant offsets are performed as unsigned with infinite + // precision, so we need to check for NoUnsignedWrap so that we don't fold an + // offset for an add that needs wrapping. + if (N.getOpcode() == ISD::ADD && !N.getNode()->getFlags().hasNoUnsignedWrap()) + return false; + + // Folds constants in an add into the offset. + for (size_t i = 0; i < 2; ++i) { + SDValue Op = N.getOperand(i); + SDValue OtherOp = N.getOperand(i == 0 ? 1 : 0); + + if (ConstantSDNode *CN = dyn_cast(Op)) { + Offset = + CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), OffsetType); + Addr = OtherOp; + return true; + } + } + return false; +} + +bool WebAssemblyDAGToDAGISel::SelectLoadOperands(MVT AddrType, + unsigned ConstOpc, SDValue N, + SDValue &Offset, + SDValue &Addr) { + SDLoc DL(N); + + // Fold target global addresses into the offset. + if (!TM.isPositionIndependent()) { + SDValue Op(N); + if (Op.getOpcode() == WebAssemblyISD::Wrapper) + Op = Op.getOperand(0); + + if (Op.getOpcode() == ISD::TargetGlobalAddress) { + Offset = Op; + Addr = SDValue( + CurDAG->getMachineNode(ConstOpc, DL, AddrType, + CurDAG->getTargetConstant(0, DL, AddrType)), + 0); + return true; + } + } + + // Fold anything inside an add into the offset. + if (N.getOpcode() == ISD::ADD && + SelectLoadAddOperands(AddrType, N, Offset, Addr)) + return true; + + // Likewise, treat an 'or' node as an 'add' if the or'ed bits are known to be + // zero and fold them into the offset too. 
+ if (N.getOpcode() == ISD::OR) { + bool OrIsAdd; + if (ConstantSDNode *CN = dyn_cast(N.getOperand(1))) { + OrIsAdd = + CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); + } else { + KnownBits Known0 = CurDAG->computeKnownBits(N->getOperand(0), 0); + KnownBits Known1 = CurDAG->computeKnownBits(N->getOperand(1), 0); + OrIsAdd = (~Known0.Zero & ~Known1.Zero) == 0; + } + + if (OrIsAdd && SelectLoadAddOperands(AddrType, N, Offset, Addr)) + return true; + } + + // Fold constant addresses into the offset. + if (ConstantSDNode *CN = dyn_cast(N)) { + Offset = CurDAG->getTargetConstant(CN->getZExtValue(), DL, AddrType); + Addr = SDValue( + CurDAG->getMachineNode(ConstOpc, DL, AddrType, + CurDAG->getTargetConstant(0, DL, AddrType)), + 0); + return true; + } + + // Else it's a plain old load with no offset. + Offset = CurDAG->getTargetConstant(0, DL, AddrType); + Addr = N; + return true; +} + +bool WebAssemblyDAGToDAGISel::SelectLoadOperands32(SDValue Op, SDValue &Offset, + SDValue &Addr) { + return SelectLoadOperands(MVT::i32, WebAssembly::CONST_I32, Op, Offset, Addr); +} + +bool WebAssemblyDAGToDAGISel::SelectLoadOperands64(SDValue Op, SDValue &Offset, + SDValue &Addr) { + return SelectLoadOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr); +} + /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready /// for instruction scheduling. FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index ed80ed3..1ed74da 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -226,25 +226,9 @@ multiclass AtomicLoad { defm ATOMIC_LOAD_I32 : AtomicLoad; defm ATOMIC_LOAD_I64 : AtomicLoad; -// Select loads with no constant offset. -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; - -// Select loads with a constant offset. 
- -// Pattern with address + immediate offset -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; - -// Select loads with just a constant offset. -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; - -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; - +// Select loads +defm : LoadPat; +defm : LoadPat; // Extending loads. Note that there are only zero-extending atomic loads, no // sign-extending loads. @@ -283,54 +267,18 @@ def sext_aload_8_64 : def sext_aload_16_64 : PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>; -// Select zero-extending loads with no constant offset. -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; +// Select zero-extending loads +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; -// Select sign-extending loads with no constant offset -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; +// Select sign-extending loads +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; // 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s -// Zero-extending loads with constant offset -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; - -// Sign-extending loads with constant offset -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64 - -// Extending loads with just a constant offset -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; - -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : 
LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; - //===----------------------------------------------------------------------===// // Atomic stores diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index d5bb9e9..cbb7140 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -66,70 +66,6 @@ defm LOAD_I64 : WebAssemblyLoad; defm LOAD_F32 : WebAssemblyLoad; defm LOAD_F64 : WebAssemblyLoad; -// Select loads with no constant offset. -multiclass LoadPatNoOffset { - def : Pat<(ty (kind I32:$addr)), (!cast(inst # "_A32") 0, 0, I32:$addr)>, - Requires<[HasAddr32]>; - def : Pat<(ty (kind (i64 I64:$addr))), (!cast(inst # "_A64") 0, 0, I64:$addr)>, - Requires<[HasAddr64]>; -} - -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; - -// Select loads with a constant offset. - -// Pattern with address + immediate offset -multiclass LoadPatImmOff { - def : Pat<(ty (kind (operand I32:$addr, imm:$off))), - (!cast(inst # "_A32") 0, imm:$off, I32:$addr)>, - Requires<[HasAddr32]>; - def : Pat<(ty (kind (operand I64:$addr, imm:$off))), - (!cast(inst # "_A64") 0, imm:$off, I64:$addr)>, - Requires<[HasAddr64]>; -} - -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; - -// Select loads with just a constant offset. 
-multiclass LoadPatOffsetOnly { - def : Pat<(ty (kind imm:$off)), - (!cast(inst # "_A32") 0, imm:$off, (CONST_I32 0))>, - Requires<[HasAddr32]>; - def : Pat<(ty (kind imm:$off)), - (!cast(inst # "_A64") 0, imm:$off, (CONST_I64 0))>, - Requires<[HasAddr64]>; -} - -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; - -multiclass LoadPatGlobalAddrOffOnly { - def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off))), - (!cast(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0))>, - Requires<[IsNotPIC, HasAddr32]>; - def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off))), - (!cast(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0))>, - Requires<[IsNotPIC, HasAddr64]>; -} - -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; - // Extending load. defm LOAD8_S_I32 : WebAssemblyLoad; defm LOAD8_U_I32 : WebAssemblyLoad; @@ -142,98 +78,58 @@ defm LOAD16_U_I64 : WebAssemblyLoad; defm LOAD32_S_I64 : WebAssemblyLoad; defm LOAD32_U_I64 : WebAssemblyLoad; -// Select extending loads with no constant offset. -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; - -// Select extending loads with a constant offset. -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; - -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; - -// Select extending loads with just a constant offset. 
-defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; - -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; - -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; - -// Resolve "don't care" extending loads to zero-extending loads. This is -// somewhat arbitrary, but zero-extending is conceptually simpler. - -// Select "don't care" extending loads with no constant offset. -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; -defm : LoadPatNoOffset; - -// Select "don't care" extending loads with a constant offset. -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatImmOff; - -// Select "don't care" extending loads with just a constant offset. -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatOffsetOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; -defm : LoadPatGlobalAddrOffOnly; +// Pattern matching + +// Patterns that match the static (offset) and dynamic (address stack +// operand) operands for loads, based on a combination of target global +// addresses and constants. 
+ +// For example, +// (add tga x) -> load tga(x) +// tga -> load tga(0) +// (add const x) -> load const(x) +// const -> load const(0) +// x -> load 0(x) +def LoadOps32 : ComplexPattern; +def LoadOps64 : ComplexPattern; + +multiclass LoadPat { + def : Pat<(ty (kind (LoadOps32 offset32_op:$offset, I32:$addr))), + (!cast(Name # "_A32") 0, + offset32_op:$offset, + I32:$addr)>, + Requires<[HasAddr32]>; + + def : Pat<(ty (kind (LoadOps64 offset64_op:$offset, I64:$addr))), + (!cast(Name # "_A64") 0, + offset64_op:$offset, + I64:$addr)>, + Requires<[HasAddr64]>; + +} + +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; + +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; + +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; + +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; +defm : LoadPat; // Defines atomic and non-atomic stores, regular and truncating multiclass WebAssemblyStore; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatOffsetOnly; -defm : LoadPatGlobalAddrOffOnly; +defm : LoadPat; } // v128.loadX_splat @@ -206,11 +202,7 @@ def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>; foreach vec = AllVecs in { defvar inst = "LOAD"#vec.lane_bits#"_SPLAT"; -defm : LoadPatNoOffset; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatOffsetOnly; -defm : LoadPatGlobalAddrOffOnly; +defm : LoadPat; } // Load and extend @@ -255,11 +247,7 @@ foreach exts = [["sextloadvi", "_S"], ["extloadvi", "_U"]] in { defvar loadpat = !cast(exts[0]#vec.split.lane_bits); defvar inst = "LOAD_EXTEND"#exts[1]#"_"#vec; -defm : LoadPatNoOffset; -defm : LoadPatImmOff; -defm : LoadPatImmOff; -defm : LoadPatOffsetOnly; -defm : LoadPatGlobalAddrOffOnly; +defm : LoadPat; } // Load lane into zero vector @@ -289,11 +277,7 @@ defm "" : SIMDLoadZero; foreach vec = [I32x4, I64x2] in { defvar inst = "LOAD_ZERO_"#vec; defvar pat = PatFrag<(ops node:$addr), 
(scalar_to_vector (vec.lane_vt (load $addr)))>; - defm : LoadPatNoOffset; - defm : LoadPatImmOff; - defm : LoadPatImmOff; - defm : LoadPatOffsetOnly; - defm : LoadPatGlobalAddrOffOnly; + defm : LoadPat; } // TODO: f32x4 and f64x2 as well @@ -301,11 +285,7 @@ foreach vec = [I32x4, I64x2] in { defvar inst = "LOAD_ZERO_"#vec; defvar pat = PatFrag<(ops node:$ptr), (vector_insert (vec.splat (vec.lane_vt 0)), (vec.lane_vt (load $ptr)), 0)>; - defm : LoadPatNoOffset; - defm : LoadPatImmOff; - defm : LoadPatImmOff; - defm : LoadPatOffsetOnly; - defm : LoadPatGlobalAddrOffOnly; + defm : LoadPat; } // Load lane -- 2.7.4