SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitAssertExt(SDNode *N);
+ SDValue visitAssertAlign(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::AssertSext:
case ISD::AssertZext: return visitAssertExt(N);
+ case ISD::AssertAlign: return visitAssertAlign(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
return SDValue();
}
+SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
+ SDLoc DL(N);
+
+ Align AL = cast<AssertAlignSDNode>(N)->getAlign();
+ SDValue N0 = N->getOperand(0);
+
+ // Fold (assertalign (assertalign x, AL0), AL1) ->
+ // (assertalign x, max(AL0, AL1))
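+ // For example, (assertalign (assertalign x, 4), 8) folds to (assertalign x, 8).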
+ if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
+ return DAG.getAssertAlign(DL, N0.getOperand(0),
+ std::max(AL, AAN->getAlign()));
+
+ // In rare cases, there are trivial arithmetic ops in the source operands. Sink
+ // this assert down to the source operands so that those arithmetic ops can be
+ // exposed to further DAG combining.
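+ // For example, given (assertalign (add x, 32), 16), the constant 32 already
+ // has at least Log2(16) known trailing zero bits, so only x needs the
+ // assertion and the node becomes (add (assertalign x, 16), 32).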
+ switch (N0.getOpcode()) {
+ default:
+ break;
+ case ISD::ADD:
+ case ISD::SUB: {
+ unsigned AlignShift = Log2(AL);
+ SDValue LHS = N0.getOperand(0);
+ SDValue RHS = N0.getOperand(1);
+ unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
+ unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
+ if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
+ if (LHSAlignShift < AlignShift)
+ LHS = DAG.getAssertAlign(DL, LHS, AL);
+ if (RHSAlignShift < AlignShift)
+ RHS = DAG.getAssertAlign(DL, RHS, AL);
+ return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
/// If the result of a wider load is shifted right by N bits and then truncated
/// to a narrower type, where N is a multiple of the number of bits of the
/// narrower type, transform it to a narrower load from address + N / num of
llvm_unreachable("cannot find MemSDNode in the pattern!");
}
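+// Match the case where a 64-bit address was split into two 32-bit halves and
+// the constant offset was folded into the low half as an `or`. On success, N0
+// is set to the base the split halves originated from and N1 to the constant
+// offset node.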
+static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
+ SDValue &N0, SDValue &N1) {
+ if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
+ Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ // As we split 64-bit `or` earlier, it's a complicated pattern to match, i.e.
+ // (i64 (bitcast (v2i32 (build_vector
+ // (or (extract_vector_elt V, 0), OFFSET),
+ // (extract_vector_elt V, 1)))))
+ SDValue Lo = Addr.getOperand(0).getOperand(0);
+ if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
+ SDValue BaseLo = Lo.getOperand(0);
+ SDValue BaseHi = Addr.getOperand(0).getOperand(1);
+ // Check that the split base halves (Lo and Hi) come from the same vector.
+ if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
+ // Lo is statically extracted from index 0.
+ isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
+ BaseLo.getConstantOperandVal(1) == 0 &&
+ // Hi is statically extracted from index 1.
+ isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
+ BaseHi.getConstantOperandVal(1) == 1) {
+ N0 = BaseLo.getOperand(0).getOperand(0);
+ N1 = Lo.getOperand(1);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N,
SDValue Addr,
if (Subtarget->hasFlatInstOffsets() &&
(!Subtarget->hasFlatSegmentOffsetBug() ||
- findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS) &&
- CurDAG->isBaseWithConstantOffset(Addr)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
- uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
-
- const SIInstrInfo *TII = Subtarget->getInstrInfo();
- unsigned AS = findMemSDNode(N)->getAddressSpace();
- if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
- Addr = N0;
- OffsetVal = COffsetVal;
- } else {
- // If the offset doesn't fit, put the low bits into the offset field and
- // add the rest.
-
- SDLoc DL(N);
- uint64_t ImmField;
- const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
- if (IsSigned) {
- ImmField = SignExtend64(COffsetVal, NumBits);
-
- // Don't use a negative offset field if the base offset is positive.
- // Since the scheduler currently relies on the offset field, doing so
- // could result in strange scheduling decisions.
-
- // TODO: Should we not do this in the opposite direction as well?
- if (static_cast<int64_t>(COffsetVal) > 0) {
- if (static_cast<int64_t>(ImmField) < 0) {
- const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits - 1);
- ImmField = COffsetVal & OffsetMask;
+ findMemSDNode(N)->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)) {
+ SDValue N0, N1;
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ N0 = Addr.getOperand(0);
+ N1 = Addr.getOperand(1);
+ } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
+ assert(N0 && N1 && isa<ConstantSDNode>(N1));
+ }
+ if (N0 && N1) {
+ uint64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
+
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
+ unsigned AS = findMemSDNode(N)->getAddressSpace();
+ if (TII->isLegalFLATOffset(COffsetVal, AS, IsSigned)) {
+ Addr = N0;
+ OffsetVal = COffsetVal;
+ } else {
+ // If the offset doesn't fit, put the low bits into the offset field and
+ // add the rest.
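+ // For example, with a hypothetical 12-bit unsigned offset field, an offset
+ // of 0x1234 would be split into ImmField = 0x234 and RemainderOffset =
+ // 0x1000, with the remainder added back via the 64-bit add built below.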
+
+ SDLoc DL(N);
+ uint64_t ImmField;
+ const unsigned NumBits = TII->getNumFlatOffsetBits(AS, IsSigned);
+ if (IsSigned) {
+ ImmField = SignExtend64(COffsetVal, NumBits);
+
+ // Don't use a negative offset field if the base offset is positive.
+ // Since the scheduler currently relies on the offset field, doing so
+ // could result in strange scheduling decisions.
+
+ // TODO: Should we not do this in the opposite direction as well?
+ if (static_cast<int64_t>(COffsetVal) > 0) {
+ if (static_cast<int64_t>(ImmField) < 0) {
+ const uint64_t OffsetMask =
+ maskTrailingOnes<uint64_t>(NumBits - 1);
+ ImmField = COffsetVal & OffsetMask;
+ }
}
+ } else {
+ // TODO: Should we do this for a negative offset?
+ const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
+ ImmField = COffsetVal & OffsetMask;
}
- } else {
- // TODO: Should we do this for a negative offset?
- const uint64_t OffsetMask = maskTrailingOnes<uint64_t>(NumBits);
- ImmField = COffsetVal & OffsetMask;
- }
- uint64_t RemainderOffset = COffsetVal - ImmField;
+ uint64_t RemainderOffset = COffsetVal - ImmField;
- assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
- assert(RemainderOffset + ImmField == COffsetVal);
+ assert(TII->isLegalFLATOffset(ImmField, AS, IsSigned));
+ assert(RemainderOffset + ImmField == COffsetVal);
- OffsetVal = ImmField;
+ OffsetVal = ImmField;
- // TODO: Should this try to use a scalar add pseudo if the base address is
- // uniform and saddr is usable?
- SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
- SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
+ // TODO: Should this try to use a scalar add pseudo if the base address
+ // is uniform and saddr is usable?
+ SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
+ SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
- SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, N0, Sub0);
- SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- DL, MVT::i32, N0, Sub1);
+ SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::i32, N0, Sub0);
+ SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
+ MVT::i32, N0, Sub1);
- SDValue AddOffsetLo
- = getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
- SDValue AddOffsetHi
- = getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
+ SDValue AddOffsetLo =
+ getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
+ SDValue AddOffsetHi =
+ getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);
- SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
- SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
+ SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);
+ SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
- SDNode *Add = CurDAG->getMachineNode(
- AMDGPU::V_ADD_I32_e64, DL, VTs,
- {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
+ SDNode *Add =
+ CurDAG->getMachineNode(AMDGPU::V_ADD_I32_e64, DL, VTs,
+ {AddOffsetLo, SDValue(N0Lo, 0), Clamp});
- SDNode *Addc = CurDAG->getMachineNode(
- AMDGPU::V_ADDC_U32_e64, DL, VTs,
- {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
+ SDNode *Addc = CurDAG->getMachineNode(
+ AMDGPU::V_ADDC_U32_e64, DL, VTs,
+ {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});
- SDValue RegSequenceArgs[] = {
- CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
- SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1
- };
+ SDValue RegSequenceArgs[] = {
+ CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
+ SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};
- Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
- MVT::i64, RegSequenceArgs), 0);
+ Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::i64, RegSequenceArgs),
+ 0);
+ }
}
}
// A 32-bit (address + offset) should not cause unsigned 32-bit integer
// wraparound, because s_load instructions perform the addition in 64 bits.
if ((Addr.getValueType() != MVT::i32 ||
- Addr->getFlags().hasNoUnsignedWrap()) &&
- (CurDAG->isBaseWithConstantOffset(Addr) ||
- Addr.getOpcode() == ISD::ADD)) {
- SDValue N0 = Addr.getOperand(0);
- SDValue N1 = Addr.getOperand(1);
-
- if (SelectSMRDOffset(N1, Offset, Imm)) {
- SBase = Expand32BitAddress(N0);
- return true;
+ Addr->getFlags().hasNoUnsignedWrap())) {
+ SDValue N0, N1;
+ // Extract the base and offset if possible.
+ if (CurDAG->isBaseWithConstantOffset(Addr) ||
+ Addr.getOpcode() == ISD::ADD) {
+ N0 = Addr.getOperand(0);
+ N1 = Addr.getOperand(1);
+ } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
+ assert(N0 && N1 && isa<ConstantSDNode>(N1));
+ }
+ if (N0 && N1) {
+ if (SelectSMRDOffset(N1, Offset, Imm)) {
+ SBase = Expand32BitAddress(N0);
+ return true;
+ }
}
}
SBase = Expand32BitAddress(Addr);
define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 {
; HAWAII-LABEL: local_store_i55:
; HAWAII: ; %bb.0:
-; HAWAII-NEXT: s_add_u32 s0, s4, 14
-; HAWAII-NEXT: s_addc_u32 s1, s5, 0
+; HAWAII-NEXT: s_or_b32 s0, s4, 14
; HAWAII-NEXT: v_mov_b32_e32 v0, s0
-; HAWAII-NEXT: v_mov_b32_e32 v1, s1
+; HAWAII-NEXT: v_mov_b32_e32 v1, s5
; HAWAII-NEXT: flat_load_ubyte v0, v[0:1]
; HAWAII-NEXT: s_load_dword s0, s[4:5], 0x0
; HAWAII-NEXT: s_load_dword s1, s[4:5], 0x2
;
; FIJI-LABEL: local_store_i55:
; FIJI: ; %bb.0:
+; FIJI-NEXT: s_or_b32 s0, s4, 14
+; FIJI-NEXT: v_mov_b32_e32 v0, s0
+; FIJI-NEXT: v_mov_b32_e32 v1, s5
+; FIJI-NEXT: flat_load_ubyte v0, v[0:1]
; FIJI-NEXT: s_load_dword s0, s[4:5], 0x0
-; FIJI-NEXT: s_load_dword s2, s[4:5], 0x8
-; FIJI-NEXT: s_load_dword s1, s[4:5], 0xc
+; FIJI-NEXT: s_load_dword s1, s[4:5], 0x8
+; FIJI-NEXT: s_load_dword s2, s[4:5], 0xc
; FIJI-NEXT: s_mov_b32 m0, -1
; FIJI-NEXT: s_waitcnt lgkmcnt(0)
-; FIJI-NEXT: v_mov_b32_e32 v2, s0
-; FIJI-NEXT: s_and_b32 s3, s1, 0xffff
-; FIJI-NEXT: s_add_u32 s0, s4, 14
+; FIJI-NEXT: v_mov_b32_e32 v1, s0
; FIJI-NEXT: v_mov_b32_e32 v3, s1
-; FIJI-NEXT: s_addc_u32 s1, s5, 0
-; FIJI-NEXT: v_mov_b32_e32 v0, s0
-; FIJI-NEXT: v_mov_b32_e32 v1, s1
-; FIJI-NEXT: flat_load_ubyte v0, v[0:1]
-; FIJI-NEXT: ds_write_b16 v2, v3 offset:4
-; FIJI-NEXT: v_mov_b32_e32 v3, s2
-; FIJI-NEXT: s_waitcnt vmcnt(0) lgkmcnt(1)
+; FIJI-NEXT: s_and_b32 s3, s2, 0xffff
+; FIJI-NEXT: v_mov_b32_e32 v2, s2
+; FIJI-NEXT: ds_write_b16 v1, v2 offset:4
+; FIJI-NEXT: s_waitcnt vmcnt(0)
; FIJI-NEXT: v_lshlrev_b32_e32 v0, 16, v0
; FIJI-NEXT: v_or_b32_e32 v0, s3, v0
; FIJI-NEXT: v_bfe_u32 v0, v0, 16, 7
-; FIJI-NEXT: ds_write_b8 v2, v0 offset:6
-; FIJI-NEXT: ds_write_b32 v2, v3
+; FIJI-NEXT: ds_write_b8 v1, v0 offset:6
+; FIJI-NEXT: ds_write_b32 v1, v3
; FIJI-NEXT: s_endpgm
;
; GFX9-LABEL: local_store_i55: