/// It returns false if no base operands and offset were found.
/// It is not guaranteed to recognize base operands and offsets in all
/// cases.
- virtual bool
- getMemOperandsWithOffset(const MachineInstr &MI,
- SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable,
- const TargetRegisterInfo *TRI) const {
+ virtual bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
return false;
}
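For orientation only, a minimal sketch of how an out-of-tree target might implement this hook for a simple reg+imm addressing form; the target name `Foo`, the operand layout, and the use of memoperands() for the width are assumptions for illustration, not part of this patch:

bool FooInstrInfo::getMemOperandsWithOffsetWidth(
    const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
    const TargetRegisterInfo *TRI) const {
  // Only handle loads/stores of the form "op dst, [base, #imm]".
  if (!MI.mayLoadOrStore() || MI.getNumExplicitOperands() < 3 ||
      !MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
    return false;
  BaseOps.push_back(&MI.getOperand(1)); // Base register operand.
  Offset = MI.getOperand(2).getImm();   // Immediate byte offset.
  OffsetIsScalable = false;             // No vscale-scaled addressing here.
  // Report the access size in bytes; fall back to 0 if it is unknown.
  Width = MI.memoperands_empty() ? 0 : MI.memoperands().front()->getSize();
  return true;
}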
/// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
/// \p NumLoads is the number of loads that will be in the cluster if this
/// hook returns true.
+ /// \p NumBytes is the number of bytes that will be loaded from all the
+ /// clustered loads if this hook returns true.
virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads) const {
+ unsigned NumLoads, unsigned NumBytes) const {
llvm_unreachable("target did not implement shouldClusterMemOps()");
}
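Likewise, a sketch of how a target override might use the new `NumBytes` argument alongside `NumLoads`; the limits of 4 ops and 32 bytes are made-up illustration values, not taken from any in-tree target:

bool FooInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                                       ArrayRef<const MachineOperand *> BaseOps2,
                                       unsigned NumLoads,
                                       unsigned NumBytes) const {
  // Only cluster simple accesses that share a single register base operand.
  if (BaseOps1.size() != 1 || BaseOps2.size() != 1 ||
      !BaseOps1.front()->isReg() || !BaseOps2.front()->isReg() ||
      BaseOps1.front()->getReg() != BaseOps2.front()->getReg())
    return false;
  // Cap both the number of ops and the total bytes in the cluster.
  return NumLoads <= 4 && NumBytes <= 32;
}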
SUnit *SU;
SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
+ unsigned Width;
MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
- int64_t Offset)
- : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset) {}
+ int64_t Offset, unsigned Width)
+ : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
+ Width(Width) {}
static bool Compare(const MachineOperand *const &A,
const MachineOperand *const &B) {
ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
for (SUnit *SU : MemOps) {
+ const MachineInstr &MI = *SU->getInstr();
SmallVector<const MachineOperand *, 4> BaseOps;
int64_t Offset;
bool OffsetIsScalable;
- if (TII->getMemOperandsWithOffset(*SU->getInstr(), BaseOps, Offset,
- OffsetIsScalable, TRI))
- MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset));
+ unsigned Width;
+ if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
+ OffsetIsScalable, Width, TRI))
+ MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
#ifndef NDEBUG
for (auto *Op : BaseOps)
assert(Op);
// At this point, the `MemOpRecords` array must hold at least two mem ops. Try to
// cluster the mem ops collected in `MemOpRecords`.
unsigned ClusterLength = 1;
+ unsigned CurrentClusterBytes = MemOpRecords[0].Width;
for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
// The decision to cluster mem ops is based on target-dependent logic.
auto MemOpa = MemOpRecords[Idx];
auto MemOpb = MemOpRecords[Idx + 1];
++ClusterLength;
- if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps,
- ClusterLength)) {
+ CurrentClusterBytes += MemOpb.Width;
+ if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
+ CurrentClusterBytes)) {
// The current pair of mem ops could not be clustered; reset the cluster
// length and byte count, and move on to the next pair.
ClusterLength = 1;
+ CurrentClusterBytes = MemOpb.Width;
continue;
}
// FIXME: Is this check really required?
if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
ClusterLength = 1;
+ CurrentClusterBytes = MemOpb.Width;
continue;
}
const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset,
bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
SmallVector<const MachineOperand *, 4> BaseOps;
- if (!getMemOperandsWithOffset(MI, BaseOps, Offset, OffsetIsScalable, TRI) ||
+ unsigned Width;
+ if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable,
+ Width, TRI) ||
BaseOps.size() != 1)
return false;
BaseOp = BaseOps.front();
return true;
}
-bool AArch64InstrInfo::getMemOperandsWithOffset(
+bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI)
- const {
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
const MachineOperand *BaseOp;
- unsigned Width;
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
Width, TRI))
return false;
/// Only called for LdSt for which getMemOperandWithOffset returns true.
bool AArch64InstrInfo::shouldClusterMemOps(
ArrayRef<const MachineOperand *> BaseOps1,
- ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads) const {
+ ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+ unsigned NumBytes) const {
assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
const MachineOperand &BaseOp1 = *BaseOps1.front();
const MachineOperand &BaseOp2 = *BaseOps2.front();
/// Hint that pairing the given load or store is unprofitable.
static void suppressLdStPair(MachineInstr &MI);
- bool getMemOperandsWithOffset(
+ bool getMemOperandsWithOffsetWidth(
const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI)
- const override;
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
/// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
/// This is true for some SVE instructions like ldr/str that have a
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads) const override;
+ unsigned NumLoads, unsigned NumBytes) const override;
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
int64_t Dummy1;
bool Dummy2;
+ unsigned Dummy3;
SmallVector<const MachineOperand *, 4> BaseOps;
if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
- if (!SII->getMemOperandsWithOffset(MI, BaseOps, Dummy1, Dummy2,
- TRI)) {
+ if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
+ Dummy3, TRI)) {
// We failed to get the base operands, so we'll never clause this
// instruction with any other; pretend it's illegal.
Type = HARDCLAUSE_ILLEGAL;
// scheduler it limits the size of the cluster to avoid increasing
// register pressure too much, but this pass runs after register
// allocation so there is no need for that kind of limit.
- !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2)))) {
+ !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
// Finish the current clause.
Changed |= emitClause(CI, SII);
CI = ClauseInfo();
}
}
-bool SIInstrInfo::getMemOperandsWithOffset(
+unsigned SIInstrInfo::getOperandSizeInBytes(const MachineInstr &LdSt,
+ const MachineOperand *MOp) const {
+ assert(MOp && "Unexpected null machine operand!");
+ const MachineRegisterInfo &MRI = LdSt.getParent()->getParent()->getRegInfo();
+ const Register Reg = MOp->getReg();
+ const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg)
+ ? MRI.getRegClass(Reg)
+ : RI.getPhysRegClass(Reg);
+ return (RI.getRegSizeInBits(*DstRC) / 8);
+}
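A brief, hypothetical illustration of what the new helper reports; `TII` and `MI` are assumed locals, and a DS write2-style instruction carrying both data operands is just an example:

// For a DS write2-style instruction that has both data0 and data1, the total
// width is the sum of the two data operands' register sizes in bytes.
const MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
const MachineOperand *Data1 = TII->getNamedOperand(MI, AMDGPU::OpName::data1);
unsigned Bytes = TII->getOperandSizeInBytes(MI, Data0) +
                 TII->getOperandSizeInBytes(MI, Data1);
// e.g. two 32-bit VGPR data operands -> Bytes == 8.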
+
+bool SIInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI)
- const {
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
if (!LdSt.mayLoadOrStore())
return false;
unsigned Opc = LdSt.getOpcode();
OffsetIsScalable = false;
- const MachineOperand *BaseOp, *OffsetOp;
+ const MachineOperand *BaseOp, *OffsetOp, *MOp;
if (isDS(LdSt)) {
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
}
BaseOps.push_back(BaseOp);
Offset = OffsetOp->getImm();
+ // Get appropriate operand, and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+ if (!MOp)
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::data0);
+ Width = getOperandSizeInBytes(LdSt, MOp);
} else {
// The 2 offset instructions use offset0 and offset1 instead. We can treat
// these as a load with a single offset if the 2 offsets are consecutive.
BaseOps.push_back(BaseOp);
Offset = EltSize * Offset0;
+ // Get appropriate operand(s), and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+ if (!MOp) {
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::data0);
+ Width = getOperandSizeInBytes(LdSt, MOp);
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::data1);
+ Width += getOperandSizeInBytes(LdSt, MOp);
+ } else {
+ Width = getOperandSizeInBytes(LdSt, MOp);
+ }
}
return true;
}
BaseOps.push_back(RSrc);
BaseOps.push_back(SOffset);
Offset = OffsetImm->getImm();
+ // Get appropriate operand, and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+ if (!MOp)
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdata);
+ Width = getOperandSizeInBytes(LdSt, MOp);
return true;
}
Offset = OffsetImm->getImm();
if (SOffset) // soffset can be an inline immediate.
Offset += SOffset->getImm();
+ // Get appropriate operand, and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+ if (!MOp)
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdata);
+ Width = getOperandSizeInBytes(LdSt, MOp);
return true;
}
BaseOps.push_back(BaseOp);
OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset);
Offset = OffsetOp ? OffsetOp->getImm() : 0;
+ // Get appropriate operand, and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::sdst);
+ Width = getOperandSizeInBytes(LdSt, MOp);
return true;
}
if (BaseOp)
BaseOps.push_back(BaseOp);
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
+ // Get appropriate operand, and compute width accordingly.
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+ if (!MOp)
+ MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdata);
+ Width = getOperandSizeInBytes(LdSt, MOp);
return true;
}
bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads) const {
+ unsigned NumLoads,
+ unsigned NumBytes) const {
assert(!BaseOps1.empty() && !BaseOps2.empty());
const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
const MachineInstr &MIb) const {
SmallVector<const MachineOperand *, 4> BaseOps0, BaseOps1;
int64_t Offset0, Offset1;
+ unsigned Dummy0, Dummy1;
bool Offset0IsScalable, Offset1IsScalable;
- if (!getMemOperandsWithOffset(MIa, BaseOps0, Offset0, Offset0IsScalable, &RI) ||
- !getMemOperandsWithOffset(MIb, BaseOps1, Offset1, Offset1IsScalable, &RI))
+ if (!getMemOperandsWithOffsetWidth(MIa, BaseOps0, Offset0, Offset0IsScalable,
+ Dummy0, &RI) ||
+ !getMemOperandsWithOffsetWidth(MIb, BaseOps1, Offset1, Offset1IsScalable,
+ Dummy1, &RI))
return false;
if (!memOpsHaveSameBaseOperands(BaseOps0, BaseOps1))
int64_t &Offset1,
int64_t &Offset2) const override;
- bool
- getMemOperandsWithOffset(const MachineInstr &LdSt,
- SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable,
- const TargetRegisterInfo *TRI) const final;
+ unsigned getOperandSizeInBytes(const MachineInstr &LdSt,
+ const MachineOperand *MOp) const;
+
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const final;
bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
ArrayRef<const MachineOperand *> BaseOps2,
- unsigned NumLoads) const override;
+ unsigned NumLoads, unsigned NumBytes) const override;
bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
int64_t Offset1, unsigned NumLoads) const override;
}
/// Get the base register and byte offset of a load/store instr.
-bool HexagonInstrInfo::getMemOperandsWithOffset(
+bool HexagonInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
- unsigned AccessSize = 0;
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
OffsetIsScalable = false;
- const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
+ const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, Width);
if (!BaseOp || !BaseOp->isReg())
return false;
BaseOps.push_back(BaseOp);
bool expandPostRAPseudo(MachineInstr &MI) const override;
/// Get the base register and byte offset of a load/store instr.
- bool
- getMemOperandsWithOffset(const MachineInstr &LdSt,
- SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
/// Reverses the branch condition of the specified condition list,
/// returning false on success and true if it cannot be reversed.
return true;
}
-bool LanaiInstrInfo::getMemOperandsWithOffset(
+bool LanaiInstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
switch (LdSt.getOpcode()) {
default:
return false;
case Lanai::LDBs_RI:
case Lanai::LDBz_RI:
const MachineOperand *BaseOp;
- unsigned Width;
OffsetIsScalable = false;
if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
return false;
bool expandPostRAPseudo(MachineInstr &MI) const override;
- bool
- getMemOperandsWithOffset(const MachineInstr &LdSt,
- SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
const MachineOperand *&BaseOp,
}
}
-bool X86InstrInfo::getMemOperandsWithOffset(
+bool X86InstrInfo::getMemOperandsWithOffsetWidth(
const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
+ int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const {
const MCInstrDesc &Desc = MemOp.getDesc();
int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
if (MemRefBegin < 0)
return false;
OffsetIsScalable = false;
+ // FIXME: Relying on memoperands() may not be the right thing to do here.
+ // Check with the X86 maintainers and fix it accordingly. For now, it is OK,
+ // since `Width` is not used by the X86 back-end at the moment.
+ Width =
+ !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;
BaseOps.push_back(BaseOp);
return true;
}
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- bool
- getMemOperandsWithOffset(const MachineInstr &LdSt,
- SmallVectorImpl<const MachineOperand *> &BaseOps,
- int64_t &Offset, bool &OffsetIsScalable,
- const TargetRegisterInfo *TRI) const override;
+ bool getMemOperandsWithOffsetWidth(
+ const MachineInstr &LdSt,
+ SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+ bool &OffsetIsScalable, unsigned &Width,
+ const TargetRegisterInfo *TRI) const override;
bool analyzeBranchPredicate(MachineBasicBlock &MBB,
TargetInstrInfo::MachineBranchPredicate &MBP,
bool AllowModify = false) const override;