[GlobalISel] Fix load-or combine moving loads across potential aliasing stores.

author Amara Emerson <amara@apple.com>

Mon, 19 Jul 2021 06:34:09 +0000 (23:34 -0700)

committer Amara Emerson <amara@apple.com>

Mon, 19 Jul 2021 17:23:23 +0000 (10:23 -0700)
author Amara Emerson <amara@apple.com>
Mon, 19 Jul 2021 06:34:09 +0000 (23:34 -0700)
committer Amara Emerson <amara@apple.com>
Mon, 19 Jul 2021 17:23:23 +0000 (10:23 -0700)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

index 0ce9d8e..775563a 100644 (file)
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -596,8 +596,10 @@ private:
    /// at to the index of the load.
    /// \param [in] MemSizeInBits - The number of bits each load should produce.
    ///
-  /// \returns The lowest-index load found and the lowest index on success.
-  Optional<std::pair<GZExtLoad *, int64_t>> findLoadOffsetsForLoadOrCombine(
+  /// \returns On success, a 3-tuple containing lowest-index load found, the
+  /// lowest index, and the last load in the sequence.
+  Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
+  findLoadOffsetsForLoadOrCombine(
        SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
        const SmallVector<Register, 8> &RegsToVisit,
        const unsigned MemSizeInBits);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

index e6ca02e..590a65a 100644 (file)
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -28,6 +28,7 @@
  #include "llvm/CodeGen/TargetOpcodes.h"
  #include "llvm/Support/MathExtras.h"
  #include "llvm/Target/TargetMachine.h"
+#include <tuple>
  
  #define DEBUG_TYPE "gi-combiner"
  
@@ -3455,7 +3456,7 @@ matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
    return std::make_pair(Load, Shift / MemSizeInBits);
  }
  
-Optional<std::pair<GZExtLoad *, int64_t>>
+Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
  CombinerHelper::findLoadOffsetsForLoadOrCombine(
      SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
      const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
@@ -3478,10 +3479,10 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
    const MachineMemOperand *MMO = nullptr;
  
    // Earliest instruction-order load in the pattern.
-  MachineInstr *EarliestLoad = nullptr;
+  GZExtLoad *EarliestLoad = nullptr;
  
    // Latest instruction-order load in the pattern.
-  MachineInstr *LatestLoad = nullptr;
+  GZExtLoad *LatestLoad = nullptr;
  
    // Base pointer which every load should share.
    Register BasePtr;
@@ -3586,7 +3587,7 @@ CombinerHelper::findLoadOffsetsForLoadOrCombine(
        return None;
    }
  
-  return std::make_pair(LowestIdxLoad, LowestIdx);
+  return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
  }
  
  bool CombinerHelper::matchLoadOrCombine(
@@ -3634,13 +3635,13 @@ bool CombinerHelper::matchLoadOrCombine(
    // Also verify that each of these ends up putting a[i] into the same memory
    // offset as a load into a wide type would.
    SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
-  GZExtLoad *LowestIdxLoad;
+  GZExtLoad *LowestIdxLoad, *LatestLoad;
    int64_t LowestIdx;
    auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
        MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
    if (!MaybeLoadInfo)
      return false;
-  std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
+  std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
  
    // We have a bunch of loads being OR'd together. Using the addresses + offsets
    // we found before, check if this corresponds to a big or little endian byte
@@ -3695,6 +3696,7 @@ bool CombinerHelper::matchLoadOrCombine(
      return false;
  
    MatchInfo = [=](MachineIRBuilder &MIB) {
+    MIB.setInstrAndDebugLoc(*LatestLoad);
      Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
      MIB.buildLoad(LoadDst, Ptr, *NewMMO);
      if (NeedsBSwap)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir

index 4b02fe2..428def1 100644 (file)
--- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir
@@ -1397,9 +1397,6 @@ body:             |
  name:            dont_combine_store_between_different_mbb
  tracksRegLiveness: true
  body:             |
-  ; There is a store between the two loads, hidden away in a different MBB.
-  ; We should not combine here.
-
    ; LITTLE-LABEL: name: dont_combine_store_between_different_mbb
    ; LITTLE: bb.0:
    ; LITTLE:   successors: %bb.1(0x80000000)
@@ -1444,6 +1441,9 @@ body:             |
    ; BIG:   %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG:   $w1 = COPY %full_load(s32)
    ; BIG:   RET_ReallyLR implicit $w1
+  ; There is a store between the two loads, hidden away in a different MBB.
+  ; We should not combine here.
+
  
    bb.0:
      successors: %bb.1(0x80000000)
@@ -1479,8 +1479,6 @@ body:             |
  name:            different_mbb
  tracksRegLiveness: true
  body:             |
-  ; It should be possible to combine here, but it's not supported right now.
-
    ; LITTLE-LABEL: name: different_mbb
    ; LITTLE: bb.0:
    ; LITTLE:   successors: %bb.1(0x80000000)
@@ -1513,6 +1511,8 @@ body:             |
    ; BIG:   %full_load:_(s32) = G_OR %low_half, %high_half
    ; BIG:   $w1 = COPY %full_load(s32)
    ; BIG:   RET_ReallyLR implicit $w1
+  ; It should be possible to combine here, but it's not supported right now.
+
  
    bb.0:
      successors: %bb.1(0x80000000)
@@ -1569,3 +1569,67 @@ body:             |
      %full_load:_(s32) = G_OR %low_half, %high_half
      $w1 = COPY %full_load(s32)
      RET_ReallyLR implicit $w1
+
+...
+---
+name:            store_between_loads_and_or
+alignment:       4
+tracksRegLiveness: true
+
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$x1' }
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; Check that we build the G_LOAD at the point of the last load, instead of place of the G_OR.
+    ; We could have a G_STORE in between which may not be safe to move the load across.
+  liveins: $x0, $x1
+    ; LITTLE-LABEL: name: store_between_loads_and_or
+    ; LITTLE: liveins: $x0, $x1, $x0, $x1
+    ; LITTLE: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; LITTLE: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; LITTLE: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+    ; LITTLE: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
+    ; LITTLE: $w0 = COPY [[LOAD]](s32)
+    ; LITTLE: RET_ReallyLR implicit $w0
+    ; BIG-LABEL: name: store_between_loads_and_or
+    ; BIG: liveins: $x0, $x1, $x0, $x1
+    ; BIG: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; BIG: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+    ; BIG: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+    ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+    ; BIG: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[LOAD]]
+    ; BIG: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
+    ; BIG: $w0 = COPY [[BSWAP]](s32)
+    ; BIG: RET_ReallyLR implicit $w0
+  %0:_(p0) = COPY $x0
+  %1:_(p0) = COPY $x1
+  %12:_(s8) = G_CONSTANT i8 1
+  %15:_(s32) = G_CONSTANT i32 8
+  %19:_(s32) = G_CONSTANT i32 16
+  %23:_(s32) = G_CONSTANT i32 24
+  %13:_(s32) = G_ZEXTLOAD %0:_(p0) :: (load (s8))
+  %3:_(s64) = G_CONSTANT i64 1
+  %4:_(p0) = G_PTR_ADD %0:_, %3:_(s64)
+  %14:_(s32) = G_ZEXTLOAD %4:_(p0) :: (load (s8))
+  %6:_(s64) = G_CONSTANT i64 2
+  %7:_(p0) = G_PTR_ADD %0:_, %6:_(s64)
+  %18:_(s32) = G_ZEXTLOAD %7:_(p0) :: (load (s8))
+  %9:_(s64) = G_CONSTANT i64 3
+  %10:_(p0) = G_PTR_ADD %0:_, %9:_(s64)
+  %22:_(s32) = G_ZEXTLOAD %10:_(p0) :: (load (s8))
+  G_STORE %12:_(s8), %1:_(p0) :: (store (s8))
+  %16:_(s32) = nuw nsw G_SHL %14:_, %15:_(s32)
+  %17:_(s32) = G_OR %16:_, %13:_
+  %20:_(s32) = nuw nsw G_SHL %18:_, %19:_(s32)
+  %21:_(s32) = G_OR %17:_, %20:_
+  %24:_(s32) = nuw G_SHL %22:_, %23:_(s32)
+  %25:_(s32) = G_OR %21:_, %24:_
+  $w0 = COPY %25:_(s32)
+  RET_ReallyLR implicit $w0
+
+...
author	Amara Emerson <amara@apple.com>
	Mon, 19 Jul 2021 06:34:09 +0000 (23:34 -0700)
committer	Amara Emerson <amara@apple.com>
	Mon, 19 Jul 2021 17:23:23 +0000 (10:23 -0700)
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h		patch \| blob \| history
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp		patch \| blob \| history
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir		patch \| blob \| history