/// at to the index of the load.
/// \param [in] MemSizeInBits - The number of bits each load should produce.
///
- /// \returns The lowest-index load found and the lowest index on success.
- Optional<std::pair<GZExtLoad *, int64_t>> findLoadOffsetsForLoadOrCombine(
+ /// \returns On success, a 3-tuple containing the lowest-index load found,
+ /// the lowest index, and the last load in the sequence.
+ Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
+ findLoadOffsetsForLoadOrCombine(
SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
const SmallVector<Register, 8> &RegsToVisit,
const unsigned MemSizeInBits);
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include <tuple>
#define DEBUG_TYPE "gi-combiner"
return std::make_pair(Load, Shift / MemSizeInBits);
}
-Optional<std::pair<GZExtLoad *, int64_t>>
+Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
CombinerHelper::findLoadOffsetsForLoadOrCombine(
SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
const MachineMemOperand *MMO = nullptr;
// Earliest instruction-order load in the pattern.
- MachineInstr *EarliestLoad = nullptr;
+ GZExtLoad *EarliestLoad = nullptr;
// Latest instruction-order load in the pattern.
- MachineInstr *LatestLoad = nullptr;
+ GZExtLoad *LatestLoad = nullptr;
// Base pointer which every load should share.
Register BasePtr;
return None;
}
- return std::make_pair(LowestIdxLoad, LowestIdx);
+ return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
}
bool CombinerHelper::matchLoadOrCombine(
// Also verify that each of these ends up putting a[i] into the same memory
// offset as a load into a wide type would.
SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
- GZExtLoad *LowestIdxLoad;
+ GZExtLoad *LowestIdxLoad, *LatestLoad;
int64_t LowestIdx;
auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
if (!MaybeLoadInfo)
return false;
- std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
+ std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
// We have a bunch of loads being OR'd together. Using the addresses + offsets
// we found before, check if this corresponds to a big or little endian byte
return false;
MatchInfo = [=](MachineIRBuilder &MIB) {
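+    // Build the wide load at the position of the last load in the pattern
+    // rather than at the G_OR. There may be a store between the narrow loads
+    // and the G_OR, and the wide load must not be moved across it.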
+ MIB.setInstrAndDebugLoc(*LatestLoad);
Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
MIB.buildLoad(LoadDst, Ptr, *NewMMO);
if (NeedsBSwap)
name: dont_combine_store_between_different_mbb
tracksRegLiveness: true
body: |
- ; There is a store between the two loads, hidden away in a different MBB.
- ; We should not combine here.
-
; LITTLE-LABEL: name: dont_combine_store_between_different_mbb
; LITTLE: bb.0:
; LITTLE: successors: %bb.1(0x80000000)
; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
+ ; There is a store between the two loads, hidden away in a different MBB.
+ ; We should not combine here.
+
bb.0:
successors: %bb.1(0x80000000)
name: different_mbb
tracksRegLiveness: true
body: |
- ; It should be possible to combine here, but it's not supported right now.
-
; LITTLE-LABEL: name: different_mbb
; LITTLE: bb.0:
; LITTLE: successors: %bb.1(0x80000000)
; BIG: %full_load:_(s32) = G_OR %low_half, %high_half
; BIG: $w1 = COPY %full_load(s32)
; BIG: RET_ReallyLR implicit $w1
+ ; It should be possible to combine here, but it's not supported right now.
+
bb.0:
successors: %bb.1(0x80000000)
%full_load:_(s32) = G_OR %low_half, %high_half
$w1 = COPY %full_load(s32)
RET_ReallyLR implicit $w1
+
+...
+---
+name: store_between_loads_and_or
+alignment: 4
+tracksRegLiveness: true
+
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$x1' }
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.1:
+ liveins: $x0, $x1
+    ; Check that we build the G_LOAD at the point of the last load, instead of at the G_OR.
+    ; There may be a G_STORE in between, and it is not safe to move the load across it.
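+    ; Rough C equivalent of the pattern below (hypothetical source; `p` and
+    ; `q` are illustrative names for the two incoming pointers):
+    ;   uint8_t b0 = p[0], b1 = p[1], b2 = p[2], b3 = p[3];
+    ;   *q = 1;                                  // the store in between
+    ;   return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);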
+ ; LITTLE-LABEL: name: store_between_loads_and_or
+    ; LITTLE: liveins: $x0, $x1
+ ; LITTLE: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; LITTLE: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; LITTLE: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+ ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+ ; LITTLE: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
+ ; LITTLE: $w0 = COPY [[LOAD]](s32)
+ ; LITTLE: RET_ReallyLR implicit $w0
+ ; BIG-LABEL: name: store_between_loads_and_or
+    ; BIG: liveins: $x0, $x1
+ ; BIG: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; BIG: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; BIG: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+ ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32), align 1)
+ ; BIG: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[LOAD]]
+ ; BIG: G_STORE [[C]](s8), [[COPY1]](p0) :: (store (s8))
+ ; BIG: $w0 = COPY [[BSWAP]](s32)
+ ; BIG: RET_ReallyLR implicit $w0
+ %0:_(p0) = COPY $x0
+ %1:_(p0) = COPY $x1
+ %12:_(s8) = G_CONSTANT i8 1
+ %15:_(s32) = G_CONSTANT i32 8
+ %19:_(s32) = G_CONSTANT i32 16
+ %23:_(s32) = G_CONSTANT i32 24
+ %13:_(s32) = G_ZEXTLOAD %0:_(p0) :: (load (s8))
+ %3:_(s64) = G_CONSTANT i64 1
+ %4:_(p0) = G_PTR_ADD %0:_, %3:_(s64)
+ %14:_(s32) = G_ZEXTLOAD %4:_(p0) :: (load (s8))
+ %6:_(s64) = G_CONSTANT i64 2
+ %7:_(p0) = G_PTR_ADD %0:_, %6:_(s64)
+ %18:_(s32) = G_ZEXTLOAD %7:_(p0) :: (load (s8))
+ %9:_(s64) = G_CONSTANT i64 3
+ %10:_(p0) = G_PTR_ADD %0:_, %9:_(s64)
+ %22:_(s32) = G_ZEXTLOAD %10:_(p0) :: (load (s8))
+ G_STORE %12:_(s8), %1:_(p0) :: (store (s8))
+ %16:_(s32) = nuw nsw G_SHL %14:_, %15:_(s32)
+ %17:_(s32) = G_OR %16:_, %13:_
+ %20:_(s32) = nuw nsw G_SHL %18:_, %19:_(s32)
+ %21:_(s32) = G_OR %17:_, %20:_
+ %24:_(s32) = nuw G_SHL %22:_, %23:_(s32)
+ %25:_(s32) = G_OR %21:_, %24:_
+ $w0 = COPY %25:_(s32)
+ RET_ReallyLR implicit $w0
+
+...