(apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }])
>;
+// Split a 128-bit vector store of zero into two 64-bit stores. Rooted at a
+// G_STORE; the legality checks live in matchSplitStoreZero128 and the rewrite
+// in applySplitStoreZero128.
+def split_store_zero_128 : GICombineRule<
+ (defs root:$d),
+ (match (wip_match_opcode G_STORE):$d,
+ [{ return matchSplitStoreZero128(*${d}, MRI); }]),
+ (apply [{ applySplitStoreZero128(*${d}, MRI, B, Observer); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
icmp_to_true_false_known_bits, merge_unmerge,
select_combines, fold_merge_to_zext,
constant_fold, identity_combines,
- ptr_add_immed_chain, overlapping_and]> {
+ ptr_add_immed_chain, overlapping_and,
+ split_store_zero_128]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
Observer.changedInstr(MI);
}
+/// Return true if \p MI is a simple, non-truncating store of a 128-bit vector
+/// whose value is a constant zero (or zero splat) and has no other non-debug
+/// user. Such stores are split into two 64-bit stores by
+/// applySplitStoreZero128, for size/performance reasons.
+static bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
+  GStore &St = cast<GStore>(MI);
+  // Non-simple stores (e.g. volatile ones) are never rewritten.
+  if (!St.isSimple())
+    return false;
+
+  Register ValReg = St.getValueReg();
+  LLT StoredTy = MRI.getType(ValReg);
+  // Only 128-bit vector values qualify, and the memory access must cover the
+  // whole value: truncating stores are not split.
+  if (!StoredTy.isVector() || StoredTy.getSizeInBits() != 128 ||
+      StoredTy.getSizeInBits() != St.getMemSizeInBits())
+    return false;
+
+  // The store must be the only non-debug user of the value register.
+  if (!MRI.hasOneNonDBGUse(ValReg))
+    return false;
+
+  // Finally, the stored value has to be a constant (or constant splat) zero.
+  auto SplatCst =
+      isConstantOrConstantSplatVector(*MRI.getVRegDef(ValReg), MRI);
+  if (!SplatCst)
+    return false;
+  return SplatCst->isZero();
+}
+
+/// Replace the 128-bit vector zero store \p MI (as accepted by
+/// matchSplitStoreZero128) with two s64 zero stores, one through the original
+/// pointer and one through the pointer advanced by 8 bytes, then erase \p MI.
+///
+/// \p Observer is not used here; it is kept because the combine rule's apply
+/// code passes it.
+static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
+                                   MachineIRBuilder &B,
+                                   GISelChangeObserver &Observer) {
+  B.setInstrAndDebugLoc(MI);
+  GStore &Store = cast<GStore>(MI);
+  // Query the type directly inside the assert so that no local variable is
+  // left unused (and warned about) when asserts are compiled out.
+  assert(MRI.getType(Store.getValueReg()).isVector() &&
+         "Expected a vector store value");
+  LLT NewTy = LLT::scalar(64);
+  Register PtrReg = Store.getPointerReg();
+  // A single s64 zero feeds both halves.
+  auto Zero = B.buildConstant(NewTy, 0);
+  // Address of the upper half: original pointer + 8 bytes.
+  auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,
+                               B.buildConstant(LLT::scalar(64), 8));
+  auto &MF = *MI.getMF();
+  // Derive both memory operands from the original one, at byte offsets 0 and
+  // 8, each with the new s64 memory type.
+  auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
+  auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);
+  B.buildStore(Zero, PtrReg, *LowMMO);
+  B.buildStore(Zero, HighPtr, *HighMMO);
+  Store.eraseFromParent();
+}
+
#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: v2s64_split
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; A simple store of an all-zero <2 x s64> vector is split into two s64 zero
+ ; stores: one at the original address and one at address + 8.
+
+ ; CHECK-LABEL: name: v2s64_split
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64)
+ ; CHECK-NEXT: G_STORE %zero(s64), [[COPY]](p0) :: (store (s64), align 16)
+ ; CHECK-NEXT: G_STORE %zero(s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %zero:_(s64) = G_CONSTANT i64 0
+ %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
+ G_STORE %zerovec(<2 x s64>), %0(p0) :: (store (<2 x s64>))
+ RET_ReallyLR
+
+...
+---
+name: v4i32_split
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; A zero <4 x s32> store is 128 bits wide as well, so it is also split into
+ ; two s64 zero stores.
+
+ ; CHECK-LABEL: name: v4i32_split
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64), align 16)
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %zero:_(s32) = G_CONSTANT i32 0
+ %zerovec:_(<4 x s32>) = G_BUILD_VECTOR %zero, %zero, %zero, %zero
+ G_STORE %zerovec(<4 x s32>), %0(p0) :: (store (<4 x s32>))
+ RET_ReallyLR
+
+...
+---
+name: v8i16_split
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; A zero <8 x s16> store is 128 bits wide as well, so it is also split into
+ ; two s64 zero stores.
+
+ ; CHECK-LABEL: name: v8i16_split
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64), align 16)
+ ; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into unknown-address + 8)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %zero:_(s16) = G_CONSTANT i16 0
+ %zerovec:_(<8 x s16>) = G_BUILD_VECTOR %zero, %zero, %zero, %zero, %zero, %zero, %zero, %zero
+ G_STORE %zerovec(<8 x s16>), %0(p0) :: (store (<8 x s16>))
+ RET_ReallyLR
+
+...
+
+# Negative tests
+---
+name: v2i32_nosplit
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; A zero <2 x s32> store is only 64 bits wide; the combine only handles
+ ; 128-bit vectors, so the store must be left unchanged.
+
+ ; CHECK-LABEL: name: v2i32_nosplit
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: %zerovec:_(<2 x s32>) = G_BUILD_VECTOR %zero(s32), %zero(s32)
+ ; CHECK-NEXT: G_STORE %zerovec(<2 x s32>), [[COPY]](p0) :: (store (<2 x s32>))
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %zero:_(s32) = G_CONSTANT i32 0
+ %zerovec:_(<2 x s32>) = G_BUILD_VECTOR %zero, %zero
+ G_STORE %zerovec(<2 x s32>), %0(p0) :: (store (<2 x s32>))
+ RET_ReallyLR
+
+...
+---
+name: multiple_uses
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; The zero vector is also copied into $q0, so the store is not its only
+ ; user and must be left unchanged.
+
+ ; CHECK-LABEL: name: multiple_uses
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+ ; CHECK-NEXT: G_STORE %zerovec(<2 x s64>), [[COPY]](p0) :: (store (<2 x s64>))
+ ; CHECK-NEXT: $q0 = COPY %zerovec(<2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %zero:_(s64) = G_CONSTANT i64 0
+ %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
+ G_STORE %zerovec(<2 x s64>), %0(p0) :: (store (<2 x s64>))
+ $q0 = COPY %zerovec
+ RET_ReallyLR
+
+...
+---
+name: truncating
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; The memory operand (<2 x s32>) is narrower than the stored <2 x s64>
+ ; value; truncating stores are not split.
+
+ ; CHECK-LABEL: name: truncating
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+ ; CHECK-NEXT: G_STORE %zerovec(<2 x s64>), [[COPY]](p0) :: (store (<2 x s32>))
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %zero:_(s64) = G_CONSTANT i64 0
+ %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
+ G_STORE %zerovec(<2 x s64>), %0(p0) :: (store (<2 x s32>))
+ RET_ReallyLR
+
+...
+---
+name: volatile
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; The memory operand is volatile, so the store must be left unchanged
+ ; (presumably rejected by the isSimple() check in the matcher).
+
+ ; CHECK-LABEL: name: volatile
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
+ ; CHECK-NEXT: G_STORE %zerovec(<2 x s64>), [[COPY]](p0) :: (volatile store (<4 x s32>))
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %zero:_(s64) = G_CONSTANT i64 0
+ %zerovec:_(<2 x s64>) = G_BUILD_VECTOR %zero, %zero
+ G_STORE %zerovec(<2 x s64>), %0(p0) :: (volatile store (<4 x s32>))
+ RET_ReallyLR
+
+...
+---
+name: s128_scalar
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; A store of a scalar s128 zero is not split: the combine only matches
+ ; vector values.
+
+ ; CHECK-LABEL: name: s128_scalar
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: %zero:_(s128) = G_CONSTANT i128 0
+ ; CHECK-NEXT: G_STORE %zero(s128), [[COPY]](p0) :: (store (s128))
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %zero:_(s128) = G_CONSTANT i128 0
+ G_STORE %zero(s128), %0(p0) :: (store (s128))
+ RET_ReallyLR
+
+...