#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
STATISTIC(NumCompletePartials, "Number of stores dead by later partials");
+STATISTIC(NumModifiedStores, "Number of stores modified");
static cl::opt<bool>
EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
cl::init(true), cl::Hidden,
cl::desc("Enable partial-overwrite tracking in DSE"));
+static cl::opt<bool>
+EnablePartialStoreMerging("enable-dse-partial-store-merging",
+ cl::init(true), cl::Hidden,
+ cl::desc("Enable partial store merging in DSE"));
+
//===----------------------------------------------------------------------===//
// Helper functions
}
namespace {
-enum OverwriteResult { OW_Begin, OW_Complete, OW_End, OW_Unknown };
+enum OverwriteResult {
+ OW_Begin,
+ OW_Complete,
+ OW_End,
+ OW_PartialEarlierWithFullLater,
+ OW_Unknown
+};
}
/// Return 'OW_Complete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
-/// beginning of the 'Earlier' location is overwritten by 'Later', or
-/// 'OW_Unknown' if nothing can be determined.
+/// beginning of the 'Earlier' location is overwritten by 'Later'.
+/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
+/// overwritten by a latter (smaller) store which doesn't write outside the big
+/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
static OverwriteResult isOverwrite(const MemoryLocation &Later,
const MemoryLocation &Earlier,
const DataLayout &DL,
}
}
+ // Check for an earlier store which writes to all the memory locations that
+ // the later store writes to.
+ if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
+ int64_t(EarlierOff + Earlier.Size) > LaterOff &&
+ uint64_t(LaterOff - EarlierOff) + Later.Size <= Earlier.Size) {
+ DEBUG(dbgs() << "DSE: Partial overwrite an earlier load [" << EarlierOff
+ << ", " << int64_t(EarlierOff + Earlier.Size)
+ << ") by a later store [" << LaterOff << ", "
+ << int64_t(LaterOff + Later.Size) << ")\n");
+ // TODO: Maybe come up with a better name?
+ return OW_PartialEarlierWithFullLater;
+ }
+
// Another interesting case is if the later store overwrites the end of the
// earlier store.
//
// If we find a write that is a) removable (i.e., non-volatile), b) is
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
+ // Also try to merge two stores if a later one only touches memory written
+ // to by the earlier one.
if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
int64_t InstWriteOffset, DepWriteOffset;
bool IsOverwriteEnd = (OR == OW_End);
MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
InstWriteOffset, LaterSize, IsOverwriteEnd);
+ } else if (EnablePartialStoreMerging &&
+ OR == OW_PartialEarlierWithFullLater) {
+ auto *Earlier = dyn_cast<StoreInst>(DepWrite);
+ auto *Later = dyn_cast<StoreInst>(Inst);
+ if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
+ Later && isa<ConstantInt>(Later->getValueOperand())) {
+ // If the store we find is:
+ // a) partially overwritten by the store to 'Loc'
+ // b) the later store is fully contained in the earlier one and
+ // c) they both have a constant value
+ // Merge the two stores, replacing the earlier store's value with a
+ // merge of both values.
+ // TODO: Deal with other constant types (vectors, etc), and probably
+ // some mem intrinsics (if needed)
+
+ APInt EarlierValue =
+ cast<ConstantInt>(Earlier->getValueOperand())->getValue();
+ APInt LaterValue =
+ cast<ConstantInt>(Later->getValueOperand())->getValue();
+ unsigned LaterBits = LaterValue.getBitWidth();
+ assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
+ LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
+
+ // Offset of the smaller store inside the larger store
+ unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
+ unsigned LShiftAmount =
+ DL.isBigEndian()
+ ? EarlierValue.getBitWidth() - BitOffsetDiff - LaterBits
+ : BitOffsetDiff;
+ APInt Mask =
+ APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
+ LShiftAmount + LaterBits);
+ // Clear the bits we'll be replacing, then OR with the smaller
+ // store, shifted appropriately.
+ APInt Merged =
+ (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
+ DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *DepWrite
+ << "\n Later: " << *Inst
+ << "\n Merged Value: " << Merged << '\n');
+
+ auto *SI = new StoreInst(
+ ConstantInt::get(Earlier->getValueOperand()->getType(), Merged),
+ Earlier->getPointerOperand(), false, Earlier->getAlignment(),
+ Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite);
+
+ unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_nontemporal};
+ SI->copyMetadata(*DepWrite, MDToKeep);
+ ++NumModifiedStores;
+
+ // Remove earlier, wider, store
+ size_t Idx = InstrOrdering.lookup(DepWrite);
+ InstrOrdering.erase(DepWrite);
+ InstrOrdering.insert(std::make_pair(SI, Idx));
+
+ // Delete the old stores and now-dead instructions that feed them.
+ deleteDeadInstruction(Inst, &BBI, *MD, *TLI, IOL, &InstrOrdering);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL,
+ &InstrOrdering);
+ MadeChange = true;
+
+ // We erased DepWrite and Inst (Loc); start over.
+ break;
+ }
}
}
-; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+; RUN: opt < %s -basicaa -dse -enable-dse-partial-store-merging=false -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
; Ensure that the dead store is deleted in this case. It is wholely
-; RUN: opt -S -dse < %s | FileCheck %s
+; RUN: opt -S -dse -enable-dse-partial-store-merging=false < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-bgq-linux"
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
+
+define void @byte_by_byte_replacement(i32 *%ptr) {
+; CHECK-LABEL: @byte_by_byte_replacement(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 151653132, i32* [[PTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ ;; This store's value should be modified as it should be better to use one
+ ;; larger store than several smaller ones.
+ ;; store will turn into 0x090A0B0C == 151653132
+ store i32 305419896, i32* %ptr ; 0x12345678
+ %bptr = bitcast i32* %ptr to i8*
+ %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+ %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+ ;; We should be able to merge these four stores with the i32 above
+ ; value (and bytes) stored before ; 0x12345678
+ store i8 9, i8* %bptr ; 09
+ store i8 10, i8* %bptr1 ; 0A
+ store i8 11, i8* %bptr2 ; 0B
+ store i8 12, i8* %bptr3 ; 0C
+ ; 0x090A0B0C
+
+ ret void
+}
+
+define void @word_replacement(i64 *%ptr) {
+; CHECK-LABEL: @word_replacement(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 72638273700655232, i64* [[PTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ store i64 72623859790382856, i64* %ptr ; 0x0102030405060708
+
+ %wptr = bitcast i64* %ptr to i16*
+ %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+ %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+ %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3
+
+ ;; We should be able to merge these two stores with the i64 one above
+ ; value (and bytes) stored before ; 0x0102030405060708
+ store i16 4128, i16* %wptr1 ; 1020
+ store i16 28800, i16* %wptr3 ; 7080
+ ; 0x0102102005067080
+
+ ret void
+}
+
+
+define void @differently_sized_replacements(i64 *%ptr) {
+; CHECK-LABEL: @differently_sized_replacements(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 289077004501059343, i64* [[PTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f
+
+ %bptr = bitcast i64* %ptr to i8*
+ %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6
+ %wptr = bitcast i64* %ptr to i16*
+ %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+ %dptr = bitcast i64* %ptr to i32*
+
+ ;; We should be able to merge all these stores with the i64 one above
+ ; value (and bytes) stored before ; 0x08090a0b0c0d0e0f
+ store i8 7, i8* %bptr6 ; 07
+ store i16 1541, i16* %wptr2 ; 0605
+ store i32 67305985, i32* %dptr ; 04030201
+ ; 0x040302010605070f
+ ret void
+}
+
+
+define void @multiple_replacements_to_same_byte(i64 *%ptr) {
+; CHECK-LABEL: @multiple_replacements_to_same_byte(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 289077004602248719, i64* [[PTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f
+
+ %bptr = bitcast i64* %ptr to i8*
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+ %wptr = bitcast i64* %ptr to i16*
+ %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+ %dptr = bitcast i64* %ptr to i32*
+
+ ;; We should be able to merge all these stores with the i64 one above
+ ; value (and bytes) stored before ; 0x08090a0b0c0d0e0f
+ store i8 7, i8* %bptr3 ; 07
+ store i16 1541, i16* %wptr1 ; 0605
+ store i32 67305985, i32* %dptr ; 04030201
+ ; 0x040302010c0d0e0f
+ ret void
+}
+
+define void @merged_merges(i64 *%ptr) {
+; CHECK-LABEL: @merged_merges(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 289081428418563599, i64* [[PTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f
+
+ %bptr = bitcast i64* %ptr to i8*
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+ %wptr = bitcast i64* %ptr to i16*
+ %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+ %dptr = bitcast i64* %ptr to i32*
+
+ ;; We should be able to merge all these stores with the i64 one above
+ ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f
+ store i32 67305985, i32* %dptr ; 04030201
+ store i16 1541, i16* %wptr1 ; 0605
+ store i8 7, i8* %bptr3 ; 07
+ ; 0x040306070c0d0e0f
+ ret void
+}
+
+define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) {
+; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
+; CHECK-NEXT: [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
+; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
+; CHECK-NEXT: [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32*
+; CHECK-NEXT: [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64*
+; CHECK-NEXT: store i32 1234, i32* [[DPTR]], align 1
+; CHECK-NEXT: store i64 5678, i64* [[QPTR]], align 1
+; CHECK-NEXT: ret i8 0
+;
+entry:
+
+ store i64 0, i64* %ptr
+
+ %bptr = bitcast i64* %ptr to i8*
+ %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+ %dptr = bitcast i8* %bptrm1 to i32*
+ %qptr = bitcast i8* %bptr3 to i64*
+
+ store i32 1234, i32* %dptr, align 1
+ store i64 5678, i64* %qptr, align 1
+
+ ret i8 0
+}
+
+;; Test case from PR31777
+%union.U = type { i64 }
+
+define void @foo(%union.U* nocapture %u) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0
+; CHECK-NEXT: store i64 11821949021847552, i64* [[I]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0
+ store i64 0, i64* %i, align 8
+ %s = bitcast %union.U* %u to i16*
+ store i16 42, i16* %s, align 8
+ ret void
+}
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -dse -enable-dse-partial-store-merging -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+
+define void @byte_by_byte_replacement(i32 *%ptr) {
+; CHECK-LABEL: @byte_by_byte_replacement(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 202050057, i32* [[PTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ ;; This store's value should be modified as it should be better to use one
+ ;; larger store than several smaller ones.
+ ;; store will turn into 0x0C0B0A09 == 202050057
+ store i32 305419896, i32* %ptr ; 0x12345678
+ %bptr = bitcast i32* %ptr to i8*
+ %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+ %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+ ;; We should be able to merge these four stores with the i32 above
+ ; value (and bytes) stored before ; 0x12345678
+ store i8 9, i8* %bptr ; 09
+ store i8 10, i8* %bptr1 ; 0A
+ store i8 11, i8* %bptr2 ; 0B
+ store i8 12, i8* %bptr3 ; 0C
+ ; 0x0C0B0A09
+ ret void
+}
+
+define void @word_replacement(i64 *%ptr) {
+; CHECK-LABEL: @word_replacement(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 8106482645252179720, i64* [[PTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ store i64 72623859790382856, i64* %ptr ; 0x0102030405060708
+
+ %wptr = bitcast i64* %ptr to i16*
+ %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+ %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+ %wptr3 = getelementptr inbounds i16, i16* %wptr, i64 3
+
+ ;; We should be able to merge these two stores with the i64 one above
+ ; value (not bytes) stored before ; 0x0102030405060708
+ store i16 4128, i16* %wptr1 ; 1020
+ store i16 28800, i16* %wptr3 ; 7080
+ ; 0x7080030410200708
+ ret void
+}
+
+
+define void @differently_sized_replacements(i64 *%ptr) {
+; CHECK-LABEL: @differently_sized_replacements(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 578437695752307201, i64* [[PTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f
+
+ %bptr = bitcast i64* %ptr to i8*
+ %bptr6 = getelementptr inbounds i8, i8* %bptr, i64 6
+ %wptr = bitcast i64* %ptr to i16*
+ %wptr2 = getelementptr inbounds i16, i16* %wptr, i64 2
+ %dptr = bitcast i64* %ptr to i32*
+
+ ;; We should be able to merge all these stores with the i64 one above
+ ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f
+ store i8 7, i8* %bptr6 ; 07
+ store i16 1541, i16* %wptr2 ; 0605
+ store i32 67305985, i32* %dptr ; 04030201
+ ; 0x0807060504030201
+ ret void
+}
+
+
+define void @multiple_replacements_to_same_byte(i64 *%ptr) {
+; CHECK-LABEL: @multiple_replacements_to_same_byte(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 579005069522043393, i64* [[PTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f
+
+ %bptr = bitcast i64* %ptr to i8*
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+ %wptr = bitcast i64* %ptr to i16*
+ %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+ %dptr = bitcast i64* %ptr to i32*
+
+ ;; We should be able to merge all these stores with the i64 one above
+ ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f
+ store i8 7, i8* %bptr3 ; 07
+ store i16 1541, i16* %wptr1 ; 0605
+ store i32 67305985, i32* %dptr ; 04030201
+ ; 0x08090a0b04030201
+ ret void
+}
+
+define void @merged_merges(i64 *%ptr) {
+; CHECK-LABEL: @merged_merges(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i64 579005069572506113, i64* [[PTR:%.*]]
+; CHECK-NEXT: ret void
+;
+entry:
+ store i64 579005069656919567, i64* %ptr ; 0x08090a0b0c0d0e0f
+
+ %bptr = bitcast i64* %ptr to i8*
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+ %wptr = bitcast i64* %ptr to i16*
+ %wptr1 = getelementptr inbounds i16, i16* %wptr, i64 1
+ %dptr = bitcast i64* %ptr to i32*
+
+ ;; We should be able to merge all these stores with the i64 one above
+ ; value (not bytes) stored before ; 0x08090a0b0c0d0e0f
+ store i32 67305985, i32* %dptr ; 04030201
+ store i16 1541, i16* %wptr1 ; 0605
+ store i8 7, i8* %bptr3 ; 07
+ ; 0x08090a0b07050201
+ ret void
+}
+
+define signext i8 @shouldnt_merge_since_theres_a_full_overlap(i64 *%ptr) {
+; CHECK-LABEL: @shouldnt_merge_since_theres_a_full_overlap(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BPTR:%.*]] = bitcast i64* [[PTR:%.*]] to i8*
+; CHECK-NEXT: [[BPTRM1:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 -1
+; CHECK-NEXT: [[BPTR3:%.*]] = getelementptr inbounds i8, i8* [[BPTR]], i64 3
+; CHECK-NEXT: [[DPTR:%.*]] = bitcast i8* [[BPTRM1]] to i32*
+; CHECK-NEXT: [[QPTR:%.*]] = bitcast i8* [[BPTR3]] to i64*
+; CHECK-NEXT: store i32 1234, i32* [[DPTR]], align 1
+; CHECK-NEXT: store i64 5678, i64* [[QPTR]], align 1
+; CHECK-NEXT: ret i8 0
+;
+entry:
+
+ ; Also check that alias.scope metadata doesn't get dropped
+ store i64 0, i64* %ptr, !alias.scope !32
+
+ %bptr = bitcast i64* %ptr to i8*
+ %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+ %dptr = bitcast i8* %bptrm1 to i32*
+ %qptr = bitcast i8* %bptr3 to i64*
+
+ store i32 1234, i32* %dptr, align 1
+ store i64 5678, i64* %qptr, align 1
+
+ ret i8 0
+}
+
+;; Test case from PR31777
+%union.U = type { i64 }
+
+define void @foo(%union.U* nocapture %u) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[UNION_U:%.*]], %union.U* [[U:%.*]], i64 0, i32 0
+; CHECK-NEXT: store i64 42, i64* [[I]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %i = getelementptr inbounds %union.U, %union.U* %u, i64 0, i32 0
+ store i64 0, i64* %i, align 8, !dbg !22, !tbaa !26, !noalias !30, !nontemporal !29
+ %s = bitcast %union.U* %u to i16*
+ store i16 42, i16* %s, align 8
+ ret void
+}
+
+; Don't crash by operating on stale data if we merge (kill) the last 2 stores.
+
+define void @PR34074(i32* %x, i64* %y) {
+; CHECK-LABEL: @PR34074(
+; CHECK-NEXT: store i64 42, i64* %y
+; CHECK-NEXT: store i32 4, i32* %x
+; CHECK-NEXT: ret void
+;
+ store i64 42, i64* %y ; independent store
+ %xbc = bitcast i32* %x to i8*
+ store i32 0, i32* %x ; big store of constant
+ store i8 4, i8* %xbc ; small store with mergeable constant
+ ret void
+}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 5.0.0 (trunk 306512)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "me.cpp", directory: "/compiler-explorer")
+!2 = !{}
+!7 = distinct !DISubprogram(name: "foo", linkageName: "foo(U*)", scope: !1, file: !1, line: 9, type: !8, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !20)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64)
+!11 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "U", file: !1, line: 4, size: 64, elements: !12, identifier: "typeinfo name for U")
+!12 = !{!13, !17}
+!13 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !11, file: !1, line: 5, baseType: !14, size: 64)
+!14 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", file: !15, line: 55, baseType: !16)
+!15 = !DIFile(filename: "/usr/include/stdint.h", directory: "/compiler-explorer")
+!16 = !DIBasicType(name: "long unsigned int", size: 64, encoding: DW_ATE_unsigned)
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "s", scope: !11, file: !1, line: 6, baseType: !18, size: 16)
+!18 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", file: !15, line: 49, baseType: !19)
+!19 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned)
+!20 = !{!21}
+!21 = !DILocalVariable(name: "u", arg: 1, scope: !7, file: !1, line: 9, type: !10)
+!22 = !DILocation(line: 10, column: 8, scope: !7)
+
+!26 = !{!27, !27, i64 0}
+!27 = !{!"omnipotent char", !28, i64 0}
+!28 = !{!"Simple C++ TBAA"}
+
+!29 = !{i32 1}
+
+; Domains and scopes which might alias
+!30 = !{!30}
+!31 = !{!31, !30}
+
+!32 = !{!32}
+!33 = !{!33, !32}