if (!OtherBr || BBI == OtherBB->begin())
return false;
+ // Decide whether OtherStore can be merged with SI. It must exist, write
+ // through the same pointer operand as SI, store a value whose type is
+ // bit-or-noop-pointer castable to SI's value type under DL, and have the
+ // same special state as SI.
+ auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
+ // Not a store at all.
+ if (!OtherStore)
+ return false;
+ // A store to a different location.
+ if (OtherStore->getPointerOperand() != SI.getPointerOperand())
+ return false;
+
+ auto *OtherValTy = OtherStore->getValueOperand()->getType();
+ auto *ThisValTy = SI.getValueOperand()->getType();
+ // The other store's value must be castable to the type SI stores.
+ if (!CastInst::isBitOrNoopPointerCastable(OtherValTy, ThisValTy, DL))
+ return false;
+ return SI.hasSameSpecialState(OtherStore);
+ };
+
// If the other block ends in an unconditional branch, check for the 'if then
// else' case. There is an instruction before the branch.
StoreInst *OtherStore = nullptr;
// If this isn't a store, isn't a store to the same location, or is not the
// right kind of store, bail out.
OtherStore = dyn_cast<StoreInst>(BBI);
- if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
- !SI.isSameOperationAs(OtherStore))
+ if (!OtherStoreIsMergeable(OtherStore))
return false;
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
// lives in OtherBB.
for (;; --BBI) {
// Check to see if we find the matching store.
- if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
- if (OtherStore->getOperand(1) != SI.getOperand(1) ||
- !SI.isSameOperationAs(OtherStore))
- return false;
+ OtherStore = dyn_cast<StoreInst>(BBI);
+ if (OtherStoreIsMergeable(OtherStore))
break;
- }
+
// If we find something that may be using or overwriting the stored
// value, or if we run out of instructions, we can't do the transform.
if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
}
// Insert a PHI node now if we need it.
- Value *MergedVal = OtherStore->getOperand(0);
+ Value *MergedVal = OtherStore->getValueOperand();
// The debug locations of the original instructions might differ. Merge them.
DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
OtherStore->getDebugLoc());
- if (MergedVal != SI.getOperand(0)) {
- PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
- PN->addIncoming(SI.getOperand(0), SI.getParent());
- PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+ if (MergedVal != SI.getValueOperand()) {
+ PHINode *PN =
+ PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
+ PN->addIncoming(SI.getValueOperand(), SI.getParent());
+ Builder.SetInsertPoint(OtherStore);
+ PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
+ OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
PN->setDebugLoc(MergedLoc);
}
bb12: ; preds = %bb10, %bb9
ret void
}
+
+; A half and an i16 are stored to the same alloca on the two incoming paths and
+; a half is loaded back in sink. The expected output has no alloca, store or
+; load left: the i16 is bitcast to half in BB1 and a phi in sink feeds the
+; return.
+define half @diff_types_same_width_merge(i1 %cond, half %a, i16 %b) {
+; CHECK-LABEL: @diff_types_same_width_merge(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK: BB0:
+; CHECK-NEXT: br label [[SINK:%.*]]
+; CHECK: BB1:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16 [[B:%.*]] to half
+; CHECK-NEXT: br label [[SINK]]
+; CHECK: sink:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB1]] ], [ [[A:%.*]], [[BB0]] ]
+; CHECK-NEXT: ret half [[STOREMERGE]]
+;
+entry:
+ %alloca = alloca half
+ br i1 %cond, label %BB0, label %BB1
+BB0:
+ store half %a, ptr %alloca
+ br label %sink
+BB1:
+ store i16 %b, ptr %alloca
+ br label %sink
+sink:
+ %val = load half, ptr %alloca
+ ret half %val
+}
+
+; An i32 and an i64 (different widths) are stored to the same alloca on the two
+; incoming paths. The expected output keeps the alloca, both stores and the
+; load in place; no phi is formed.
+define i32 @diff_types_diff_width_no_merge(i1 %cond, i32 %a, i64 %b) {
+; CHECK-LABEL: @diff_types_diff_width_no_merge(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK: A:
+; CHECK-NEXT: store i32 [[A:%.*]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT: br label [[SINK:%.*]]
+; CHECK: B:
+; CHECK-NEXT: store i64 [[B:%.*]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT: br label [[SINK]]
+; CHECK: sink:
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[ALLOCA]], align 8
+; CHECK-NEXT: ret i32 [[VAL]]
+;
+entry:
+ %alloca = alloca i64
+ br i1 %cond, label %A, label %B
+A:
+ store i32 %a, ptr %alloca
+ br label %sink
+B:
+ store i64 %b, ptr %alloca
+ br label %sink
+sink:
+ %val = load i32, ptr %alloca
+ ret i32 %val
+}
+
+; A <2 x i32> and a <4 x i32> are stored to the same alloca on the two incoming
+; paths. The expected output keeps the alloca, both stores and the load in
+; place; no phi is formed.
+define <4 x i32> @vec_no_merge(i1 %cond, <2 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @vec_no_merge(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 16
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK: A:
+; CHECK-NEXT: store <2 x i32> [[A:%.*]], ptr [[ALLOCA]], align 16
+; CHECK-NEXT: br label [[SINK:%.*]]
+; CHECK: B:
+; CHECK-NEXT: store <4 x i32> [[B:%.*]], ptr [[ALLOCA]], align 16
+; CHECK-NEXT: br label [[SINK]]
+; CHECK: sink:
+; CHECK-NEXT: [[VAL:%.*]] = load <4 x i32>, ptr [[ALLOCA]], align 16
+; CHECK-NEXT: ret <4 x i32> [[VAL]]
+;
+entry:
+ %alloca = alloca i64
+ br i1 %cond, label %A, label %B
+A:
+ store <2 x i32> %a, ptr %alloca
+ br label %sink
+B:
+ store <4 x i32> %b, ptr %alloca
+ br label %sink
+sink:
+ %val = load <4 x i32>, ptr %alloca
+ ret <4 x i32> %val
+}
+
+%struct.half = type { half };
+
+; A %struct.half (a struct with a single half member) and a plain half are
+; stored to the same alloca on the two incoming paths, and a %struct.half is
+; loaded back. The expected output has no alloca, store or load left: element 0
+; is extracted from the struct in BB0, a phi of type half is formed in sink,
+; and the struct is rebuilt with insertvalue for the return.
+define %struct.half @one_elem_struct_merge(i1 %cond, %struct.half %a, half %b) {
+; CHECK-LABEL: @one_elem_struct_merge(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK: BB0:
+; CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_HALF:%.*]] [[A:%.*]], 0
+; CHECK-NEXT: br label [[SINK:%.*]]
+; CHECK: BB1:
+; CHECK-NEXT: br label [[SINK]]
+; CHECK: sink:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi half [ [[TMP0]], [[BB0]] ], [ [[B:%.*]], [[BB1]] ]
+; CHECK-NEXT: [[VAL1:%.*]] = insertvalue [[STRUCT_HALF]] poison, half [[STOREMERGE]], 0
+; CHECK-NEXT: ret [[STRUCT_HALF]] [[VAL1]]
+;
+entry:
+ %alloca = alloca i64
+ br i1 %cond, label %BB0, label %BB1
+BB0:
+ store %struct.half %a, ptr %alloca
+ br label %sink
+BB1:
+ store half %b, ptr %alloca
+ br label %sink
+sink:
+ %val = load %struct.half, ptr %alloca
+ ret %struct.half %val
+}
+
+%struct.tup = type { half, i32 };
+
+; A %struct.tup (a two-member struct: half, i32) and a plain half are stored to
+; the same alloca on the two incoming paths. The expected output keeps the
+; alloca, both stores and the load in place; no phi is formed.
+define %struct.tup @multi_elem_struct_no_merge(i1 %cond, %struct.tup %a, half %b) {
+; CHECK-LABEL: @multi_elem_struct_no_merge(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[B:%.*]]
+; CHECK: A:
+; CHECK-NEXT: store [[STRUCT_TUP:%.*]] [[A:%.*]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT: br label [[SINK:%.*]]
+; CHECK: B:
+; CHECK-NEXT: store half [[B:%.*]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT: br label [[SINK]]
+; CHECK: sink:
+; CHECK-NEXT: [[VAL:%.*]] = load [[STRUCT_TUP]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT: ret [[STRUCT_TUP]] [[VAL]]
+;
+entry:
+ %alloca = alloca i64
+ br i1 %cond, label %A, label %B
+A:
+ store %struct.tup %a, ptr %alloca
+ br label %sink
+B:
+ store half %b, ptr %alloca
+ br label %sink
+sink:
+ %val = load %struct.tup, ptr %alloca
+ ret %struct.tup %val
+}
+
+; Both paths store an i16 to the same alloca, but the two stores carry
+; different explicit alignments (align 8 in BB0, align 4 in BB1). The expected
+; output keeps the alloca, both stores and the load in place; no phi is formed.
+define i16 @same_types_diff_align_no_merge(i1 %cond, i16 %a, i16 %b) {
+; CHECK-LABEL: @same_types_diff_align_no_merge(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i16, align 4
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK: BB0:
+; CHECK-NEXT: store i16 [[A:%.*]], ptr [[ALLOCA]], align 8
+; CHECK-NEXT: br label [[SINK:%.*]]
+; CHECK: BB1:
+; CHECK-NEXT: store i16 [[B:%.*]], ptr [[ALLOCA]], align 4
+; CHECK-NEXT: br label [[SINK]]
+; CHECK: sink:
+; CHECK-NEXT: [[VAL:%.*]] = load i16, ptr [[ALLOCA]], align 4
+; CHECK-NEXT: ret i16 [[VAL]]
+;
+entry:
+ %alloca = alloca i16, align 4
+ br i1 %cond, label %BB0, label %BB1
+BB0:
+ store i16 %a, ptr %alloca, align 8
+ br label %sink
+BB1:
+ store i16 %b, ptr %alloca, align 4
+ br label %sink
+sink:
+ %val = load i16, ptr %alloca
+ ret i16 %val
+}
+
+; An i64 and a ptr are stored to the same alloca on the two incoming paths and
+; an i64 is loaded back in sink. The expected output has no alloca, store or
+; load left: the pointer is converted with ptrtoint in BB1 and a phi of type
+; i64 in sink feeds the return.
+define i64 @ptrtoint_merge(i1 %cond, i64 %a, ptr %b) {
+; CHECK-LABEL: @ptrtoint_merge(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK: BB0:
+; CHECK-NEXT: br label [[SINK:%.*]]
+; CHECK: BB1:
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[B:%.*]] to i64
+; CHECK-NEXT: br label [[SINK]]
+; CHECK: sink:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i64 [ [[A:%.*]], [[BB0]] ], [ [[TMP0]], [[BB1]] ]
+; CHECK-NEXT: ret i64 [[STOREMERGE]]
+;
+entry:
+ %alloca = alloca ptr
+ br i1 %cond, label %BB0, label %BB1
+BB0:
+ store i64 %a, ptr %alloca
+ br label %sink
+BB1:
+ store ptr %b, ptr %alloca
+ br label %sink
+sink:
+ %val = load i64, ptr %alloca
+ ret i64 %val
+}
+
+; An i64 and a ptr are stored to the same alloca on the two incoming paths and
+; a ptr is loaded back in sink. The expected output has no alloca, store or
+; load left: the integer is converted with inttoptr in BB0 and a phi of type
+; ptr in sink feeds the return.
+define ptr @inttoptr_merge(i1 %cond, i64 %a, ptr %b) {
+; CHECK-LABEL: define ptr @inttoptr_merge
+; CHECK-SAME: (i1 [[COND:%.*]], i64 [[A:%.*]], ptr [[B:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK: BB0:
+; CHECK-NEXT: [[TMP0:%.*]] = inttoptr i64 [[A]] to ptr
+; CHECK-NEXT: br label [[SINK:%.*]]
+; CHECK: BB1:
+; CHECK-NEXT: br label [[SINK]]
+; CHECK: sink:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi ptr [ [[B]], [[BB1]] ], [ [[TMP0]], [[BB0]] ]
+; CHECK-NEXT: ret ptr [[STOREMERGE]]
+;
+entry:
+ %alloca = alloca ptr
+ br i1 %cond, label %BB0, label %BB1
+BB0:
+ store i64 %a, ptr %alloca, align 8
+ br label %sink
+BB1:
+ store ptr %b, ptr %alloca, align 8
+ br label %sink
+sink:
+ %val = load ptr, ptr %alloca
+ ret ptr %val
+}