--- /dev/null
+; RUN: opt -passes=slp-vectorizer -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck %s
+
+define <4 x half> @phis(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
+; CHECK-LABEL: @phis(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 0
+; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1
+; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2
+; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A0]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A1]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1
+; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
+; CHECK: bb0:
+; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 0
+; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1
+; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2
+; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B0]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B1]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ]
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x half> [[TMP12]]
+entry:
+ %a0 = extractelement <4 x half> %in1, i64 0
+ %a1 = extractelement <4 x half> %in1, i64 1
+ %a2 = extractelement <4 x half> %in1, i64 2
+ %a3 = extractelement <4 x half> %in1, i64 3
+ br i1 %cmp1, label %bb1, label %bb0
+
+bb0:
+ %b0 = extractelement <4 x half> %in2, i64 0
+ %b1 = extractelement <4 x half> %in2, i64 1
+ %b2 = extractelement <4 x half> %in2, i64 2
+ %b3 = extractelement <4 x half> %in2, i64 3
+ br label %bb1
+
+bb1:
+ %c0 = phi half [ %a0, %entry ], [ %b0, %bb0 ]
+ %c1 = phi half [ %a1, %entry ], [ %b1, %bb0 ]
+ %c2 = phi half [ %a2, %entry ], [ %b2, %bb0 ]
+ %c3 = phi half [ %a3, %entry ], [ %b3, %bb0 ]
+
+ %o0 = insertelement <4 x half> undef, half %c0, i64 0
+ %o1 = insertelement <4 x half> %o0, half %c1, i64 1
+ %o2 = insertelement <4 x half> %o1, half %c2, i64 2
+ %o3 = insertelement <4 x half> %o2, half %c3, i64 3
+ ret <4 x half> %o3
+}
+
+define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
+; CHECK-LABEL: @phis_reverse(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 0
+; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x half> [[IN1]], i64 1
+; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1]], i64 2
+; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x half> poison, half [[A1]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x half> [[TMP0]], half [[A0]], i32 1
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x half> poison, half [[A2]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x half> [[TMP2]], half [[A3]], i32 1
+; CHECK-NEXT: br i1 [[CMP:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
+; CHECK: bb0:
+; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 0
+; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x half> [[IN2]], i64 1
+; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2]], i64 2
+; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[B1]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x half> [[TMP4]], half [[B0]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x half> poison, half [[B2]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x half> [[TMP6]], half [[B3]], i32 1
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x half> [ [[TMP1]], %entry ], [ [[TMP5]], %bb0 ]
+; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x half> [ [[TMP3]], %entry ], [ [[TMP7]], %bb0 ]
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x half> [[TMP8]], <2 x half> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x half> [[TMP9]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x half> [[TMP12]]
+entry:
+ %a0 = extractelement <4 x half> %in1, i64 0
+ %a1 = extractelement <4 x half> %in1, i64 1
+ %a2 = extractelement <4 x half> %in1, i64 2
+ %a3 = extractelement <4 x half> %in1, i64 3
+ br i1 %cmp1, label %bb1, label %bb0
+
+bb0:
+ %b0 = extractelement <4 x half> %in2, i64 0
+ %b1 = extractelement <4 x half> %in2, i64 1
+ %b2 = extractelement <4 x half> %in2, i64 2
+ %b3 = extractelement <4 x half> %in2, i64 3
+ br label %bb1
+
+bb1:
+ %c3 = phi half [ %a3, %entry ], [ %b3, %bb0 ]
+ %c2 = phi half [ %a2, %entry ], [ %b2, %bb0 ]
+ %c1 = phi half [ %a1, %entry ], [ %b1, %bb0 ]
+ %c0 = phi half [ %a0, %entry ], [ %b0, %bb0 ]
+
+ %o0 = insertelement <4 x half> undef, half %c0, i64 0
+ %o1 = insertelement <4 x half> %o0, half %c1, i64 1
+ %o2 = insertelement <4 x half> %o1, half %c2, i64 2
+ %o3 = insertelement <4 x half> %o2, half %c3, i64 3
+ ret <4 x half> %o3
+}