// CHECK-LABEL: define{{.*}} %struct.int8x16x2_t @test_vld2q_lane_s8(i8* noundef %ptr, [2 x <16 x i8>] %src.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[SRC:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[SRC]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint8x16x2_t @test_vld2q_lane_u8(i8* noundef %ptr, [2 x <16 x i8>] %src.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[SRC]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly8x16x2_t @test_vld2q_lane_p8(i8* noundef %ptr, [2 x <16 x i8>] %src.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[SRC:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[SRC]], i32 0, i32 0
// CHECK: store [2 x <16 x i8>] [[SRC]].coerce, [2 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int8x16x3_t @test_vld3q_lane_s8(i8* noundef %ptr, [3 x <16 x i8>] %src.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[SRC:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[SRC]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[SRC]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint8x16x3_t @test_vld3q_lane_u8(i8* noundef %ptr, [3 x <16 x i8>] %src.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[SRC:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[SRC]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[SRC]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint16x8x2_t @test_vld2q_lane_u16(i16* noundef %a, [2 x <8 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint32x4x2_t @test_vld2q_lane_u32(i32* noundef %a, [2 x <4 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint64x2x2_t @test_vld2q_lane_u64(i64* noundef %a, [2 x <2 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, %struct.uint64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int16x8x2_t @test_vld2q_lane_s16(i16* noundef %a, [2 x <8 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int32x4x2_t @test_vld2q_lane_s32(i32* noundef %a, [2 x <4 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i32>] [[B]].coerce, [2 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int64x2x2_t @test_vld2q_lane_s64(i64* noundef %a, [2 x <2 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, %struct.int64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float16x8x2_t @test_vld2q_lane_f16(half* noundef %a, [2 x <8 x half>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x half>] [[B]].coerce, [2 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float32x4x2_t @test_vld2q_lane_f32(float* noundef %a, [2 x <4 x float>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x float>] [[B]].coerce, [2 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float64x2x2_t @test_vld2q_lane_f64(double* noundef %a, [2 x <2 x double>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, %struct.float64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x double>] [[B]].coerce, [2 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly16x8x2_t @test_vld2q_lane_p16(i16* noundef %a, [2 x <8 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i16>] [[B]].coerce, [2 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_lane_p64(i64* noundef %a, [2 x <2 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[B]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint8x8x2_t @test_vld2_lane_u8(i8* noundef %a, [2 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint16x4x2_t @test_vld2_lane_u16(i16* noundef %a, [2 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint32x2x2_t @test_vld2_lane_u32(i32* noundef %a, [2 x <2 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint64x1x2_t @test_vld2_lane_u64(i64* noundef %a, [2 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int8x8x2_t @test_vld2_lane_s8(i8* noundef %a, [2 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int16x4x2_t @test_vld2_lane_s16(i16* noundef %a, [2 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int32x2x2_t @test_vld2_lane_s32(i32* noundef %a, [2 x <2 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i32>] [[B]].coerce, [2 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int64x1x2_t @test_vld2_lane_s64(i64* noundef %a, [2 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float16x4x2_t @test_vld2_lane_f16(half* noundef %a, [2 x <4 x half>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x half>] [[B]].coerce, [2 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float32x2x2_t @test_vld2_lane_f32(float* noundef %a, [2 x <2 x float>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x float>] [[B]].coerce, [2 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float64x1x2_t @test_vld2_lane_f64(double* noundef %a, [2 x <1 x double>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, %struct.float64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x double>] [[B]].coerce, [2 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly8x8x2_t @test_vld2_lane_p8(i8* noundef %a, [2 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <8 x i8>] [[B]].coerce, [2 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly16x4x2_t @test_vld2_lane_p16(i16* noundef %a, [2 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <4 x i16>] [[B]].coerce, [2 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_lane_p64(i64* noundef %a, [2 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x i64>] [[B]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint16x8x3_t @test_vld3q_lane_u16(i16* noundef %a, [3 x <8 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint32x4x3_t @test_vld3q_lane_u32(i32* noundef %a, [3 x <4 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint64x2x3_t @test_vld3q_lane_u64(i64* noundef %a, [3 x <2 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, %struct.uint64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int16x8x3_t @test_vld3q_lane_s16(i16* noundef %a, [3 x <8 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int32x4x3_t @test_vld3q_lane_s32(i32* noundef %a, [3 x <4 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i32>] [[B]].coerce, [3 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int64x2x3_t @test_vld3q_lane_s64(i64* noundef %a, [3 x <2 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, %struct.int64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float16x8x3_t @test_vld3q_lane_f16(half* noundef %a, [3 x <8 x half>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x half>] [[B]].coerce, [3 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float32x4x3_t @test_vld3q_lane_f32(float* noundef %a, [3 x <4 x float>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x float>] [[B]].coerce, [3 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float64x2x3_t @test_vld3q_lane_f64(double* noundef %a, [3 x <2 x double>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, %struct.float64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x double>] [[B]].coerce, [3 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly8x16x3_t @test_vld3q_lane_p8(i8* noundef %a, [3 x <16 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <16 x i8>] [[B]].coerce, [3 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly16x8x3_t @test_vld3q_lane_p16(i16* noundef %a, [3 x <8 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i16>] [[B]].coerce, [3 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_lane_p64(i64* noundef %a, [3 x <2 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i64>] [[B]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint8x8x3_t @test_vld3_lane_u8(i8* noundef %a, [3 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint16x4x3_t @test_vld3_lane_u16(i16* noundef %a, [3 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint32x2x3_t @test_vld3_lane_u32(i32* noundef %a, [3 x <2 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint64x1x3_t @test_vld3_lane_u64(i64* noundef %a, [3 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int8x8x3_t @test_vld3_lane_s8(i8* noundef %a, [3 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int16x4x3_t @test_vld3_lane_s16(i16* noundef %a, [3 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int32x2x3_t @test_vld3_lane_s32(i32* noundef %a, [3 x <2 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x i32>] [[B]].coerce, [3 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int64x1x3_t @test_vld3_lane_s64(i64* noundef %a, [3 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float16x4x3_t @test_vld3_lane_f16(half* noundef %a, [3 x <4 x half>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x half>] [[B]].coerce, [3 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float32x2x3_t @test_vld3_lane_f32(float* noundef %a, [3 x <2 x float>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <2 x float>] [[B]].coerce, [3 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float64x1x3_t @test_vld3_lane_f64(double* noundef %a, [3 x <1 x double>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, %struct.float64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x double>] [[B]].coerce, [3 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly8x8x3_t @test_vld3_lane_p8(i8* noundef %a, [3 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <8 x i8>] [[B]].coerce, [3 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly16x4x3_t @test_vld3_lane_p16(i16* noundef %a, [3 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <4 x i16>] [[B]].coerce, [3 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly64x1x3_t @test_vld3_lane_p64(i64* noundef %a, [3 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[B]], i32 0, i32 0
// CHECK: store [3 x <1 x i64>] [[B]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint8x16x4_t @test_vld4q_lane_u8(i8* noundef %a, [4 x <16 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint16x8x4_t @test_vld4q_lane_u16(i16* noundef %a, [4 x <8 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint32x4x4_t @test_vld4q_lane_u32(i32* noundef %a, [4 x <4 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint64x2x4_t @test_vld4q_lane_u64(i64* noundef %a, [4 x <2 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, %struct.uint64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int8x16x4_t @test_vld4q_lane_s8(i8* noundef %a, [4 x <16 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int16x8x4_t @test_vld4q_lane_s16(i16* noundef %a, [4 x <8 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int32x4x4_t @test_vld4q_lane_s32(i32* noundef %a, [4 x <4 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i32>] [[B]].coerce, [4 x <4 x i32>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int64x2x4_t @test_vld4q_lane_s64(i64* noundef %a, [4 x <2 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, %struct.int64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float16x8x4_t @test_vld4q_lane_f16(half* noundef %a, [4 x <8 x half>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x half>] [[B]].coerce, [4 x <8 x half>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float32x4x4_t @test_vld4q_lane_f32(float* noundef %a, [4 x <4 x float>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x float>] [[B]].coerce, [4 x <4 x float>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float64x2x4_t @test_vld4q_lane_f64(double* noundef %a, [4 x <2 x double>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, %struct.float64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x double>] [[B]].coerce, [4 x <2 x double>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x2x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly8x16x4_t @test_vld4q_lane_p8(i8* noundef %a, [4 x <16 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, [4 x <16 x i8>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly16x8x4_t @test_vld4q_lane_p16(i16* noundef %a, [4 x <8 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, [4 x <8 x i16>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_lane_p64(i64* noundef %a, [4 x <2 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint8x8x4_t @test_vld4_lane_u8(i8* noundef %a, [4 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint16x4x4_t @test_vld4_lane_u16(i16* noundef %a, [4 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint32x2x4_t @test_vld4_lane_u32(i32* noundef %a, [4 x <2 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.uint64x1x4_t @test_vld4_lane_u64(i64* noundef %a, [4 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int8x8x4_t @test_vld4_lane_s8(i8* noundef %a, [4 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int16x4x4_t @test_vld4_lane_s16(i16* noundef %a, [4 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int32x2x4_t @test_vld4_lane_s32(i32* noundef %a, [4 x <2 x i32>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i32>] [[B]].coerce, [4 x <2 x i32>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.int64x1x4_t @test_vld4_lane_s64(i64* noundef %a, [4 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float16x4x4_t @test_vld4_lane_f16(half* noundef %a, [4 x <4 x half>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x half>] [[B]].coerce, [4 x <4 x half>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float32x2x4_t @test_vld4_lane_f32(float* noundef %a, [4 x <2 x float>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x float>] [[B]].coerce, [4 x <2 x float>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.float64x1x4_t @test_vld4_lane_f64(double* noundef %a, [4 x <1 x double>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, %struct.float64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x double>] [[B]].coerce, [4 x <1 x double>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.float64x1x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly8x8x4_t @test_vld4_lane_p8(i8* noundef %a, [4 x <8 x i8>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, [4 x <8 x i8>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly16x4x4_t @test_vld4_lane_p16(i16* noundef %a, [4 x <4 x i16>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, [4 x <4 x i16>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_lane_p64(i64* noundef %a, [4 x <1 x i64>] %b.coerce) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
// CHECK-LABEL: @test_vld2q_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld2q_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld2q_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld2q_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld2q_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld2q_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld2q_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld2_lane_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld2_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld2_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld2_lane_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld2_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld2_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld2_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld2_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld2_lane_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld2_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK: store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld3q_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld3q_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld3q_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld3q_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld3q_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld3q_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld3q_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK: store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld3_lane_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld3_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld3_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld3_lane_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld3_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld3_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld3_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld3_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld3_lane_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld3_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK: store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld4q_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld4q_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld4q_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld4q_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld4q_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld4q_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld4q_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK: store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK-LABEL: @test_vld4_lane_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld4_lane_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld4_lane_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld4_lane_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld4_lane_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld4_lane_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld4_lane_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld4_lane_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld4_lane_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK-LABEL: @test_vld4_lane_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
+// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK: [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK: store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8