From 6491e0165e96a93960e2dfb338c52c7eb155f408 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 13 Mar 2021 15:59:09 +0100 Subject: [PATCH] [SROA] Regenerate test checks (NFC) --- llvm/test/Transforms/SROA/basictest.ll | 1405 ++++++++++++++----------- llvm/test/Transforms/SROA/vector-promotion.ll | 375 +++---- 2 files changed, 1001 insertions(+), 779 deletions(-) diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll index d15d01e..24cc524 100644 --- a/llvm/test/Transforms/SROA/basictest.ll +++ b/llvm/test/Transforms/SROA/basictest.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -sroa -S | FileCheck %s ; RUN: opt < %s -passes=sroa -S | FileCheck %s @@ -8,8 +9,11 @@ declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) define i32 @test0() { ; CHECK-LABEL: @test0( -; CHECK-NOT: alloca -; CHECK: ret i32 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V2_INT:%.*]] = bitcast float 0.000000e+00 to i32 +; CHECK-NEXT: [[SUM1:%.*]] = add i32 0, [[V2_INT]] +; CHECK-NEXT: ret i32 [[SUM1]] +; entry: %a1 = alloca i32 @@ -38,8 +42,9 @@ entry: define i32 @test1() { ; CHECK-LABEL: @test1( -; CHECK-NOT: alloca -; CHECK: ret i32 0 +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 0 +; entry: %X = alloca { i32, float } @@ -51,8 +56,11 @@ entry: define i64 @test2(i64 %X) { ; CHECK-LABEL: @test2( -; CHECK-NOT: alloca -; CHECK: ret i64 %X +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[L2:%.*]] +; CHECK: L2: +; CHECK-NEXT: ret i64 [[X:%.*]] +; entry: %A = alloca [8 x i8] @@ -67,8 +75,11 @@ L2: define i64 @test2_addrspacecast(i64 %X) { ; CHECK-LABEL: @test2_addrspacecast( -; CHECK-NOT: alloca -; CHECK: ret i64 %X +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[L2:%.*]] +; CHECK: L2: +; CHECK-NEXT: ret i64 [[X:%.*]] +; entry: %A = alloca [8 x i8] @@ -83,8 +94,11 @@ L2: define i64 @test2_addrspacecast_gep(i64 %X, i16 %idx) { ; CHECK-LABEL: @test2_addrspacecast_gep( -; CHECK-NOT: alloca -; CHECK: ret i64 %X +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[L2:%.*]] +; CHECK: L2: +; CHECK-NEXT: ret i64 [[X:%.*]] +; entry: %A = alloca [256 x i8] @@ -103,15 +117,18 @@ L2: ; Avoid crashing when load/storing at at different offsets. 
define i64 @test2_addrspacecast_gep_offset(i64 %X) { ; CHECK-LABEL: @test2_addrspacecast_gep_offset( -; CHECK: %A.sroa.0 = alloca [10 x i8] -; CHECK: [[GEP0:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %A.sroa.0, i16 0, i16 2 -; CHECK-NEXT: [[GEP1:%.*]] = addrspacecast i8* [[GEP0]] to i64 addrspace(1)* -; CHECK-NEXT: store i64 %X, i64 addrspace(1)* [[GEP1]], align 1 -; CHECK: br - -; CHECK: [[BITCAST:%.*]] = bitcast [10 x i8]* %A.sroa.0 to i64* -; CHECK: %A.sroa.0.0.A.sroa.0.30.Z = load i64, i64* [[BITCAST]], align 1 -; CHECK-NEXT: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [10 x i8], align 1 +; CHECK-NEXT: [[A_SROA_0_2_GEPB_SROA_IDX:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[A_SROA_0]], i16 0, i16 2 +; CHECK-NEXT: [[A_SROA_0_2_GEPB_SROA_CAST:%.*]] = addrspacecast i8* [[A_SROA_0_2_GEPB_SROA_IDX]] to i64 addrspace(1)* +; CHECK-NEXT: store i64 [[X:%.*]], i64 addrspace(1)* [[A_SROA_0_2_GEPB_SROA_CAST]], align 1 +; CHECK-NEXT: br label [[L2:%.*]] +; CHECK: L2: +; CHECK-NEXT: [[A_SROA_0_0_GEPA_BC_SROA_CAST:%.*]] = bitcast [10 x i8]* [[A_SROA_0]] to i64* +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_30_Z:%.*]] = load i64, i64* [[A_SROA_0_0_GEPA_BC_SROA_CAST]], align 1 +; CHECK-NEXT: ret i64 [[A_SROA_0_0_A_SROA_0_30_Z]] +; + entry: %A = alloca [256 x i8] %B = addrspacecast [256 x i8]* %A to i64 addrspace(1)* @@ -128,45 +145,149 @@ L2: define void @test3(i8* %dst, i8* align 8 %src) { ; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [42 x i8], align 1 +; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [99 x i8], align 1 +; CHECK-NEXT: [[A_SROA_34:%.*]] = alloca [16 x i8], align 1 +; CHECK-NEXT: [[A_SROA_15:%.*]] = alloca [42 x i8], align 1 +; CHECK-NEXT: [[A_SROA_16:%.*]] = alloca [7 x i8], align 1 +; CHECK-NEXT: [[A_SROA_239:%.*]] = alloca [7 x i8], align 1 +; CHECK-NEXT: [[A_SROA_31:%.*]] = alloca [85 x i8], align 1 +; CHECK-NEXT: [[A_SROA_0_0_B_SROA_IDX:%.*]] = getelementptr inbounds [42 x i8], [42 x i8]* [[A_SROA_0]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_0_0_B_SROA_IDX]], i8* align 8 [[SRC:%.*]], i32 42, i1 false), !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 42 +; CHECK-NEXT: [[A_SROA_2_0_COPYLOAD:%.*]] = load i8, i8* [[A_SROA_2_0_SRC_SROA_RAW_IDX]], align 2, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 43 +; CHECK-NEXT: [[A_SROA_3_0_B_SROA_IDX:%.*]] = getelementptr inbounds [99 x i8], [99 x i8]* [[A_SROA_3]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_3_0_B_SROA_IDX]], i8* align 1 [[A_SROA_3_0_SRC_SROA_RAW_IDX]], i32 99, i1 false), !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_0_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 142 +; CHECK-NEXT: [[A_SROA_34_0_B_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_34_0_B_SROA_IDX]], i8* align 2 [[A_SROA_34_0_SRC_SROA_RAW_IDX]], i32 16, i1 false), !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: [[A_SROA_15_0_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 158 +; CHECK-NEXT: [[A_SROA_15_0_B_SROA_IDX:%.*]] = getelementptr inbounds [42 x i8], [42 x i8]* [[A_SROA_15]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_15_0_B_SROA_IDX]], i8* align 2 
[[A_SROA_15_0_SRC_SROA_RAW_IDX]], i32 42, i1 false), !tbaa [[TBAA6:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_0_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 200 +; CHECK-NEXT: [[A_SROA_16_0_B_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_16]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_16_0_B_SROA_IDX]], i8* align 8 [[A_SROA_16_0_SRC_SROA_RAW_IDX]], i32 7, i1 false) +; CHECK-NEXT: [[A_SROA_23_0_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 207 +; CHECK-NEXT: [[A_SROA_23_0_COPYLOAD:%.*]] = load i8, i8* [[A_SROA_23_0_SRC_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_239_0_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 208 +; CHECK-NEXT: [[A_SROA_239_0_B_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_239]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_239_0_B_SROA_IDX]], i8* align 8 [[A_SROA_239_0_SRC_SROA_RAW_IDX]], i32 7, i1 false) +; CHECK-NEXT: [[A_SROA_31_0_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 215 +; CHECK-NEXT: [[A_SROA_31_0_B_SROA_IDX:%.*]] = getelementptr inbounds [85 x i8], [85 x i8]* [[A_SROA_31]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_31_0_B_SROA_IDX]], i8* align 1 [[A_SROA_31_0_SRC_SROA_RAW_IDX]], i32 85, i1 false) +; CHECK-NEXT: [[A_SROA_34_0_OVERLAP_1_I8_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 0 +; CHECK-NEXT: store i8 1, i8* [[A_SROA_34_0_OVERLAP_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA7:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_0_OVERLAP_1_I16_SROA_CAST:%.*]] = bitcast [16 x i8]* [[A_SROA_34]] to i16* +; CHECK-NEXT: store i16 1, i16* [[A_SROA_34_0_OVERLAP_1_I16_SROA_CAST]], align 1, !tbaa [[TBAA9:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_0_OVERLAP_1_I32_SROA_CAST:%.*]] = bitcast [16 x i8]* [[A_SROA_34]] to i32* +; CHECK-NEXT: store i32 1, i32* [[A_SROA_34_0_OVERLAP_1_I32_SROA_CAST]], align 1, !tbaa [[TBAA11:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_0_OVERLAP_1_I64_SROA_CAST:%.*]] = bitcast [16 x i8]* [[A_SROA_34]] to i64* +; CHECK-NEXT: store i64 1, i64* [[A_SROA_34_0_OVERLAP_1_I64_SROA_CAST]], align 1, !tbaa [[TBAA13:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_1_OVERLAP_2_I64_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 1 +; CHECK-NEXT: [[A_SROA_34_1_OVERLAP_2_I64_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_34_1_OVERLAP_2_I64_SROA_IDX]] to i64* +; CHECK-NEXT: store i64 2, i64* [[A_SROA_34_1_OVERLAP_2_I64_SROA_CAST]], align 1, !tbaa [[TBAA15:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_2_OVERLAP_3_I64_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 2 +; CHECK-NEXT: [[A_SROA_34_2_OVERLAP_3_I64_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_34_2_OVERLAP_3_I64_SROA_IDX]] to i64* +; CHECK-NEXT: store i64 3, i64* [[A_SROA_34_2_OVERLAP_3_I64_SROA_CAST]], align 1, !tbaa [[TBAA17:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_3_OVERLAP_4_I64_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 3 +; CHECK-NEXT: [[A_SROA_34_3_OVERLAP_4_I64_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_34_3_OVERLAP_4_I64_SROA_IDX]] to i64* +; CHECK-NEXT: store i64 4, i64* [[A_SROA_34_3_OVERLAP_4_I64_SROA_CAST]], align 1, !tbaa [[TBAA19:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_4_OVERLAP_5_I64_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 4 +; CHECK-NEXT: [[A_SROA_34_4_OVERLAP_5_I64_SROA_CAST:%.*]] 
= bitcast i8* [[A_SROA_34_4_OVERLAP_5_I64_SROA_IDX]] to i64* +; CHECK-NEXT: store i64 5, i64* [[A_SROA_34_4_OVERLAP_5_I64_SROA_CAST]], align 1, !tbaa [[TBAA21:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_5_OVERLAP_6_I64_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 5 +; CHECK-NEXT: [[A_SROA_34_5_OVERLAP_6_I64_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_34_5_OVERLAP_6_I64_SROA_IDX]] to i64* +; CHECK-NEXT: store i64 6, i64* [[A_SROA_34_5_OVERLAP_6_I64_SROA_CAST]], align 1, !tbaa [[TBAA23:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_6_OVERLAP_7_I64_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 6 +; CHECK-NEXT: [[A_SROA_34_6_OVERLAP_7_I64_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_34_6_OVERLAP_7_I64_SROA_IDX]] to i64* +; CHECK-NEXT: store i64 7, i64* [[A_SROA_34_6_OVERLAP_7_I64_SROA_CAST]], align 1, !tbaa [[TBAA25:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_7_OVERLAP_8_I64_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 7 +; CHECK-NEXT: [[A_SROA_34_7_OVERLAP_8_I64_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_34_7_OVERLAP_8_I64_SROA_IDX]] to i64* +; CHECK-NEXT: store i64 8, i64* [[A_SROA_34_7_OVERLAP_8_I64_SROA_CAST]], align 1, !tbaa [[TBAA27:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_8_OVERLAP_9_I64_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 8 +; CHECK-NEXT: [[A_SROA_34_8_OVERLAP_9_I64_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_34_8_OVERLAP_9_I64_SROA_IDX]] to i64* +; CHECK-NEXT: store i64 9, i64* [[A_SROA_34_8_OVERLAP_9_I64_SROA_CAST]], align 1, !tbaa [[TBAA29:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_0_OVERLAP2_1_0_I8_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_16]], i64 0, i64 0 +; CHECK-NEXT: store i8 1, i8* [[A_SROA_16_0_OVERLAP2_1_0_I8_SROA_IDX]], align 1, !tbaa [[TBAA31:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_0_OVERLAP2_1_0_I16_SROA_CAST:%.*]] = bitcast [7 x i8]* [[A_SROA_16]] to i16* +; CHECK-NEXT: store i16 1, i16* [[A_SROA_16_0_OVERLAP2_1_0_I16_SROA_CAST]], align 1, !tbaa [[TBAA33:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_0_OVERLAP2_1_0_I32_SROA_CAST:%.*]] = bitcast [7 x i8]* [[A_SROA_16]] to i32* +; CHECK-NEXT: store i32 1, i32* [[A_SROA_16_0_OVERLAP2_1_0_I32_SROA_CAST]], align 1, !tbaa [[TBAA35:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_1_OVERLAP2_1_1_I32_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_16]], i64 0, i64 1 +; CHECK-NEXT: [[A_SROA_16_1_OVERLAP2_1_1_I32_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_16_1_OVERLAP2_1_1_I32_SROA_IDX]] to i32* +; CHECK-NEXT: store i32 2, i32* [[A_SROA_16_1_OVERLAP2_1_1_I32_SROA_CAST]], align 1, !tbaa [[TBAA37:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I32_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_16]], i64 0, i64 2 +; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I32_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_16_2_OVERLAP2_1_2_I32_SROA_IDX]] to i32* +; CHECK-NEXT: store i32 3, i32* [[A_SROA_16_2_OVERLAP2_1_2_I32_SROA_CAST]], align 1, !tbaa [[TBAA39:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_3_OVERLAP2_1_3_I32_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_16]], i64 0, i64 3 +; CHECK-NEXT: [[A_SROA_16_3_OVERLAP2_1_3_I32_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_16_3_OVERLAP2_1_3_I32_SROA_IDX]] to i32* +; CHECK-NEXT: store i32 4, i32* [[A_SROA_16_3_OVERLAP2_1_3_I32_SROA_CAST]], align 1, !tbaa [[TBAA41:![0-9]+]] +; CHECK-NEXT: [[A_SROA_239_0_OVERLAP2_2_0_I32_SROA_CAST:%.*]] = bitcast [7 x i8]* [[A_SROA_239]] to i32* +; CHECK-NEXT: store i32 1, i32* 
[[A_SROA_239_0_OVERLAP2_2_0_I32_SROA_CAST]], align 1, !tbaa [[TBAA43:![0-9]+]] +; CHECK-NEXT: [[A_SROA_239_1_OVERLAP2_2_1_I8_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_239]], i64 0, i64 1 +; CHECK-NEXT: store i8 1, i8* [[A_SROA_239_1_OVERLAP2_2_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA45:![0-9]+]] +; CHECK-NEXT: [[A_SROA_239_1_OVERLAP2_2_1_I16_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_239]], i64 0, i64 1 +; CHECK-NEXT: [[A_SROA_239_1_OVERLAP2_2_1_I16_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_239_1_OVERLAP2_2_1_I16_SROA_IDX]] to i16* +; CHECK-NEXT: store i16 1, i16* [[A_SROA_239_1_OVERLAP2_2_1_I16_SROA_CAST]], align 1, !tbaa [[TBAA47:![0-9]+]] +; CHECK-NEXT: [[A_SROA_239_1_OVERLAP2_2_1_I32_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_239]], i64 0, i64 1 +; CHECK-NEXT: [[A_SROA_239_1_OVERLAP2_2_1_I32_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_239_1_OVERLAP2_2_1_I32_SROA_IDX]] to i32* +; CHECK-NEXT: store i32 1, i32* [[A_SROA_239_1_OVERLAP2_2_1_I32_SROA_CAST]], align 1, !tbaa [[TBAA49:![0-9]+]] +; CHECK-NEXT: [[A_SROA_239_2_OVERLAP2_2_2_I32_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_239]], i64 0, i64 2 +; CHECK-NEXT: [[A_SROA_239_2_OVERLAP2_2_2_I32_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_239_2_OVERLAP2_2_2_I32_SROA_IDX]] to i32* +; CHECK-NEXT: store i32 3, i32* [[A_SROA_239_2_OVERLAP2_2_2_I32_SROA_CAST]], align 1, !tbaa [[TBAA51:![0-9]+]] +; CHECK-NEXT: [[A_SROA_239_3_OVERLAP2_2_3_I32_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_239]], i64 0, i64 3 +; CHECK-NEXT: [[A_SROA_239_3_OVERLAP2_2_3_I32_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_239_3_OVERLAP2_2_3_I32_SROA_IDX]] to i32* +; CHECK-NEXT: store i32 4, i32* [[A_SROA_239_3_OVERLAP2_2_3_I32_SROA_CAST]], align 1, !tbaa [[TBAA53:![0-9]+]] +; CHECK-NEXT: [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX:%.*]] = getelementptr inbounds [42 x i8], [42 x i8]* [[A_SROA_15]], i64 0, i64 39 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX]], i8* align 1 [[SRC]], i32 3, i1 false), !tbaa [[TBAA55:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_197_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 3 +; CHECK-NEXT: [[A_SROA_16_197_OVERLAP2_PREFIX_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_16]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_16_197_OVERLAP2_PREFIX_SROA_IDX]], i8* align 1 [[A_SROA_16_197_SRC_SROA_RAW_IDX]], i32 5, i1 false) +; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_16]], i64 0, i64 2 +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX]], i8 42, i32 5, i1 false), !tbaa [[TBAA57:![0-9]+]] +; CHECK-NEXT: [[A_SROA_239_0_OVERLAP2_1_2_I8_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_239]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[A_SROA_239_0_OVERLAP2_1_2_I8_SROA_IDX]], i8 42, i32 2, i1 false) +; CHECK-NEXT: [[A_SROA_239_209_OVERLAP2_2_1_I8_SROA_IDX11:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_239]], i64 0, i64 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_239_209_OVERLAP2_2_1_I8_SROA_IDX11]], i8* align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA59:![0-9]+]] +; CHECK-NEXT: [[A_SROA_239_210_OVERLAP2_2_2_I8_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_239]], i64 0, i64 2 +; 
CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_239_210_OVERLAP2_2_2_I8_SROA_IDX]], i8* align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA61:![0-9]+]] +; CHECK-NEXT: [[A_SROA_31_210_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 5 +; CHECK-NEXT: [[A_SROA_31_210_OVERLAP2_2_2_I8_SROA_IDX:%.*]] = getelementptr inbounds [85 x i8], [85 x i8]* [[A_SROA_31]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_31_210_OVERLAP2_2_2_I8_SROA_IDX]], i8* align 1 [[A_SROA_31_210_SRC_SROA_RAW_IDX]], i32 3, i1 false) +; CHECK-NEXT: [[A_SROA_0_0_B_SROA_IDX1:%.*]] = getelementptr inbounds [42 x i8], [42 x i8]* [[A_SROA_0]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[DST:%.*]], i8* align 1 [[A_SROA_0_0_B_SROA_IDX1]], i32 42, i1 false), !tbaa [[TBAA63:![0-9]+]] +; CHECK-NEXT: [[A_SROA_2_0_DST_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 42 +; CHECK-NEXT: store i8 0, i8* [[A_SROA_2_0_DST_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 43 +; CHECK-NEXT: [[A_SROA_3_0_B_SROA_IDX3:%.*]] = getelementptr inbounds [99 x i8], [99 x i8]* [[A_SROA_3]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_3_0_DST_SROA_RAW_IDX]], i8* align 1 [[A_SROA_3_0_B_SROA_IDX3]], i32 99, i1 false) +; CHECK-NEXT: [[A_SROA_34_0_DST_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 142 +; CHECK-NEXT: [[A_SROA_34_0_B_SROA_IDX5:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_34]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_34_0_DST_SROA_RAW_IDX]], i8* align 1 [[A_SROA_34_0_B_SROA_IDX5]], i32 16, i1 false) +; CHECK-NEXT: [[A_SROA_15_0_DST_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 158 +; CHECK-NEXT: [[A_SROA_15_0_B_SROA_IDX6:%.*]] = getelementptr inbounds [42 x i8], [42 x i8]* [[A_SROA_15]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_15_0_DST_SROA_RAW_IDX]], i8* align 1 [[A_SROA_15_0_B_SROA_IDX6]], i32 42, i1 false) +; CHECK-NEXT: [[A_SROA_16_0_DST_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 200 +; CHECK-NEXT: [[A_SROA_16_0_B_SROA_IDX7:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_16]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_16_0_DST_SROA_RAW_IDX]], i8* align 1 [[A_SROA_16_0_B_SROA_IDX7]], i32 7, i1 false) +; CHECK-NEXT: [[A_SROA_23_0_DST_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 207 +; CHECK-NEXT: store i8 42, i8* [[A_SROA_23_0_DST_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_239_0_DST_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 208 +; CHECK-NEXT: [[A_SROA_239_0_B_SROA_IDX10:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_239]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_239_0_DST_SROA_RAW_IDX]], i8* align 1 [[A_SROA_239_0_B_SROA_IDX10]], i32 7, i1 false) +; CHECK-NEXT: [[A_SROA_31_0_DST_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 215 +; CHECK-NEXT: [[A_SROA_31_0_B_SROA_IDX12:%.*]] = getelementptr inbounds [85 x i8], [85 x i8]* [[A_SROA_31]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_31_0_DST_SROA_RAW_IDX]], i8* align 1 [[A_SROA_31_0_B_SROA_IDX12]], i32 85, i1 false) +; CHECK-NEXT: ret void +; entry: %a = alloca [300 x i8] -; 
CHECK-NOT: alloca -; CHECK: %[[test3_a1:.*]] = alloca [42 x i8] -; CHECK-NEXT: %[[test3_a2:.*]] = alloca [99 x i8] -; CHECK-NEXT: %[[test3_a3:.*]] = alloca [16 x i8] -; CHECK-NEXT: %[[test3_a4:.*]] = alloca [42 x i8] -; CHECK-NEXT: %[[test3_a5:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test3_a6:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8] %b = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 0 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* align 8 %src, i32 300, i1 false), !tbaa !0 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %src, i32 42, {{.*}}), !tbaa [[TAG_0:!.*]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42 -; CHECK-NEXT: %[[test3_r1:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0_M42:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}), !tbaa [[TAG_0_M43:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 142 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 16, {{.*}}), !tbaa [[TAG_0_M142:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 158 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 2 %[[gep_src]], i32 42, {{.*}}), !tbaa [[TAG_0_M158:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 200 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}}) -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 207 -; CHECK-NOT: %[[bad_test3_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa -; CHECK-NEXT: %[[test3_r2:.*]] = load i8, i8* %[[gep]], {{.*}} -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 208 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 8 %[[gep_src]], i32 7, {{.*}}) -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 215 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}) ; Clobber a single element of the array, this should be promotable, and be deleted. 
%c = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 42 @@ -195,49 +316,17 @@ entry: %overlap.8.i64 = bitcast i8* %overlap.8.i8 to i64* %overlap.9.i64 = bitcast i8* %overlap.9.i8 to i64* store i8 1, i8* %overlap.1.i8, !tbaa !3 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0 -; CHECK-NEXT: store i8 1, i8* %[[gep]], align 1, !tbaa [[TAG_3:!.*]] store i16 1, i16* %overlap.1.i16, !tbaa !5 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i16* -; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_5:!.*]] store i32 1, i32* %overlap.1.i32, !tbaa !7 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_7:!.*]] store i64 1, i64* %overlap.1.i64, !tbaa !9 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i64* -; CHECK-NEXT: store i64 1, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_9:!.*]] store i64 2, i64* %overlap.2.i64, !tbaa !11 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 2, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_11:!.*]] store i64 3, i64* %overlap.3.i64, !tbaa !13 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 2 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 3, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_13:!.*]] store i64 4, i64* %overlap.4.i64, !tbaa !15 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 3 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 4, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_15:!.*]] store i64 5, i64* %overlap.5.i64, !tbaa !17 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 4 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 5, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_17:!.*]] store i64 6, i64* %overlap.6.i64, !tbaa !19 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 5 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 6, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_19:!.*]] store i64 7, i64* %overlap.7.i64, !tbaa !21 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 6 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 7, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_21:!.*]] store i64 8, i64* %overlap.8.i64, !tbaa !23 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 7 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 8, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_23:!.*]] store i64 9, i64* %overlap.9.i64, !tbaa !25 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 8 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64* -; CHECK-NEXT: store i64 9, i64* %[[bitcast]], {{.*}}, !tbaa [[TAG_25:!.*]] ; Make two sequences of overlapping stores with more gaps and irregularities. 
%overlap2.1.0.i8 = getelementptr [300 x i8], [300 x i8]* %a, i64 0, i64 200 @@ -256,26 +345,11 @@ entry: %overlap2.1.2.i32 = bitcast i8* %overlap2.1.2.i8 to i32* %overlap2.1.3.i32 = bitcast i8* %overlap2.1.3.i8 to i32* store i8 1, i8* %overlap2.1.0.i8, !tbaa !27 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: store i8 1, i8* %[[gep]], align 1, !tbaa [[TAG_27:!.*]] store i16 1, i16* %overlap2.1.0.i16, !tbaa !29 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i16* -; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_29:!.*]] store i32 1, i32* %overlap2.1.0.i32, !tbaa !31 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_31:!.*]] store i32 2, i32* %overlap2.1.1.i32, !tbaa !33 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 2, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_33:!.*]] store i32 3, i32* %overlap2.1.2.i32, !tbaa !35 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 2 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 3, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_35:!.*]] store i32 4, i32* %overlap2.1.3.i32, !tbaa !37 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 3 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 4, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_37:!.*]] %overlap2.2.0.i32 = bitcast i8* %overlap2.2.0.i8 to i32* %overlap2.2.1.i16 = bitcast i8* %overlap2.2.1.i8 to i16* @@ -283,140 +357,121 @@ entry: %overlap2.2.2.i32 = bitcast i8* %overlap2.2.2.i8 to i32* %overlap2.2.3.i32 = bitcast i8* %overlap2.2.3.i8 to i32* store i32 1, i32* %overlap2.2.0.i32, !tbaa !39 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a6]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_39:!.*]] store i8 1, i8* %overlap2.2.1.i8, !tbaa !41 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: store i8 1, i8* %[[gep]], align 1, !tbaa [[TAG_41:!.*]] store i16 1, i16* %overlap2.2.1.i16, !tbaa !43 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: store i16 1, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_43:!.*]] store i32 1, i32* %overlap2.2.1.i32, !tbaa !45 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 1, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_45:!.*]] store i32 3, i32* %overlap2.2.2.i32, !tbaa !47 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 2 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 3, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_47:!.*]] store i32 4, i32* %overlap2.2.3.i32, !tbaa !49 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 3 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32* -; CHECK-NEXT: store i32 4, i32* %[[bitcast]], {{.*}}, !tbaa [[TAG_49:!.*]] %overlap2.prefix = getelementptr i8, i8* %overlap2.1.1.i8, i64 
-4 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i1 false), !tbaa !51 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 39 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %src, i32 3, {{.*}}), !tbaa [[TAG_51:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 3 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 5, {{.*}}) ; Bridge between the overlapping areas call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i1 false), !tbaa !53 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 2 -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 5, {{.*}}), !tbaa [[TAG_53:!.*]] ; ...promoted i8 store... -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[gep]], i8 42, i32 2, {{.*}}) ; Entirely within the second overlap. call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i1 false), !tbaa !55 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 1 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5, {{.*}}), !tbaa [[TAG_55:!.*]] ; Trailing past the second overlap. call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i1 false), !tbaa !57 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 2 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 5, {{.*}}), !tbaa [[TAG_57:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 5 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 3, {{.*}}) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i1 false), !tbaa !59 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 42, {{.*}}), !tbaa [[TAG_59:!.*]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 42 -; CHECK-NEXT: store i8 0, i8* %[[gep]], {{.*}} -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 99, {{.*}}) -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 142 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[test3_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 16, {{.*}}) -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 158 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a4]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 
%[[gep_src]], i32 42, {{.*}}) -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 200 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}) -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 207 -; CHECK-NEXT: store i8 42, i8* %[[gep]], {{.*}} -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 208 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}) -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 215 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8], [85 x i8]* %[[test3_a7]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 85, {{.*}}) ret void } define void @test4(i8* %dst, i8* %src) { ; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [20 x i8], align 1 +; CHECK-NEXT: [[A_SROA_2_SROA_4:%.*]] = alloca [7 x i8], align 1 +; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [10 x i8], align 1 +; CHECK-NEXT: [[A_SROA_34_SROA_5:%.*]] = alloca [7 x i8], align 1 +; CHECK-NEXT: [[A_SROA_6_SROA_4:%.*]] = alloca [7 x i8], align 1 +; CHECK-NEXT: [[A_SROA_7:%.*]] = alloca [40 x i8], align 1 +; CHECK-NEXT: [[A_SROA_0_0_B_SROA_IDX:%.*]] = getelementptr inbounds [20 x i8], [20 x i8]* [[A_SROA_0]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_0_0_B_SROA_IDX]], i8* align 1 [[SRC:%.*]], i32 20, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_A_SROA_2_0_SRC_SROA_RAW_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 20 +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_A_SROA_2_0_SRC_SROA_RAW_IDX_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_2_SROA_0_0_A_SROA_2_0_SRC_SROA_RAW_IDX_SROA_IDX]] to i16* +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load i16, i16* [[A_SROA_2_SROA_0_0_A_SROA_2_0_SRC_SROA_RAW_IDX_SROA_CAST]], align 1, !tbaa [[TBAA65:![0-9]+]] +; CHECK-NEXT: [[A_SROA_2_SROA_3_0_A_SROA_2_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 22 +; CHECK-NEXT: [[A_SROA_2_SROA_3_0_COPYLOAD:%.*]] = load i8, i8* [[A_SROA_2_SROA_3_0_A_SROA_2_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX]], align 1, !tbaa [[TBAA66:![0-9]+]] +; CHECK-NEXT: [[A_SROA_2_SROA_4_0_A_SROA_2_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 23 +; CHECK-NEXT: [[A_SROA_2_SROA_4_0_B_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_2_SROA_4]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_2_SROA_4_0_B_SROA_IDX]], i8* align 1 [[A_SROA_2_SROA_4_0_A_SROA_2_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX]], i32 7, i1 false), !tbaa [[TBAA67:![0-9]+]] +; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 30 +; CHECK-NEXT: [[A_SROA_3_0_B_SROA_IDX:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[A_SROA_3]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_3_0_B_SROA_IDX]], i8* align 1 [[A_SROA_3_0_SRC_SROA_RAW_IDX]], i32 10, i1 false), !tbaa [[TBAA68:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_SROA_0_0_A_SROA_34_0_SRC_SROA_RAW_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 40 
+; CHECK-NEXT: [[A_SROA_34_SROA_0_0_A_SROA_34_0_SRC_SROA_RAW_IDX_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_34_SROA_0_0_A_SROA_34_0_SRC_SROA_RAW_IDX_SROA_IDX]] to i16* +; CHECK-NEXT: [[A_SROA_34_SROA_0_0_COPYLOAD:%.*]] = load i16, i16* [[A_SROA_34_SROA_0_0_A_SROA_34_0_SRC_SROA_RAW_IDX_SROA_CAST]], align 1, !tbaa [[TBAA69:![0-9]+]] +; CHECK-NEXT: [[A_SROA_34_SROA_4_0_A_SROA_34_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 42 +; CHECK-NEXT: [[A_SROA_34_SROA_4_0_COPYLOAD:%.*]] = load i8, i8* [[A_SROA_34_SROA_4_0_A_SROA_34_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX]], align 1, !tbaa [[TBAA3]] +; CHECK-NEXT: [[A_SROA_34_SROA_5_0_A_SROA_34_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 43 +; CHECK-NEXT: [[A_SROA_34_SROA_5_0_B_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_34_SROA_5]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_34_SROA_5_0_B_SROA_IDX]], i8* align 1 [[A_SROA_34_SROA_5_0_A_SROA_34_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX]], i32 7, i1 false), !tbaa [[TBAA4]] +; CHECK-NEXT: [[A_SROA_6_SROA_0_0_A_SROA_6_0_SRC_SROA_RAW_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 50 +; CHECK-NEXT: [[A_SROA_6_SROA_0_0_A_SROA_6_0_SRC_SROA_RAW_IDX_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_6_SROA_0_0_A_SROA_6_0_SRC_SROA_RAW_IDX_SROA_IDX]] to i16* +; CHECK-NEXT: [[A_SROA_6_SROA_0_0_COPYLOAD:%.*]] = load i16, i16* [[A_SROA_6_SROA_0_0_A_SROA_6_0_SRC_SROA_RAW_IDX_SROA_CAST]], align 1, !tbaa [[TBAA70:![0-9]+]] +; CHECK-NEXT: [[A_SROA_6_SROA_3_0_A_SROA_6_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 52 +; CHECK-NEXT: [[A_SROA_6_SROA_3_0_COPYLOAD:%.*]] = load i8, i8* [[A_SROA_6_SROA_3_0_A_SROA_6_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX]], align 1, !tbaa [[TBAA71:![0-9]+]] +; CHECK-NEXT: [[A_SROA_6_SROA_4_0_A_SROA_6_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 53 +; CHECK-NEXT: [[A_SROA_6_SROA_4_0_B_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_6_SROA_4]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_6_SROA_4_0_B_SROA_IDX]], i8* align 1 [[A_SROA_6_SROA_4_0_A_SROA_6_0_SRC_SROA_RAW_IDX_SROA_RAW_IDX]], i32 7, i1 false), !tbaa [[TBAA72:![0-9]+]] +; CHECK-NEXT: [[A_SROA_7_0_SRC_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 60 +; CHECK-NEXT: [[A_SROA_7_0_B_SROA_IDX:%.*]] = getelementptr inbounds [40 x i8], [40 x i8]* [[A_SROA_7]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_7_0_B_SROA_IDX]], i8* align 1 [[A_SROA_7_0_SRC_SROA_RAW_IDX]], i32 40, i1 false), !tbaa [[TBAA73:![0-9]+]] +; CHECK-NEXT: [[A_SROA_2_SROA_4_3_A_SROA_34_SROA_5_0_A_DST_1_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_34_SROA_5]], i64 0, i64 0 +; CHECK-NEXT: [[A_SROA_2_SROA_4_3_A_SRC_1_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_2_SROA_4]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_2_SROA_4_3_A_SROA_34_SROA_5_0_A_DST_1_SROA_IDX_SROA_IDX]], i8* align 1 [[A_SROA_2_SROA_4_3_A_SRC_1_SROA_IDX]], i32 7, i1 false) +; CHECK-NEXT: [[A_SROA_6_SROA_4_3_A_SROA_34_SROA_5_0_A_DST_1_SROA_IDX16_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_34_SROA_5]], i64 0, i64 0 +; CHECK-NEXT: [[A_SROA_6_SROA_4_3_A_SRC_2_SROA_IDX:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_6_SROA_4]], i64 0, i64 0 +; CHECK-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_6_SROA_4_3_A_SROA_34_SROA_5_0_A_DST_1_SROA_IDX16_SROA_IDX]], i8* align 1 [[A_SROA_6_SROA_4_3_A_SRC_2_SROA_IDX]], i32 7, i1 false) +; CHECK-NEXT: [[A_SROA_0_0_B_SROA_IDX1:%.*]] = getelementptr inbounds [20 x i8], [20 x i8]* [[A_SROA_0]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[DST:%.*]], i8* align 1 [[A_SROA_0_0_B_SROA_IDX1]], i32 20, i1 false), !tbaa [[TBAA11]] +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_A_SROA_2_0_DST_SROA_RAW_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 20 +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_A_SROA_2_0_DST_SROA_RAW_IDX_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_2_SROA_0_0_A_SROA_2_0_DST_SROA_RAW_IDX_SROA_IDX]] to i16* +; CHECK-NEXT: store i16 [[A_SROA_2_SROA_0_0_COPYLOAD]], i16* [[A_SROA_2_SROA_0_0_A_SROA_2_0_DST_SROA_RAW_IDX_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_SROA_2_SROA_3_0_A_SROA_2_0_DST_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 22 +; CHECK-NEXT: store i8 [[A_SROA_2_SROA_3_0_COPYLOAD]], i8* [[A_SROA_2_SROA_3_0_A_SROA_2_0_DST_SROA_RAW_IDX_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_2_SROA_4_0_A_SROA_2_0_DST_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 23 +; CHECK-NEXT: [[A_SROA_2_SROA_4_0_B_SROA_IDX22:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_2_SROA_4]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_2_SROA_4_0_A_SROA_2_0_DST_SROA_RAW_IDX_SROA_RAW_IDX]], i8* align 1 [[A_SROA_2_SROA_4_0_B_SROA_IDX22]], i32 7, i1 false) +; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 30 +; CHECK-NEXT: [[A_SROA_3_0_B_SROA_IDX3:%.*]] = getelementptr inbounds [10 x i8], [10 x i8]* [[A_SROA_3]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_3_0_DST_SROA_RAW_IDX]], i8* align 1 [[A_SROA_3_0_B_SROA_IDX3]], i32 10, i1 false) +; CHECK-NEXT: [[A_SROA_34_SROA_0_0_A_SROA_34_0_DST_SROA_RAW_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 40 +; CHECK-NEXT: [[A_SROA_34_SROA_0_0_A_SROA_34_0_DST_SROA_RAW_IDX_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_34_SROA_0_0_A_SROA_34_0_DST_SROA_RAW_IDX_SROA_IDX]] to i16* +; CHECK-NEXT: store i16 [[A_SROA_6_SROA_0_0_COPYLOAD]], i16* [[A_SROA_34_SROA_0_0_A_SROA_34_0_DST_SROA_RAW_IDX_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_SROA_34_SROA_4_0_A_SROA_34_0_DST_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 42 +; CHECK-NEXT: store i8 [[A_SROA_6_SROA_3_0_COPYLOAD]], i8* [[A_SROA_34_SROA_4_0_A_SROA_34_0_DST_SROA_RAW_IDX_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_34_SROA_5_0_A_SROA_34_0_DST_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 43 +; CHECK-NEXT: [[A_SROA_34_SROA_5_0_B_SROA_IDX15:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_34_SROA_5]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_34_SROA_5_0_A_SROA_34_0_DST_SROA_RAW_IDX_SROA_RAW_IDX]], i8* align 1 [[A_SROA_34_SROA_5_0_B_SROA_IDX15]], i32 7, i1 false) +; CHECK-NEXT: [[A_SROA_6_SROA_0_0_A_SROA_6_0_DST_SROA_RAW_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 50 +; CHECK-NEXT: [[A_SROA_6_SROA_0_0_A_SROA_6_0_DST_SROA_RAW_IDX_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_6_SROA_0_0_A_SROA_6_0_DST_SROA_RAW_IDX_SROA_IDX]] to i16* +; CHECK-NEXT: store i16 [[A_SROA_6_SROA_0_0_COPYLOAD]], i16* [[A_SROA_6_SROA_0_0_A_SROA_6_0_DST_SROA_RAW_IDX_SROA_CAST]], align 1 +; CHECK-NEXT: 
[[A_SROA_6_SROA_3_0_A_SROA_6_0_DST_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 52 +; CHECK-NEXT: store i8 [[A_SROA_6_SROA_3_0_COPYLOAD]], i8* [[A_SROA_6_SROA_3_0_A_SROA_6_0_DST_SROA_RAW_IDX_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_6_SROA_4_0_A_SROA_6_0_DST_SROA_RAW_IDX_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 53 +; CHECK-NEXT: [[A_SROA_6_SROA_4_0_B_SROA_IDX19:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* [[A_SROA_6_SROA_4]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_6_SROA_4_0_A_SROA_6_0_DST_SROA_RAW_IDX_SROA_RAW_IDX]], i8* align 1 [[A_SROA_6_SROA_4_0_B_SROA_IDX19]], i32 7, i1 false) +; CHECK-NEXT: [[A_SROA_7_0_DST_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 60 +; CHECK-NEXT: [[A_SROA_7_0_B_SROA_IDX8:%.*]] = getelementptr inbounds [40 x i8], [40 x i8]* [[A_SROA_7]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_7_0_DST_SROA_RAW_IDX]], i8* align 1 [[A_SROA_7_0_B_SROA_IDX8]], i32 40, i1 false) +; CHECK-NEXT: ret void +; entry: %a = alloca [100 x i8] -; CHECK-NOT: alloca -; CHECK: %[[test4_a1:.*]] = alloca [20 x i8] -; CHECK-NEXT: %[[test4_a2:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test4_a3:.*]] = alloca [10 x i8] -; CHECK-NEXT: %[[test4_a4:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test4_a5:.*]] = alloca [7 x i8] -; CHECK-NEXT: %[[test4_a6:.*]] = alloca [40 x i8] %b = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 0 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i1 false), !tbaa !0 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep]], i8* align 1 %src, i32 20, {{.*}}), !tbaa [[TAG_0]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 20 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: %[[test4_r1:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0_M20:!.*]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 22 -; CHECK-NEXT: %[[test4_r2:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0_M22:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 23 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0_M23:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 30 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10, {{.*}}), !tbaa [[TAG_0_M30:!.*]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 40 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: %[[test4_r3:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0_M40:!.*]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42 -; CHECK-NEXT: %[[test4_r4:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0_M42]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, 
{{.*}}), !tbaa [[TAG_0_M43]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 50 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: %[[test4_r5:.*]] = load i16, i16* %[[bitcast]], {{.*}}, !tbaa [[TAG_0_M50:!.*]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 52 -; CHECK-NEXT: %[[test4_r6:.*]] = load i8, i8* %[[gep]], {{.*}}, !tbaa [[TAG_0_M52:!.*]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 53 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}), !tbaa [[TAG_0_M53:!.+]] -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 60 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40, {{.*}}), !tbaa [[TAG_0_M60:!.+]] %a.src.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 20 %a.dst.1 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 40 call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i1 false), !tbaa !3 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}) ; Clobber a single element of the array, this should be promotable, and be deleted. %c = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 42 @@ -424,43 +479,8 @@ entry: %a.src.2 = getelementptr [100 x i8], [100 x i8]* %a, i64 0, i64 50 call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i1 false), !tbaa !5 -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}) call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i1 false), !tbaa !7 -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8], [20 x i8]* %[[test4_a1]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %dst, i8* align 1 %[[gep]], i32 20, {{.*}}), !tbaa [[TAG_7]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 20 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]], {{.*}} -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 22 -; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]], {{.*}} -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 23 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}) -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 30 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8], [10 x i8]* %[[test4_a3]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 10, {{.*}}) -; CHECK-NEXT: 
%[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 40 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]], {{.*}} -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 42 -; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]], {{.*}} -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 43 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}) -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 50 -; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16* -; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]], {{.*}} -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %dst, i64 52 -; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]], {{.*}} -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 53 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 7, {{.*}}) -; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8, i8* %dst, i64 60 -; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8], [40 x i8]* %[[test4_a6]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[gep_dst]], i8* align 1 %[[gep_src]], i32 40, {{.*}}) ret void } @@ -472,11 +492,12 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind define i16 @test5() { ; CHECK-LABEL: @test5( -; CHECK-NOT: alloca float -; CHECK: %[[cast:.*]] = bitcast float 0.0{{.*}} to i32 -; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16 -; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16 -; CHECK-NEXT: ret i16 %[[trunc]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float 0.000000e+00 to i32 +; CHECK-NEXT: [[A_SROA_0_2_EXTRACT_SHIFT:%.*]] = lshr i32 [[TMP0]], 16 +; CHECK-NEXT: [[A_SROA_0_2_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_0_2_EXTRACT_SHIFT]] to i16 +; CHECK-NEXT: ret i16 [[A_SROA_0_2_EXTRACT_TRUNC]] +; entry: %a = alloca [4 x i8] @@ -490,11 +511,12 @@ entry: define i16 @test5_multi_addrspace_access() { ; CHECK-LABEL: @test5_multi_addrspace_access( -; CHECK-NOT: alloca float -; CHECK: %[[cast:.*]] = bitcast float 0.0{{.*}} to i32 -; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16 -; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16 -; CHECK-NEXT: ret i16 %[[trunc]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float 0.000000e+00 to i32 +; CHECK-NEXT: [[A_SROA_0_2_EXTRACT_SHIFT:%.*]] = lshr i32 [[TMP0]], 16 +; CHECK-NEXT: [[A_SROA_0_2_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_0_2_EXTRACT_SHIFT]] to i16 +; CHECK-NEXT: ret i16 [[A_SROA_0_2_EXTRACT_TRUNC]] +; entry: %a = alloca [4 x i8] @@ -509,10 +531,12 @@ entry: define i32 @test6() { ; CHECK-LABEL: @test6( -; CHECK: alloca i32 -; CHECK-NEXT: store volatile i32 -; CHECK-NEXT: load i32, i32* -; CHECK-NEXT: ret i32 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store volatile i32 707406378, i32* [[A_SROA_0]], align 4 +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_VAL:%.*]] = load i32, i32* [[A_SROA_0]], align 4 +; CHECK-NEXT: ret i32 [[A_SROA_0_0_A_SROA_0_0_VAL]] +; entry: %a = alloca [4 x i8] @@ -525,14 +549,16 @@ entry: define void @test7(i8* %src, i8* %dst) { ; CHECK-LABEL: @test7( -; 
CHECK: alloca i32 -; CHECK-NEXT: bitcast i8* %src to i32* -; CHECK-NEXT: load volatile i32, {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: store volatile i32 {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: bitcast i8* %dst to i32* -; CHECK-NEXT: load volatile i32, {{.*}}, !tbaa [[TAG_3]] -; CHECK-NEXT: store volatile i32 {{.*}}, !tbaa [[TAG_3]] -; CHECK-NEXT: ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A_SROA_0_0_SRC_SROA_CAST:%.*]] = bitcast i8* [[SRC:%.*]] to i32* +; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, i32* [[A_SROA_0_0_SRC_SROA_CAST]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_COPYLOAD]], i32* [[A_SROA_0]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_0_0_DST_SROA_CAST:%.*]] = bitcast i8* [[DST:%.*]] to i32* +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile i32, i32* [[A_SROA_0]], align 4, !tbaa [[TBAA7]] +; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], i32* [[A_SROA_0_0_DST_SROA_CAST]], align 1, !tbaa [[TBAA7]] +; CHECK-NEXT: ret void +; entry: %a = alloca [4 x i8] @@ -546,16 +572,24 @@ entry: %S1 = type { i32, i32, [16 x i8] } %S2 = type { %S1*, %S2* } -define %S2 @test8(%S2* %s2) { +define %S2 @test8(%S2* %arg) { ; CHECK-LABEL: @test8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S2_NEXT_PTR:%.*]] = getelementptr [[S2:%.*]], %S2* [[ARG:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[S2_NEXT:%.*]] = load %S2*, %S2** [[S2_NEXT_PTR]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[S2_NEXT_S1_PTR:%.*]] = getelementptr [[S2]], %S2* [[S2_NEXT]], i64 0, i32 0 +; CHECK-NEXT: [[S2_NEXT_S1:%.*]] = load %S1*, %S1** [[S2_NEXT_S1_PTR]], align 8, !tbaa [[TBAA7]] +; CHECK-NEXT: [[S2_NEXT_NEXT_PTR:%.*]] = getelementptr [[S2]], %S2* [[S2_NEXT]], i64 0, i32 1 +; CHECK-NEXT: [[S2_NEXT_NEXT:%.*]] = load %S2*, %S2** [[S2_NEXT_NEXT_PTR]], align 8, !tbaa [[TBAA11]] +; CHECK-NEXT: [[RESULT1:%.*]] = insertvalue [[S2]] undef, %S1* [[S2_NEXT_S1]], 0 +; CHECK-NEXT: [[RESULT2:%.*]] = insertvalue [[S2]] [[RESULT1]], %S2* [[S2_NEXT_NEXT]], 1 +; CHECK-NEXT: ret [[S2]] [[RESULT2]] +; entry: %new = alloca %S2 -; CHECK-NOT: alloca - %s2.next.ptr = getelementptr %S2, %S2* %s2, i64 0, i32 1 + %s2.next.ptr = getelementptr %S2, %S2* %arg, i64 0, i32 1 %s2.next = load %S2*, %S2** %s2.next.ptr, !tbaa !0 -; CHECK: %[[gep:.*]] = getelementptr %S2, %S2* %s2, i64 0, i32 1 -; CHECK-NEXT: %[[next:.*]] = load %S2*, %S2** %[[gep]], align 8, !tbaa [[TAG_0]] %s2.next.s1.ptr = getelementptr %S2, %S2* %s2.next, i64 0, i32 0 %s2.next.s1 = load %S1*, %S1** %s2.next.s1.ptr, !tbaa !3 @@ -565,19 +599,12 @@ entry: %s2.next.next = load %S2*, %S2** %s2.next.next.ptr, !tbaa !7 %new.next.ptr = getelementptr %S2, %S2* %new, i64 0, i32 1 store %S2* %s2.next.next, %S2** %new.next.ptr, !tbaa !9 -; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2, %S2* %[[next]], i64 0, i32 0 -; CHECK-NEXT: %[[next_s1:.*]] = load %S1*, %S1** %[[gep]], align 8, !tbaa [[TAG_3]] -; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2, %S2* %[[next]], i64 0, i32 1 -; CHECK-NEXT: %[[next_next:.*]] = load %S2*, %S2** %[[gep]], align 8, !tbaa [[TAG_7]] %new.s1 = load %S1*, %S1** %new.s1.ptr %result1 = insertvalue %S2 undef, %S1* %new.s1, 0 -; CHECK-NEXT: %[[result1:.*]] = insertvalue %S2 undef, %S1* %[[next_s1]], 0 %new.next = load %S2*, %S2** %new.next.ptr %result2 = insertvalue %S2 %result1, %S2* %new.next, 1 -; CHECK-NEXT: %[[result2:.*]] = insertvalue %S2 %[[result1]], %S2* %[[next_next]], 1 ret %S2 %result2 -; CHECK-NEXT: ret %S2 %[[result2]] } define i64 @test9() { @@ 
-586,20 +613,21 @@ define i64 @test9() { ; off the bits past the end of the alloca. ; ; CHECK-LABEL: @test9( -; CHECK-NOT: alloca -; CHECK: %[[b2:.*]] = zext i8 26 to i64 -; CHECK-NEXT: %[[s2:.*]] = shl i64 %[[b2]], 16 -; CHECK-NEXT: %[[m2:.*]] = and i64 undef, -16711681 -; CHECK-NEXT: %[[i2:.*]] = or i64 %[[m2]], %[[s2]] -; CHECK-NEXT: %[[b1:.*]] = zext i8 0 to i64 -; CHECK-NEXT: %[[s1:.*]] = shl i64 %[[b1]], 8 -; CHECK-NEXT: %[[m1:.*]] = and i64 %[[i2]], -65281 -; CHECK-NEXT: %[[i1:.*]] = or i64 %[[m1]], %[[s1]] -; CHECK-NEXT: %[[b0:.*]] = zext i8 0 to i64 -; CHECK-NEXT: %[[m0:.*]] = and i64 %[[i1]], -256 -; CHECK-NEXT: %[[i0:.*]] = or i64 %[[m0]], %[[b0]] -; CHECK-NEXT: %[[result:.*]] = and i64 %[[i0]], 16777215 -; CHECK-NEXT: ret i64 %[[result]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i8 26 to i64 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_SHIFT:%.*]] = shl i64 [[A_SROA_3_0_INSERT_EXT]], 16 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_MASK:%.*]] = and i64 undef, -16711681 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i8 0 to i64 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_SHIFT:%.*]] = shl i64 [[A_SROA_2_0_INSERT_EXT]], 8 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_MASK:%.*]] = and i64 [[A_SROA_3_0_INSERT_INSERT]], -65281 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_2_0_INSERT_MASK]], [[A_SROA_2_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i8 0 to i64 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i64 [[A_SROA_2_0_INSERT_INSERT]], -256 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_EXT]] +; CHECK-NEXT: [[RESULT:%.*]] = and i64 [[A_SROA_0_0_INSERT_INSERT]], 16777215 +; CHECK-NEXT: ret i64 [[RESULT]] +; entry: %a = alloca { [3 x i8] }, align 8 @@ -618,8 +646,10 @@ entry: define %S2* @test10() { ; CHECK-LABEL: @test10( -; CHECK-NOT: alloca %S2* -; CHECK: ret %S2* null +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint %S2* null to i64 +; CHECK-NEXT: ret %S2* null +; entry: %a = alloca [8 x i8] @@ -632,8 +662,13 @@ entry: define i32 @test11() { ; CHECK-LABEL: @test11( -; CHECK-NOT: alloca -; CHECK: ret i32 0 +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 undef, label [[GOOD:%.*]], label [[BAD:%.*]] +; CHECK: good: +; CHECK-NEXT: ret i32 0 +; CHECK: bad: +; CHECK-NEXT: ret i32 undef +; entry: %X = alloca i32 @@ -657,11 +692,31 @@ define i8 @test12() { ; and other operations that instcombine will fold, but no alloca. 
; ; CHECK-LABEL: @test12( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i8 0 to i24 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_SHIFT:%.*]] = shl i24 [[A_SROA_3_0_INSERT_EXT]], 16 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_MASK:%.*]] = and i24 undef, 65535 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i24 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i8 0 to i24 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_SHIFT:%.*]] = shl i24 [[A_SROA_2_0_INSERT_EXT]], 8 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_MASK:%.*]] = and i24 [[A_SROA_3_0_INSERT_INSERT]], -65281 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_INSERT:%.*]] = or i24 [[A_SROA_2_0_INSERT_MASK]], [[A_SROA_2_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i8 0 to i24 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i24 [[A_SROA_2_0_INSERT_INSERT]], -256 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i24 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_EXT]] +; CHECK-NEXT: [[B_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i24 [[A_SROA_0_0_INSERT_INSERT]] to i8 +; CHECK-NEXT: [[B_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i24 [[A_SROA_0_0_INSERT_INSERT]], 8 +; CHECK-NEXT: [[B_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i24 [[B_SROA_2_0_EXTRACT_SHIFT]] to i8 +; CHECK-NEXT: [[B_SROA_3_0_EXTRACT_SHIFT:%.*]] = lshr i24 [[A_SROA_0_0_INSERT_INSERT]], 16 +; CHECK-NEXT: [[B_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i24 [[B_SROA_3_0_EXTRACT_SHIFT]] to i8 +; CHECK-NEXT: [[BSUM0:%.*]] = add i8 [[B_SROA_0_0_EXTRACT_TRUNC]], [[B_SROA_2_0_EXTRACT_TRUNC]] +; CHECK-NEXT: [[BSUM1:%.*]] = add i8 [[BSUM0]], [[B_SROA_3_0_EXTRACT_TRUNC]] +; CHECK-NEXT: ret i8 [[BSUM1]] +; entry: %a = alloca [3 x i8] %b = alloca [3 x i8] -; CHECK-NOT: alloca %a0ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 0 store i8 0, i8* %a0ptr @@ -671,19 +726,6 @@ entry: store i8 0, i8* %a2ptr %aiptr = bitcast [3 x i8]* %a to i24* %ai = load i24, i24* %aiptr -; CHECK-NOT: store -; CHECK-NOT: load -; CHECK: %[[ext2:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16 -; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535 -; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[shift2]] -; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8 -; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281 -; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]] -; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24 -; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], -256 -; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[ext0]] %biptr = bitcast [3 x i8]* %b to i24* store i24 %ai, i24* %biptr @@ -693,29 +735,21 @@ entry: %b1 = load i8, i8* %b1ptr %b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2 %b2 = load i8, i8* %b2ptr -; CHECK-NOT: store -; CHECK-NOT: load -; CHECK: %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8 -; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8 -; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8 -; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[insert0]], 16 -; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8 %bsum0 = add i8 %b0, %b1 %bsum1 = add i8 %bsum0, %b2 ret i8 %bsum1 -; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]] -; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]] -; CHECK-NEXT: ret i8 %[[sum1]] } define i32 @test13() { ; Ensure we don't crash and handle undefined loads that straddle the end of the ; allocation. 
; CHECK-LABEL: @test13( -; CHECK: %[[value:.*]] = zext i8 0 to i16 -; CHECK-NEXT: %[[ret:.*]] = zext i16 %[[value]] to i32 -; CHECK-NEXT: ret i32 %[[ret]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_2_2_LOAD_EXT:%.*]] = zext i8 0 to i16 +; CHECK-NEXT: [[RET:%.*]] = zext i16 [[A_SROA_2_2_LOAD_EXT]] to i32 +; CHECK-NEXT: ret i32 [[RET]] +; entry: %a = alloca [3 x i8], align 2 @@ -740,8 +774,9 @@ define void @test14(...) nounwind uwtable { ; across two adjacent allocas. Test that we don't try to promote or otherwise ; do bad things to these dead allocas, they should just be removed. ; CHECK-LABEL: @test14( -; CHECK-NEXT: entry: -; CHECK-NEXT: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; entry: %a = alloca %test14.struct @@ -771,10 +806,11 @@ define i32 @test15(i1 %flag) nounwind uwtable { ; loads or stores we still delete them during partitioning and rewriting. ; Otherwise we'll go to promote them while thy still have unpromotable uses. ; CHECK-LABEL: @test15( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop -; CHECK: loop: -; CHECK-NEXT: br label %loop +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br label [[LOOP]] +; entry: %l0 = alloca i64 @@ -814,12 +850,13 @@ loop: define void @test16(i8* %src, i8* %dst) { ; Ensure that we can promote an alloca of [3 x i8] to an i24 SSA value. ; CHECK-LABEL: @test16( -; CHECK-NOT: alloca -; CHECK: %[[srccast:.*]] = bitcast i8* %src to i24* -; CHECK-NEXT: load i24, i24* %[[srccast]], {{.*}}, !tbaa [[TAG_0]] -; CHECK-NEXT: %[[dstcast:.*]] = bitcast i8* %dst to i24* -; CHECK-NEXT: store i24 0, i24* %[[dstcast]], {{.*}}, !tbaa [[TAG_5]] -; CHECK-NEXT: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_0_SRC_SROA_CAST:%.*]] = bitcast i8* [[SRC:%.*]] to i24* +; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i24, i24* [[A_SROA_0_0_SRC_SROA_CAST]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_0_0_DST_SROA_CAST:%.*]] = bitcast i8* [[DST:%.*]] to i24* +; CHECK-NEXT: store i24 0, i24* [[A_SROA_0_0_DST_SROA_CAST]], align 1, !tbaa [[TBAA9]] +; CHECK-NEXT: ret void +; entry: %a = alloca [3 x i8] @@ -835,11 +872,13 @@ define void @test17(i8* %src, i8* %dst) { ; Ensure that we can rewrite unpromotable memcpys which extend past the end of ; the alloca. ; CHECK-LABEL: @test17( -; CHECK: %[[a:.*]] = alloca [3 x i8] -; CHECK-NEXT: %[[ptr:.*]] = getelementptr [3 x i8], [3 x i8]* %[[a]], i32 0, i32 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[ptr]], i8* %src, {{.*}}), !tbaa [[TAG_0]] -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[ptr]], {{.*}}), !tbaa [[TAG_3]] -; CHECK-NEXT: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [3 x i8], align 1 +; CHECK-NEXT: [[PTR:%.*]] = getelementptr [3 x i8], [3 x i8]* [[A]], i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[PTR]], i8* [[SRC:%.*]], i32 4, i1 true), !tbaa [[TBAA0]] +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DST:%.*]], i8* [[PTR]], i32 4, i1 true), !tbaa [[TBAA7]] +; CHECK-NEXT: ret void +; entry: %a = alloca [3 x i8] @@ -854,22 +893,26 @@ define void @test18(i8* %src, i8* %dst, i32 %size) { ; fixed size operations. Further, continue to split and promote allocas preceding ; the variable sized intrinsic. 
; CHECK-LABEL: @test18( -; CHECK: %[[a:.*]] = alloca [34 x i8] -; CHECK: %[[srcgep1:.*]] = getelementptr inbounds i8, i8* %src, i64 4 -; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32* -; CHECK-NEXT: %[[srcload:.*]] = load i32, i32* %[[srccast1]], {{.*}}, !tbaa [[TAG_0_M4:!.*]] -; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %[[agep1]], i8* %src, i32 %size, {{.*}}), !tbaa [[TAG_3]] -; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 %[[agep2]], i8 42, i32 %size, {{.*}}), !tbaa [[TAG_5]] -; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32* -; CHECK-NEXT: store i32 42, i32* %[[dstcast1]], {{.*}}, !tbaa [[TAG_9]] -; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8, i8* %dst, i64 4 -; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32* -; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]], {{.*}} -; CHECK-NEXT: %[[agep3:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* align 1 %[[agep3]], i32 %size, {{.*}}), !tbaa [[TAG_11]] -; CHECK-NEXT: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_33:%.*]] = alloca [34 x i8], align 1 +; CHECK-NEXT: [[A_SROA_0_0_SRC_SROA_CAST:%.*]] = bitcast i8* [[SRC:%.*]] to i32* +; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i32, i32* [[A_SROA_0_0_SRC_SROA_CAST]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i64 4 +; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_3_0_SRC_SROA_IDX]] to i32* +; CHECK-NEXT: [[A_SROA_3_0_COPYLOAD:%.*]] = load i32, i32* [[A_SROA_3_0_SRC_SROA_CAST]], align 1, !tbaa [[TBAA74:![0-9]+]] +; CHECK-NEXT: [[A_SROA_33_0_PTR2_SROA_IDX:%.*]] = getelementptr inbounds [34 x i8], [34 x i8]* [[A_SROA_33]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A_SROA_33_0_PTR2_SROA_IDX]], i8* [[SRC]], i32 [[SIZE:%.*]], i1 false), !tbaa [[TBAA7]] +; CHECK-NEXT: [[A_SROA_33_0_PTR2_SROA_IDX6:%.*]] = getelementptr inbounds [34 x i8], [34 x i8]* [[A_SROA_33]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[A_SROA_33_0_PTR2_SROA_IDX6]], i8 42, i32 [[SIZE]], i1 false), !tbaa [[TBAA9]] +; CHECK-NEXT: [[A_SROA_0_0_DST_SROA_CAST:%.*]] = bitcast i8* [[DST:%.*]] to i32* +; CHECK-NEXT: store i32 42, i32* [[A_SROA_0_0_DST_SROA_CAST]], align 1, !tbaa [[TBAA13]] +; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4 +; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_3_0_DST_SROA_IDX]] to i32* +; CHECK-NEXT: store i32 [[A_SROA_3_0_COPYLOAD]], i32* [[A_SROA_3_0_DST_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_SROA_33_0_PTR2_SROA_IDX7:%.*]] = getelementptr inbounds [34 x i8], [34 x i8]* [[A_SROA_33]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DST]], i8* align 1 [[A_SROA_33_0_PTR2_SROA_IDX7]], i32 [[SIZE]], i1 false), !tbaa [[TBAA15]] +; CHECK-NEXT: ret void +; entry: %a = alloca [42 x i8] @@ -893,8 +936,15 @@ define i32 @test19(%opaque* %x) { ; a check for an unsized type was missing and this crashed. Ensure it behaves ; reasonably now. 
; CHECK-LABEL: @test19( -; CHECK-NOT: alloca -; CHECK: ret i32 undef +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CAST1:%.*]] = bitcast %opaque* [[X:%.*]] to i8* +; CHECK-NEXT: [[A_SROA_0_0_CAST1_SROA_CAST:%.*]] = bitcast i8* [[CAST1]] to i64* +; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i64, i64* [[A_SROA_0_0_CAST1_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_SROA_2_0_CAST1_SROA_IDX:%.*]] = getelementptr inbounds i8, i8* [[CAST1]], i64 8 +; CHECK-NEXT: [[A_SROA_2_0_CAST1_SROA_CAST:%.*]] = bitcast i8* [[A_SROA_2_0_CAST1_SROA_IDX]] to i8** +; CHECK-NEXT: [[A_SROA_2_0_COPYLOAD:%.*]] = load i8*, i8** [[A_SROA_2_0_CAST1_SROA_CAST]], align 1 +; CHECK-NEXT: ret i32 undef +; entry: %a = alloca { i64, i8* } @@ -914,8 +964,15 @@ define i32 @test19_addrspacecast(%opaque* %x) { ; a check for an unsized type was missing and this crashed. Ensure it behaves ; reasonably now. ; CHECK-LABEL: @test19_addrspacecast( -; CHECK-NOT: alloca -; CHECK: ret i32 undef +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CAST1:%.*]] = addrspacecast %opaque* [[X:%.*]] to i8 addrspace(1)* +; CHECK-NEXT: [[A_SROA_0_0_CAST1_SROA_CAST:%.*]] = bitcast i8 addrspace(1)* [[CAST1]] to i64 addrspace(1)* +; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i64, i64 addrspace(1)* [[A_SROA_0_0_CAST1_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_SROA_2_0_CAST1_SROA_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[CAST1]], i16 8 +; CHECK-NEXT: [[A_SROA_2_0_CAST1_SROA_CAST:%.*]] = bitcast i8 addrspace(1)* [[A_SROA_2_0_CAST1_SROA_IDX]] to i8* addrspace(1)* +; CHECK-NEXT: [[A_SROA_2_0_COPYLOAD:%.*]] = load i8*, i8* addrspace(1)* [[A_SROA_2_0_CAST1_SROA_CAST]], align 1 +; CHECK-NEXT: ret i32 undef +; entry: %a = alloca { i64, i8* } @@ -931,10 +988,11 @@ define i32 @test20() { ; Ensure we can track negative offsets (before the beginning of the alloca) and ; negative relative offsets from offsets starting past the end of the alloca. ; CHECK-LABEL: @test20( -; CHECK-NOT: alloca -; CHECK: %[[sum1:.*]] = add i32 1, 2 -; CHECK: %[[sum2:.*]] = add i32 %[[sum1]], 3 -; CHECK: ret i32 %[[sum2]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUM1:%.*]] = add i32 1, 2 +; CHECK-NEXT: [[SUM2:%.*]] = add i32 [[SUM1]], 3 +; CHECK-NEXT: ret i32 [[SUM2]] +; entry: %a = alloca [3 x i32] @@ -962,8 +1020,10 @@ define i8 @test21() { ; internally. This is really awkward to really test as LLVM doesn't really ; support such extreme constructs cleanly. ; CHECK-LABEL: @test21( -; CHECK-NOT: alloca -; CHECK: or i8 -1, -1 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = or i8 -1, -1 +; CHECK-NEXT: ret i8 [[RESULT]] +; entry: %a = alloca [2305843009213693951 x i8] @@ -990,9 +1050,10 @@ entry: define void @PR13916.1() { ; Ensure that we handle overlapping memcpy intrinsics correctly, especially in ; the case where there is a directly identical value for both source and dest. -; CHECK: @PR13916.1 -; CHECK-NOT: alloca -; CHECK: ret void +; CHECK-LABEL: @PR13916.1( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; entry: %a = alloca i8 @@ -1005,9 +1066,14 @@ define void @PR13916.2() { ; Check whether we continue to handle them correctly when they start off with ; different pointer value chains, but during rewriting we coalesce them into the ; same value. 
-; CHECK: @PR13916.2 -; CHECK-NOT: alloca -; CHECK: ret void +; CHECK-LABEL: @PR13916.2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret void +; entry: %a = alloca %PR13916.struct, align 1 @@ -1030,9 +1096,17 @@ define void @PR13990() { ; alloca to become dead and get deleted. This might crash or fail under ; Valgrind if we regress. ; CHECK-LABEL: @PR13990( -; CHECK-NOT: alloca -; CHECK: unreachable -; CHECK: unreachable +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: br i1 undef, label [[BB2]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br i1 undef, label [[BB3]], label [[BB4:%.*]] +; CHECK: bb3: +; CHECK-NEXT: unreachable +; CHECK: bb4: +; CHECK-NEXT: unreachable +; entry: %tmp1 = alloca i8* @@ -1059,23 +1133,22 @@ define double @PR13969(double %x) { ; re-try running SROA over that alloca. Without that, the two allocas that are ; stored into a dead alloca don't get rewritten and promoted. ; CHECK-LABEL: @PR13969( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret double [[X:%.*]] +; entry: %a = alloca double %b = alloca double* %c = alloca double -; CHECK-NOT: alloca store double %x, double* %a store double* %c, double** %b store double* %a, double** %b store double %x, double* %c %ret = load double, double* %a -; CHECK-NOT: store -; CHECK-NOT: load ret double %ret -; CHECK: ret double %x } %PR14034.struct = type { { {} }, i32, %PR14034.list } @@ -1086,6 +1159,12 @@ define void @PR14034() { ; subsequently crashed (under valgrind) before we fixed the PR. The important ; thing is to handle empty structs gracefully. ; CHECK-LABEL: @PR14034( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [12 x i8], align 8 +; CHECK-NEXT: [[A_SROA_0_0_CAST1_SROA_IDX:%.*]] = getelementptr inbounds [12 x i8], [12 x i8]* [[A_SROA_0]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 undef, i8* align 8 [[A_SROA_0_0_CAST1_SROA_IDX]], i32 12, i1 false) +; CHECK-NEXT: ret void +; entry: %a = alloca %PR14034.struct @@ -1102,12 +1181,31 @@ define i32 @test22(i32 %x) { ; Test that SROA and promotion is not confused by a grab bax mixture of pointer ; types involving wrapper aggregates and zero-length aggregate members. 
; CHECK-LABEL: @test22( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[WRAP1:%.*]] = insertvalue [1 x { i32 }] undef, i32 [[X:%.*]], 0, 0 +; CHECK-NEXT: [[WRAP1_FCA_0_0_EXTRACT:%.*]] = extractvalue [1 x { i32 }] [[WRAP1]], 0, 0 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[WRAP1_FCA_0_0_EXTRACT]] to float +; CHECK-NEXT: [[LOAD1_FCA_0_0_0_INSERT:%.*]] = insertvalue { [1 x { float }] } undef, float [[TMP0]], 0, 0, 0 +; CHECK-NEXT: [[UNWRAP1:%.*]] = extractvalue { [1 x { float }] } [[LOAD1_FCA_0_0_0_INSERT]], 0, 0 +; CHECK-NEXT: [[WRAP2:%.*]] = insertvalue { {}, { float }, [0 x i8] } undef, { float } [[UNWRAP1]], 1 +; CHECK-NEXT: [[WRAP2_FCA_1_0_EXTRACT:%.*]] = extractvalue { {}, { float }, [0 x i8] } [[WRAP2]], 1, 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[WRAP2_FCA_1_0_EXTRACT]] to <4 x i8> +; CHECK-NEXT: [[VALCAST1:%.*]] = bitcast <4 x i8> [[TMP1]] to i32 +; CHECK-NEXT: [[WRAP3:%.*]] = insertvalue [1 x [1 x i32]] undef, i32 [[VALCAST1]], 0, 0 +; CHECK-NEXT: [[WRAP4:%.*]] = insertvalue { [1 x [1 x i32]], {} } undef, [1 x [1 x i32]] [[WRAP3]], 0 +; CHECK-NEXT: [[WRAP4_FCA_0_0_0_EXTRACT:%.*]] = extractvalue { [1 x [1 x i32]], {} } [[WRAP4]], 0, 0, 0 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[WRAP4_FCA_0_0_0_EXTRACT]] to <4 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to float +; CHECK-NEXT: [[LOAD4_FCA_1_INSERT:%.*]] = insertvalue { {}, float, {} } undef, float [[TMP3]], 1 +; CHECK-NEXT: [[UNWRAP2:%.*]] = extractvalue { {}, float, {} } [[LOAD4_FCA_1_INSERT]], 1 +; CHECK-NEXT: [[VALCAST2:%.*]] = bitcast float [[UNWRAP2]] to i32 +; CHECK-NEXT: ret i32 [[VALCAST2]] +; entry: %a1 = alloca { { [1 x { i32 }] } } %a2 = alloca { {}, { float }, [0 x i8] } %a3 = alloca { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } } -; CHECK-NOT: alloca %wrap1 = insertvalue [1 x { i32 }] undef, i32 %x, 0, 0 %gep1 = getelementptr { { [1 x { i32 }] } }, { { [1 x { i32 }] } }* %a1, i32 0, i32 0, i32 0 @@ -1139,7 +1237,6 @@ entry: %valcast2 = bitcast float %unwrap2 to i32 ret i32 %valcast2 -; CHECK: ret i32 } define void @PR14059.1(double* %d) { @@ -1152,9 +1249,32 @@ define void @PR14059.1(double* %d) { ; whole-alloca operations, and perform the appropriate bitcasting on the ; *values* rather than the pointers. When this works, partial reads and writes ; via integers can be promoted away. 
-; CHECK: @PR14059.1 -; CHECK-NOT: alloca -; CHECK: ret void +; CHECK-LABEL: @PR14059.1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double undef to i64 +; CHECK-NEXT: [[X_SROA_0_I_0_INSERT_MASK:%.*]] = and i64 [[TMP0]], -4294967296 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[X_SROA_0_I_0_INSERT_MASK]] to double +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[TMP1]] to i64 +; CHECK-NEXT: [[X_SROA_0_I_2_INSERT_MASK:%.*]] = and i64 [[TMP2]], -281474976645121 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64 [[X_SROA_0_I_2_INSERT_MASK]] to double +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double [[TMP3]] to i64 +; CHECK-NEXT: [[X_SROA_0_I_4_D_RAW_SROA_CAST:%.*]] = bitcast double* [[D:%.*]] to i32* +; CHECK-NEXT: [[X_SROA_0_I_4_COPYLOAD:%.*]] = load i32, i32* [[X_SROA_0_I_4_D_RAW_SROA_CAST]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast double 0.000000e+00 to i64 +; CHECK-NEXT: [[X_SROA_0_I_4_INSERT_EXT:%.*]] = zext i32 [[X_SROA_0_I_4_COPYLOAD]] to i64 +; CHECK-NEXT: [[X_SROA_0_I_4_INSERT_SHIFT:%.*]] = shl i64 [[X_SROA_0_I_4_INSERT_EXT]], 32 +; CHECK-NEXT: [[X_SROA_0_I_4_INSERT_MASK4:%.*]] = and i64 [[TMP5]], 4294967295 +; CHECK-NEXT: [[X_SROA_0_I_4_INSERT_INSERT5:%.*]] = or i64 [[X_SROA_0_I_4_INSERT_MASK4]], [[X_SROA_0_I_4_INSERT_SHIFT]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64 [[X_SROA_0_I_4_INSERT_INSERT5]] to double +; CHECK-NEXT: [[TMP7:%.*]] = bitcast double [[TMP6]] to i64 +; CHECK-NEXT: [[X_SROA_0_I_4_INSERT_MASK:%.*]] = and i64 [[TMP7]], 4294967295 +; CHECK-NEXT: [[X_SROA_0_I_4_INSERT_INSERT:%.*]] = or i64 [[X_SROA_0_I_4_INSERT_MASK]], 4607182418800017408 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64 [[X_SROA_0_I_4_INSERT_INSERT]] to double +; CHECK-NEXT: [[ACCUM_REAL_I:%.*]] = load double, double* [[D]], align 8 +; CHECK-NEXT: [[ADD_R_I:%.*]] = fadd double [[ACCUM_REAL_I]], [[TMP8]] +; CHECK-NEXT: store double [[ADD_R_I]], double* [[D]], align 8 +; CHECK-NEXT: ret void +; entry: %X.sroa.0.i = alloca double, align 8 @@ -1195,42 +1315,42 @@ define i64 @PR14059.2({ float, float }* %phi) { ; underlying alloca has smaller components that are accessed independently. This ; shows up particularly with ABI lowering patterns coming out of Clang that rely ; on the particular register placement of a single large integer return value. 
-; CHECK: @PR14059.2 +; CHECK-LABEL: @PR14059.2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PHI_REALP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[PHI:%.*]], i32 0, i32 0 +; CHECK-NEXT: [[PHI_REAL:%.*]] = load float, float* [[PHI_REALP]], align 4 +; CHECK-NEXT: [[PHI_IMAGP:%.*]] = getelementptr inbounds { float, float }, { float, float }* [[PHI]], i32 0, i32 1 +; CHECK-NEXT: [[PHI_IMAG:%.*]] = load float, float* [[PHI_IMAGP]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[PHI_REAL]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[PHI_IMAG]] to i32 +; CHECK-NEXT: [[RETVAL_SROA_3_0_INSERT_EXT:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[RETVAL_SROA_3_0_INSERT_SHIFT:%.*]] = shl i64 [[RETVAL_SROA_3_0_INSERT_EXT]], 32 +; CHECK-NEXT: [[RETVAL_SROA_3_0_INSERT_MASK:%.*]] = and i64 undef, 4294967295 +; CHECK-NEXT: [[RETVAL_SROA_3_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_3_0_INSERT_MASK]], [[RETVAL_SROA_3_0_INSERT_SHIFT]] +; CHECK-NEXT: [[RETVAL_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[RETVAL_SROA_0_0_INSERT_MASK:%.*]] = and i64 [[RETVAL_SROA_3_0_INSERT_INSERT]], -4294967296 +; CHECK-NEXT: [[RETVAL_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[RETVAL_SROA_0_0_INSERT_MASK]], [[RETVAL_SROA_0_0_INSERT_EXT]] +; CHECK-NEXT: ret i64 [[RETVAL_SROA_0_0_INSERT_INSERT]] +; entry: %retval = alloca { float, float }, align 4 - ; CHECK-NOT: alloca %0 = bitcast { float, float }* %retval to i64* store i64 0, i64* %0 - ; CHECK-NOT: store %phi.realp = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 0 %phi.real = load float, float* %phi.realp %phi.imagp = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 1 %phi.imag = load float, float* %phi.imagp - ; CHECK: %[[realp:.*]] = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 0 - ; CHECK-NEXT: %[[real:.*]] = load float, float* %[[realp]] - ; CHECK-NEXT: %[[imagp:.*]] = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 1 - ; CHECK-NEXT: %[[imag:.*]] = load float, float* %[[imagp]] %real = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 0 %imag = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 1 store float %phi.real, float* %real store float %phi.imag, float* %imag - ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32 - ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32 - ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64 - ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32 - ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295 - ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]] - ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64 - ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296 - ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]] %1 = load i64, i64* %0, align 1 ret i64 %1 - ; CHECK-NEXT: ret i64 %[[real_insert]] } define void @PR14105({ [16 x i8] }* %ptr) { @@ -1238,29 +1358,36 @@ define void @PR14105({ [16 x i8] }* %ptr) { ; sign as negative. We use a volatile memcpy to ensure promotion never actually ; occurs. 
; CHECK-LABEL: @PR14105( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [16 x i8], align 8 +; CHECK-NEXT: [[A_SROA_0_0_CAST1_SROA_IDX:%.*]] = getelementptr inbounds { [16 x i8] }, { [16 x i8] }* [[PTR:%.*]], i64 -1, i32 0, i64 0 +; CHECK-NEXT: [[A_SROA_0_0_CAST2_SROA_IDX:%.*]] = getelementptr inbounds [16 x i8], [16 x i8]* [[A_SROA_0]], i64 0, i64 0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[A_SROA_0_0_CAST1_SROA_IDX]], i8* align 8 [[A_SROA_0_0_CAST2_SROA_IDX]], i32 16, i1 true) +; CHECK-NEXT: ret void +; entry: %a = alloca { [16 x i8] }, align 8 -; CHECK: alloca [16 x i8], align 8 %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] }* %ptr, i64 -1 -; CHECK-NEXT: getelementptr inbounds { [16 x i8] }, { [16 x i8] }* %ptr, i64 -1, i32 0, i64 0 %cast1 = bitcast { [16 x i8 ] }* %gep to i8* %cast2 = bitcast { [16 x i8 ] }* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true) ret void -; CHECK: ret } define void @PR14105_as1({ [16 x i8] } addrspace(1)* %ptr) { ; Make sure this the right address space pointer is used for type check. ; CHECK-LABEL: @PR14105_as1( -; CHECK: alloca { [16 x i8] }, align 8 -; CHECK-NEXT: %gep = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* %ptr, i64 -1 -; CHECK-NEXT: %cast1 = bitcast { [16 x i8] } addrspace(1)* %gep to i8 addrspace(1)* -; CHECK-NEXT: %cast2 = bitcast { [16 x i8] }* %a to i8* -; CHECK-NEXT: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca { [16 x i8] }, align 8 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds { [16 x i8] }, { [16 x i8] } addrspace(1)* [[PTR:%.*]], i64 -1 +; CHECK-NEXT: [[CAST1:%.*]] = bitcast { [16 x i8] } addrspace(1)* [[GEP]] to i8 addrspace(1)* +; CHECK-NEXT: [[CAST2:%.*]] = bitcast { [16 x i8] }* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 [[CAST1]], i8* align 8 [[CAST2]], i32 16, i1 true) +; CHECK-NEXT: ret void +; entry: %a = alloca { [16 x i8] }, align 8 @@ -1269,20 +1396,22 @@ entry: %cast2 = bitcast { [16 x i8 ] }* %a to i8* call void @llvm.memcpy.p1i8.p0i8.i32(i8 addrspace(1)* align 8 %cast1, i8* align 8 %cast2, i32 16, i1 true) ret void -; CHECK: ret } define void @PR14465() { ; Ensure that we don't crash when analyzing a alloca larger than the maximum ; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1). ; CHECK-LABEL: @PR14465( +; CHECK-NEXT: [[STACK:%.*]] = alloca [1048576 x i32], align 16 +; CHECK-NEXT: [[STACK_0_CAST_SROA_CAST:%.*]] = bitcast [1048576 x i32]* [[STACK]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 16 [[STACK_0_CAST_SROA_CAST]], i8 -2, i64 4194304, i1 false) +; CHECK-NEXT: ret void +; %stack = alloca [1048576 x i32], align 16 -; CHECK: alloca [1048576 x i32] %cast = bitcast [1048576 x i32]* %stack to i8* call void @llvm.memset.p0i8.i64(i8* align 16 %cast, i8 -2, i64 4194304, i1 false) ret void -; CHECK: ret } define void @PR14548(i1 %x) { @@ -1293,31 +1422,34 @@ define void @PR14548(i1 %x) { ; Note that we don't do a particularly good *job* of handling these mixtures, ; but the hope is that this is very rare. 
; CHECK-LABEL: @PR14548( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i8, align 8 +; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca i8, align 8 +; CHECK-NEXT: [[B_SROA_0_0_B_I1_SROA_CAST1:%.*]] = bitcast i8* [[B_SROA_0]] to i1* +; CHECK-NEXT: store i1 [[X:%.*]], i1* [[B_SROA_0_0_B_I1_SROA_CAST1]], align 8 +; CHECK-NEXT: [[B_SROA_0_0_B_SROA_0_0_FOO:%.*]] = load i8, i8* [[B_SROA_0]], align 8 +; CHECK-NEXT: [[B_SROA_0_0_B_SROA_0_0_COPYLOAD:%.*]] = load i8, i8* [[B_SROA_0]], align 8 +; CHECK-NEXT: store i8 [[B_SROA_0_0_B_SROA_0_0_COPYLOAD]], i8* [[A_SROA_0]], align 8 +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_BAR:%.*]] = load i8, i8* [[A_SROA_0]], align 8 +; CHECK-NEXT: [[A_SROA_0_0_A_I1_SROA_CAST2:%.*]] = bitcast i8* [[A_SROA_0]] to i1* +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_BAZ:%.*]] = load i1, i1* [[A_SROA_0_0_A_I1_SROA_CAST2]], align 8 +; CHECK-NEXT: ret void +; entry: %a = alloca <{ i1 }>, align 8 %b = alloca <{ i1 }>, align 8 -; CHECK: %[[a:.*]] = alloca i8, align 8 -; CHECK-NEXT: %[[b:.*]] = alloca i8, align 8 %b.i1 = bitcast <{ i1 }>* %b to i1* store i1 %x, i1* %b.i1, align 8 %b.i8 = bitcast <{ i1 }>* %b to i8* %foo = load i8, i8* %b.i8, align 1 -; CHECK-NEXT: %[[b_cast:.*]] = bitcast i8* %[[b]] to i1* -; CHECK-NEXT: store i1 %x, i1* %[[b_cast]], align 8 -; CHECK-NEXT: {{.*}} = load i8, i8* %[[b]], align 8 %a.i8 = bitcast <{ i1 }>* %a to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i1 false) nounwind %bar = load i8, i8* %a.i8, align 1 %a.i1 = getelementptr inbounds <{ i1 }>, <{ i1 }>* %a, i32 0, i32 0 %baz = load i1, i1* %a.i1, align 1 -; CHECK-NEXT: %[[copy:.*]] = load i8, i8* %[[b]], align 8 -; CHECK-NEXT: store i8 %[[copy]], i8* %[[a]], align 8 -; CHECK-NEXT: {{.*}} = load i8, i8* %[[a]], align 8 -; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1* -; CHECK-NEXT: {{.*}} = load i1, i1* %[[a_cast]], align 8 ret void } @@ -1325,43 +1457,63 @@ entry: define <3 x i8> @PR14572.1(i32 %x) { ; Ensure that a split integer store which is wider than the type size of the ; alloca (relying on the alloc size padding) doesn't trigger an assert. -; CHECK: @PR14572.1 +; CHECK-LABEL: @PR14572.1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[X:%.*]] to i24 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i24 [[A_0_EXTRACT_TRUNC]] to <3 x i8> +; CHECK-NEXT: [[A_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[X]], 24 +; CHECK-NEXT: [[A_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_2_0_EXTRACT_SHIFT]] to i8 +; CHECK-NEXT: ret <3 x i8> [[TMP0]] +; entry: %a = alloca <3 x i8>, align 4 -; CHECK-NOT: alloca %cast = bitcast <3 x i8>* %a to i32* store i32 %x, i32* %cast, align 1 %y = load <3 x i8>, <3 x i8>* %a, align 4 ret <3 x i8> %y -; CHECK: ret <3 x i8> } define i32 @PR14572.2(<3 x i8> %x) { ; Ensure that a split integer load which is wider than the type size of the ; alloca (relying on the alloc size padding) doesn't trigger an assert. 
-; CHECK: @PR14572.2 +; CHECK-LABEL: @PR14572.2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <3 x i8> [[X:%.*]] to i24 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i8 undef to i32 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_SHIFT:%.*]] = shl i32 [[A_SROA_2_0_INSERT_EXT]], 24 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_MASK:%.*]] = and i32 undef, 16777215 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_2_0_INSERT_MASK]], [[A_SROA_2_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_0_INSERT_EXT:%.*]] = zext i24 [[TMP0]] to i32 +; CHECK-NEXT: [[A_0_INSERT_MASK:%.*]] = and i32 [[A_SROA_2_0_INSERT_INSERT]], -16777216 +; CHECK-NEXT: [[A_0_INSERT_INSERT:%.*]] = or i32 [[A_0_INSERT_MASK]], [[A_0_INSERT_EXT]] +; CHECK-NEXT: ret i32 [[A_0_INSERT_INSERT]] +; entry: %a = alloca <3 x i8>, align 4 -; CHECK-NOT: alloca store <3 x i8> %x, <3 x i8>* %a, align 1 %cast = bitcast <3 x i8>* %a to i32* %y = load i32, i32* %cast, align 4 ret i32 %y -; CHECK: ret i32 } define i32 @PR14601(i32 %x) { ; Don't try to form a promotable integer alloca when there is a variable length ; memory intrinsic. ; CHECK-LABEL: @PR14601( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[A_0_A_I8_SROA_CAST:%.*]] = bitcast i32* [[A]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 4 [[A_0_A_I8_SROA_CAST]], i8 0, i32 [[X:%.*]], i1 false) +; CHECK-NEXT: [[A_0_V:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: ret i32 [[A_0_V]] +; entry: %a = alloca i32 -; CHECK: alloca %a.i8 = bitcast i32* %a to i8* call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i1 false) @@ -1376,16 +1528,55 @@ define void @PR15674(i8* %data, i8* %src, i32 %size) { ; convertable to the integer type that we end up promoting this alloca toward, ; doesn't get widened to a full alloca store. 
; CHECK-LABEL: @PR15674( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP_SROA_0:%.*]] = alloca i32, align 4 +; CHECK-NEXT: switch i32 [[SIZE:%.*]], label [[END:%.*]] [ +; CHECK-NEXT: i32 4, label [[BB4:%.*]] +; CHECK-NEXT: i32 3, label [[BB3:%.*]] +; CHECK-NEXT: i32 2, label [[BB2:%.*]] +; CHECK-NEXT: i32 1, label [[BB1:%.*]] +; CHECK-NEXT: ] +; CHECK: bb4: +; CHECK-NEXT: [[SRC_GEP3:%.*]] = getelementptr inbounds i8, i8* [[SRC:%.*]], i32 3 +; CHECK-NEXT: [[SRC_3:%.*]] = load i8, i8* [[SRC_GEP3]], align 1 +; CHECK-NEXT: [[TMP_SROA_0_3_TMP_GEP3_SROA_RAW_CAST7:%.*]] = bitcast i32* [[TMP_SROA_0]] to i8* +; CHECK-NEXT: [[TMP_SROA_0_3_TMP_GEP3_SROA_RAW_IDX8:%.*]] = getelementptr inbounds i8, i8* [[TMP_SROA_0_3_TMP_GEP3_SROA_RAW_CAST7]], i64 3 +; CHECK-NEXT: store i8 [[SRC_3]], i8* [[TMP_SROA_0_3_TMP_GEP3_SROA_RAW_IDX8]], align 1 +; CHECK-NEXT: br label [[BB3]] +; CHECK: bb3: +; CHECK-NEXT: [[SRC_GEP2:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i32 2 +; CHECK-NEXT: [[SRC_2:%.*]] = load i8, i8* [[SRC_GEP2]], align 1 +; CHECK-NEXT: [[TMP_SROA_0_2_TMP_GEP2_SROA_RAW_CAST5:%.*]] = bitcast i32* [[TMP_SROA_0]] to i8* +; CHECK-NEXT: [[TMP_SROA_0_2_TMP_GEP2_SROA_RAW_IDX6:%.*]] = getelementptr inbounds i8, i8* [[TMP_SROA_0_2_TMP_GEP2_SROA_RAW_CAST5]], i64 2 +; CHECK-NEXT: store i8 [[SRC_2]], i8* [[TMP_SROA_0_2_TMP_GEP2_SROA_RAW_IDX6]], align 2 +; CHECK-NEXT: br label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: [[SRC_GEP1:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i32 1 +; CHECK-NEXT: [[SRC_1:%.*]] = load i8, i8* [[SRC_GEP1]], align 1 +; CHECK-NEXT: [[TMP_SROA_0_1_TMP_GEP1_SROA_RAW_CAST3:%.*]] = bitcast i32* [[TMP_SROA_0]] to i8* +; CHECK-NEXT: [[TMP_SROA_0_1_TMP_GEP1_SROA_RAW_IDX4:%.*]] = getelementptr inbounds i8, i8* [[TMP_SROA_0_1_TMP_GEP1_SROA_RAW_CAST3]], i64 1 +; CHECK-NEXT: store i8 [[SRC_1]], i8* [[TMP_SROA_0_1_TMP_GEP1_SROA_RAW_IDX4]], align 1 +; CHECK-NEXT: br label [[BB1]] +; CHECK: bb1: +; CHECK-NEXT: [[SRC_GEP0:%.*]] = getelementptr inbounds i8, i8* [[SRC]], i32 0 +; CHECK-NEXT: [[SRC_0:%.*]] = load i8, i8* [[SRC_GEP0]], align 1 +; CHECK-NEXT: [[TMP_SROA_0_0_TMP_GEP0_SROA_CAST2:%.*]] = bitcast i32* [[TMP_SROA_0]] to i8* +; CHECK-NEXT: store i8 [[SRC_0]], i8* [[TMP_SROA_0_0_TMP_GEP0_SROA_CAST2]], align 4 +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[TMP_SROA_0_0_TMP_RAW_SROA_CAST1:%.*]] = bitcast i32* [[TMP_SROA_0]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[DATA:%.*]], i8* align 4 [[TMP_SROA_0_0_TMP_RAW_SROA_CAST1]], i32 [[SIZE]], i1 false) +; CHECK-NEXT: ret void +; entry: %tmp = alloca [4 x i8], align 1 -; CHECK: alloca i32 switch i32 %size, label %end [ - i32 4, label %bb4 - i32 3, label %bb3 - i32 2, label %bb2 - i32 1, label %bb1 + i32 4, label %bb4 + i32 3, label %bb3 + i32 2, label %bb2 + i32 1, label %bb1 ] bb4: @@ -1393,7 +1584,6 @@ bb4: %src.3 = load i8, i8* %src.gep3 %tmp.gep3 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 3 store i8 %src.3, i8* %tmp.gep3 -; CHECK: store i8 br label %bb3 @@ -1402,7 +1592,6 @@ bb3: %src.2 = load i8, i8* %src.gep2 %tmp.gep2 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 2 store i8 %src.2, i8* %tmp.gep2 -; CHECK: store i8 br label %bb2 @@ -1411,7 +1600,6 @@ bb2: %src.1 = load i8, i8* %src.gep1 %tmp.gep1 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 1 store i8 %src.1, i8* %tmp.gep1 -; CHECK: store i8 br label %bb1 @@ -1420,7 +1608,6 @@ bb1: %src.0 = load i8, i8* %src.gep0 %tmp.gep0 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 0 store i8 %src.0, i8* 
%tmp.gep0 -; CHECK: store i8 br label %end @@ -1428,13 +1615,13 @@ end: %tmp.raw = bitcast [4 x i8]* %tmp to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %data, i8* %tmp.raw, i32 %size, i1 false) ret void -; CHECK: ret void } define void @PR15805(i1 %a, i1 %b) { ; CHECK-LABEL: @PR15805( -; CHECK-NOT: alloca -; CHECK: ret void +; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 undef, i64 undef, i64 undef +; CHECK-NEXT: ret void +; %c = alloca i64, align 8 %p.0.c = select i1 undef, i64* %c, i64* %c @@ -1449,8 +1636,13 @@ define void @PR15805.1(i1 %a, i1 %b) { ; order in which the uses of the alloca are visited. ; ; CHECK-LABEL: @PR15805.1( -; CHECK-NOT: alloca -; CHECK: ret void +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[COND_SROA_SPECULATED:%.*]] = select i1 undef, i64 undef, i64 undef +; CHECK-NEXT: br i1 undef, label [[LOOP:%.*]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; %c = alloca i64, align 8 br label %exit @@ -1471,10 +1663,22 @@ define void @PR16651.1(i8* %a) { ; memcpy. ; ; CHECK-LABEL: @PR16651.1( -; CHECK: alloca i16 -; CHECK: alloca i8 -; CHECK: alloca i8 -; CHECK: unreachable +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca i16, align 4 +; CHECK-NEXT: [[B_SROA_1:%.*]] = alloca i8, align 2 +; CHECK-NEXT: [[B_SROA_2:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[B_SROA_0_0_A_SROA_CAST:%.*]] = bitcast i8* [[A:%.*]] to i16* +; CHECK-NEXT: [[B_SROA_0_0_COPYLOAD:%.*]] = load volatile i16, i16* [[B_SROA_0_0_A_SROA_CAST]], align 4 +; CHECK-NEXT: store volatile i16 [[B_SROA_0_0_COPYLOAD]], i16* [[B_SROA_0]], align 4 +; CHECK-NEXT: [[B_SROA_1_0_A_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 2 +; CHECK-NEXT: [[B_SROA_1_0_COPYLOAD:%.*]] = load volatile i8, i8* [[B_SROA_1_0_A_SROA_RAW_IDX]], align 2 +; CHECK-NEXT: store volatile i8 [[B_SROA_1_0_COPYLOAD]], i8* [[B_SROA_1]], align 2 +; CHECK-NEXT: [[B_SROA_2_0_A_SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 3 +; CHECK-NEXT: [[B_SROA_2_0_COPYLOAD:%.*]] = load volatile i8, i8* [[B_SROA_2_0_A_SROA_RAW_IDX]], align 1 +; CHECK-NEXT: store volatile i8 [[B_SROA_2_0_COPYLOAD]], i8* [[B_SROA_2]], align 1 +; CHECK-NEXT: [[B_SROA_1_0_B_SROA_1_2_:%.*]] = load i8, i8* [[B_SROA_1]], align 2 +; CHECK-NEXT: unreachable +; entry: %b = alloca i32, align 4 @@ -1492,8 +1696,14 @@ define void @PR16651.2() { ; bail on select instructions. 
; ; CHECK-LABEL: @PR16651.2( -; CHECK: alloca <2 x float> -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TV1_SROA_0:%.*]] = alloca <2 x float>, align 8 +; CHECK-NEXT: store <2 x float> undef, <2 x float>* [[TV1_SROA_0]], align 8 +; CHECK-NEXT: [[TV1_SROA_0_0__SROA_IDX:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TV1_SROA_0]], i64 0, i32 0 +; CHECK-NEXT: [[COND105_IN_I_I:%.*]] = select i1 undef, float* null, float* [[TV1_SROA_0_0__SROA_IDX]] +; CHECK-NEXT: [[COND105_I_I:%.*]] = load float, float* [[COND105_IN_I_I]], align 8 +; CHECK-NEXT: ret void +; entry: %tv1 = alloca { <2 x float>, <2 x float> }, align 8 @@ -1507,8 +1717,9 @@ entry: define void @test23(i32 %x) { ; CHECK-LABEL: @test23( -; CHECK-NOT: alloca -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; entry: %a = alloca i32, align 4 store i32 %x, i32* %a, align 4 @@ -1522,8 +1733,9 @@ entry: define void @PR18615() { ; CHECK-LABEL: @PR18615( -; CHECK-NOT: alloca -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; entry: %f = alloca i8 %gep = getelementptr i8, i8* %f, i64 -1 @@ -1533,11 +1745,16 @@ entry: define void @test24(i8* %src, i8* %dst) { ; CHECK-LABEL: @test24( -; CHECK: alloca i64, align 16 -; CHECK: load volatile i64, i64* %{{[^,]*}}, align 1, !tbaa [[TAG_0]] -; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 16, !tbaa [[TAG_0]] -; CHECK: load volatile i64, i64* %{{[^,]*}}, align 16, !tbaa [[TAG_3]] -; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 1, !tbaa [[TAG_3]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i64, align 16 +; CHECK-NEXT: [[A_0_SRC_SROA_CAST:%.*]] = bitcast i8* [[SRC:%.*]] to i64* +; CHECK-NEXT: [[A_0_COPYLOAD:%.*]] = load volatile i64, i64* [[A_0_SRC_SROA_CAST]], align 1, !tbaa [[TBAA0]] +; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD]], i64* [[A]], align 16, !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_0_DST_SROA_CAST:%.*]] = bitcast i8* [[DST:%.*]] to i64* +; CHECK-NEXT: [[A_0_COPYLOAD1:%.*]] = load volatile i64, i64* [[A]], align 16, !tbaa [[TBAA7]] +; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD1]], i64* [[A_0_DST_SROA_CAST]], align 1, !tbaa [[TBAA7]] +; CHECK-NEXT: ret void +; entry: %a = alloca i64, align 16 @@ -1556,11 +1773,12 @@ define float @test25() { ; handle these cases and form splitable and promotable SSA values. ; ; CHECK-LABEL: @test25( -; CHECK-NOT: alloca -; CHECK: %[[F1:.*]] = bitcast i32 0 to float -; CHECK: %[[F2:.*]] = bitcast i32 1065353216 to float -; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]] -; CHECK: ret float %[[SUM]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 0 to float +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 1065353216 to float +; CHECK-NEXT: [[RET:%.*]] = fadd float [[TMP0]], [[TMP1]] +; CHECK-NEXT: ret float [[RET]] +; entry: %a = alloca i64 @@ -1588,17 +1806,18 @@ define void @test26() { ; Test a case of splitting up loads and stores against a globals. 
; ; CHECK-LABEL: @test26( -; CHECK-NOT: alloca -; CHECK: %[[L1:.*]] = load i32, i32* bitcast -; CHECK: %[[L2:.*]] = load i32, i32* bitcast -; CHECK: %[[F1:.*]] = bitcast i32 %[[L1]] to float -; CHECK: %[[F2:.*]] = bitcast i32 %[[L2]] to float -; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]] -; CHECK: %[[C1:.*]] = bitcast float %[[SUM]] to i32 -; CHECK: %[[C2:.*]] = bitcast float %[[SUM]] to i32 -; CHECK: store i32 %[[C1]], i32* bitcast -; CHECK: store i32 %[[C2]], i32* bitcast -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V14:%.*]] = load i32, i32* bitcast ([2 x float]* @complex1 to i32*), align 4 +; CHECK-NEXT: [[V16:%.*]] = load i32, i32* bitcast (float* getelementptr inbounds ([2 x float], [2 x float]* @complex1, i64 0, i64 1) to i32*), align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[V14]] to float +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[V16]] to float +; CHECK-NEXT: [[SUM:%.*]] = fadd float [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float [[SUM]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[SUM]] to i32 +; CHECK-NEXT: store i32 [[TMP2]], i32* bitcast ([2 x float]* @complex2 to i32*), align 4 +; CHECK-NEXT: store i32 [[TMP3]], i32* bitcast (float* getelementptr inbounds ([2 x float], [2 x float]* @complex2, i64 0, i64 1) to i32*), align 4 +; CHECK-NEXT: ret void +; entry: %a = alloca i64 @@ -1623,11 +1842,12 @@ define float @test27() { ; the alloca SROA is processing, and they overlap but at an offset. ; ; CHECK-LABEL: @test27( -; CHECK-NOT: alloca -; CHECK: %[[F1:.*]] = bitcast i32 0 to float -; CHECK: %[[F2:.*]] = bitcast i32 1065353216 to float -; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]] -; CHECK: ret float %[[SUM]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 0 to float +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 1065353216 to float +; CHECK-NEXT: [[RET:%.*]] = fadd float [[TMP0]], [[TMP1]] +; CHECK-NEXT: ret float [[RET]] +; entry: %a = alloca [12 x i8] @@ -1658,10 +1878,23 @@ define i32 @PR22093() { ; properly for rewriting. ; ; CHECK-LABEL: @PR22093( -; CHECK-NOT: alloca -; CHECK: alloca i16 -; CHECK-NOT: alloca -; CHECK: store volatile i16 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i16, align 4 +; CHECK-NEXT: store volatile i16 42, i16* [[A_SROA_0]], align 4 +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_LOAD:%.*]] = load i16, i16* [[A_SROA_0]], align 4 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i16 undef to i32 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_SHIFT:%.*]] = shl i32 [[A_SROA_3_0_INSERT_EXT]], 16 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_MASK:%.*]] = and i32 undef, 65535 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i16 [[A_SROA_0_0_A_SROA_0_0_LOAD]] to i32 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i32 [[A_SROA_3_0_INSERT_INSERT]], -65536 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_EXT]] +; CHECK-NEXT: [[A_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_0_0_INSERT_INSERT]] to i16 +; CHECK-NEXT: store i16 [[A_SROA_0_0_EXTRACT_TRUNC]], i16* [[A_SROA_0]], align 4 +; CHECK-NEXT: [[A_SROA_3_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[A_SROA_0_0_INSERT_INSERT]], 16 +; CHECK-NEXT: [[A_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_3_0_EXTRACT_SHIFT]] to i16 +; CHECK-NEXT: ret i32 [[A_SROA_0_0_INSERT_INSERT]] +; entry: %a = alloca i32 @@ -1682,12 +1915,29 @@ define void @PR22093.2() { ; presplit. 
; ; CHECK-LABEL: @PR22093.2( -; CHECK-NOT: alloca -; CHECK: alloca i16 -; CHECK-NEXT: alloca i8 -; CHECK-NOT: alloca -; CHECK: store volatile i16 -; CHECK: store volatile i8 +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i16, align 8 +; CHECK-NEXT: [[A_SROA_31:%.*]] = alloca i8, align 4 +; CHECK-NEXT: store volatile i16 42, i16* [[A_SROA_0]], align 8 +; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_LOAD:%.*]] = load i16, i16* [[A_SROA_0]], align 8 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_EXT:%.*]] = zext i16 undef to i32 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_SHIFT:%.*]] = shl i32 [[A_SROA_3_0_INSERT_EXT]], 16 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_MASK:%.*]] = and i32 undef, 65535 +; CHECK-NEXT: [[A_SROA_3_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_3_0_INSERT_MASK]], [[A_SROA_3_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i16 [[A_SROA_0_0_A_SROA_0_0_LOAD]] to i32 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i32 [[A_SROA_3_0_INSERT_INSERT]], -65536 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_EXT]] +; CHECK-NEXT: [[A_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_0_0_INSERT_INSERT]] to i16 +; CHECK-NEXT: store i16 [[A_SROA_0_0_EXTRACT_TRUNC]], i16* [[A_SROA_0]], align 8 +; CHECK-NEXT: [[A_SROA_3_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[A_SROA_0_0_INSERT_INSERT]], 16 +; CHECK-NEXT: [[A_SROA_3_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_3_0_EXTRACT_SHIFT]] to i16 +; CHECK-NEXT: store volatile i8 13, i8* [[A_SROA_31]], align 4 +; CHECK-NEXT: [[A_SROA_31_4_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_0_0_INSERT_INSERT]] to i8 +; CHECK-NEXT: store i8 [[A_SROA_31_4_EXTRACT_TRUNC]], i8* [[A_SROA_31]], align 4 +; CHECK-NEXT: [[A_SROA_5_4_EXTRACT_SHIFT:%.*]] = lshr i32 [[A_SROA_0_0_INSERT_INSERT]], 8 +; CHECK-NEXT: [[A_SROA_5_4_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_5_4_EXTRACT_SHIFT]] to i24 +; CHECK-NEXT: ret void +; entry: %a = alloca i64 @@ -1705,8 +1955,12 @@ entry: define void @PR23737() { ; CHECK-LABEL: @PR23737( -; CHECK: store atomic volatile {{.*}} seq_cst -; CHECK: load atomic volatile {{.*}} seq_cst +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PTR:%.*]] = alloca i64, align 8 +; CHECK-NEXT: store atomic volatile i64 0, i64* [[PTR]] seq_cst, align 8 +; CHECK-NEXT: [[PTR_0_LOAD:%.*]] = load atomic volatile i64, i64* [[PTR]] seq_cst, align 8 +; CHECK-NEXT: ret void +; entry: %ptr = alloca i64, align 8 store atomic volatile i64 0, i64* %ptr seq_cst, align 8 @@ -1721,11 +1975,10 @@ define i16 @PR24463() { ; alloca. SROA can split the alloca to avoid shift or trunc. 
; ; CHECK-LABEL: @PR24463( -; CHECK-NOT: alloca -; CHECK-NOT: trunc -; CHECK-NOT: lshr -; CHECK: %[[ZEXT:.*]] = zext i8 {{.*}} to i16 -; CHECK: ret i16 %[[ZEXT]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA_SROA_1_2_LOAD_EXT:%.*]] = zext i8 0 to i16 +; CHECK-NEXT: ret i16 [[ALLOCA_SROA_1_2_LOAD_EXT]] +; entry: %alloca = alloca [3 x i8] %gep1 = getelementptr inbounds [3 x i8], [3 x i8]* %alloca, i64 0, i64 1 @@ -1742,16 +1995,24 @@ entry: define void @PR25873(%struct.STest* %outData) { ; CHECK-LABEL: @PR25873( -; CHECK: store i32 1123418112 -; CHECK: store i32 1139015680 -; CHECK: %[[HIZEXT:.*]] = zext i32 1139015680 to i64 -; CHECK: %[[HISHL:.*]] = shl i64 %[[HIZEXT]], 32 -; CHECK: %[[HIMASK:.*]] = and i64 undef, 4294967295 -; CHECK: %[[HIINSERT:.*]] = or i64 %[[HIMASK]], %[[HISHL]] -; CHECK: %[[LOZEXT:.*]] = zext i32 1123418112 to i64 -; CHECK: %[[LOMASK:.*]] = and i64 %[[HIINSERT]], -4294967296 -; CHECK: %[[LOINSERT:.*]] = or i64 %[[LOMASK]], %[[LOZEXT]] -; CHECK: store i64 %[[LOINSERT]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMPDATA_SROA_0_0__SROA_CAST1_SROA_CAST:%.*]] = bitcast %struct.STest* [[OUTDATA:%.*]] to i32* +; CHECK-NEXT: store i32 1123418112, i32* [[TMPDATA_SROA_0_0__SROA_CAST1_SROA_CAST]], align 4 +; CHECK-NEXT: [[TMPDATA_SROA_0_0__SROA_CAST1_SROA_IDX:%.*]] = getelementptr inbounds [[STRUCT_STEST:%.*]], %struct.STest* [[OUTDATA]], i64 0, i32 0, i32 1 +; CHECK-NEXT: [[TMPDATA_SROA_0_0__SROA_CAST1_SROA_CAST16:%.*]] = bitcast float* [[TMPDATA_SROA_0_0__SROA_CAST1_SROA_IDX]] to i32* +; CHECK-NEXT: store i32 1139015680, i32* [[TMPDATA_SROA_0_0__SROA_CAST1_SROA_CAST16]], align 4 +; CHECK-NEXT: [[TMPDATA_SROA_6_0__SROA_IDX3:%.*]] = getelementptr inbounds [[STRUCT_STEST]], %struct.STest* [[OUTDATA]], i64 0, i32 1 +; CHECK-NEXT: [[TMPDATA_SROA_6_0__SROA_CAST4:%.*]] = bitcast %struct.SPos* [[TMPDATA_SROA_6_0__SROA_IDX3]] to i64* +; CHECK-NEXT: [[TMPDATA_SROA_6_SROA_4_0_INSERT_EXT:%.*]] = zext i32 1139015680 to i64 +; CHECK-NEXT: [[TMPDATA_SROA_6_SROA_4_0_INSERT_SHIFT:%.*]] = shl i64 [[TMPDATA_SROA_6_SROA_4_0_INSERT_EXT]], 32 +; CHECK-NEXT: [[TMPDATA_SROA_6_SROA_4_0_INSERT_MASK:%.*]] = and i64 undef, 4294967295 +; CHECK-NEXT: [[TMPDATA_SROA_6_SROA_4_0_INSERT_INSERT:%.*]] = or i64 [[TMPDATA_SROA_6_SROA_4_0_INSERT_MASK]], [[TMPDATA_SROA_6_SROA_4_0_INSERT_SHIFT]] +; CHECK-NEXT: [[TMPDATA_SROA_6_SROA_0_0_INSERT_EXT:%.*]] = zext i32 1123418112 to i64 +; CHECK-NEXT: [[TMPDATA_SROA_6_SROA_0_0_INSERT_MASK:%.*]] = and i64 [[TMPDATA_SROA_6_SROA_4_0_INSERT_INSERT]], -4294967296 +; CHECK-NEXT: [[TMPDATA_SROA_6_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[TMPDATA_SROA_6_SROA_0_0_INSERT_MASK]], [[TMPDATA_SROA_6_SROA_0_0_INSERT_EXT]] +; CHECK-NEXT: store i64 [[TMPDATA_SROA_6_SROA_0_0_INSERT_INSERT]], i64* [[TMPDATA_SROA_6_0__SROA_CAST4]], align 4 +; CHECK-NEXT: ret void +; entry: %tmpData = alloca %struct.STest, align 8 %0 = bitcast %struct.STest* %tmpData to i8* @@ -1775,8 +2036,9 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) n define void @PR27999() unnamed_addr { ; CHECK-LABEL: @PR27999( -; CHECK: entry-block: -; CHECK-NEXT: ret void +; CHECK-NEXT: entry-block: +; CHECK-NEXT: ret void +; entry-block: %0 = alloca [2 x i64], align 8 %1 = bitcast [2 x i64]* %0 to i8* @@ -1789,8 +2051,9 @@ entry-block: define void @PR29139() { ; CHECK-LABEL: @PR29139( -; CHECK: bb1: -; CHECK-NEXT: ret void +; CHECK-NEXT: bb1: +; CHECK-NEXT: ret void +; bb1: %e.7.sroa.6.i = alloca i32, align 1 %e.7.sroa.6.0.load81.i = load i32, i32* %e.7.sroa.6.i, align 1 @@ -1801,10 +2064,15 @@ bb1: ; 
PR35657 reports assertion failure with this code define void @PR35657(i64 %v) { -; CHECK-LABEL: @PR35657 -; CHECK: call void @callee16(i16 %{{.*}}) -; CHECK: call void @callee48(i48 %{{.*}}) -; CHECK: ret void +; CHECK-LABEL: @PR35657( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A48_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[V:%.*]] to i16 +; CHECK-NEXT: [[A48_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[V]], 16 +; CHECK-NEXT: [[A48_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[A48_SROA_2_0_EXTRACT_SHIFT]] to i48 +; CHECK-NEXT: call void @callee16(i16 [[A48_SROA_0_0_EXTRACT_TRUNC]]) +; CHECK-NEXT: call void @callee48(i48 [[A48_SROA_2_0_EXTRACT_TRUNC]]) +; CHECK-NEXT: ret void +; entry: %a48 = alloca i48 %a48.cast64 = bitcast i48* %a48 to i64* @@ -1826,14 +2094,13 @@ declare void @callee48(i48 %a) define void @test28(i64 %v) #0 { ; SROA should split the first i64 store to avoid additional and/or instructions ; when storing into i32 fields - ; CHECK-LABEL: @test28( -; CHECK-NOT: alloca -; CHECK-NOT: and -; CHECK-NOT: or -; CHECK: %[[shift:.*]] = lshr i64 %v, 32 -; CHECK-NEXT: %{{.*}} = trunc i64 %[[shift]] to i32 -; CHECK-NEXT: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T_SROA_0_8_EXTRACT_TRUNC:%.*]] = trunc i64 [[V:%.*]] to i32 +; CHECK-NEXT: [[T_SROA_2_8_EXTRACT_SHIFT:%.*]] = lshr i64 [[V]], 32 +; CHECK-NEXT: [[T_SROA_2_8_EXTRACT_TRUNC:%.*]] = trunc i64 [[T_SROA_2_8_EXTRACT_SHIFT]] to i32 +; CHECK-NEXT: ret void +; entry: %t = alloca { i64, i32, i32 } @@ -1854,8 +2121,32 @@ declare void @llvm.lifetime.end.isVoid.i64.p0i8(i64, [10 x float]* nocapture) define void @test29(i32 %num, i32 %tid) { ; CHECK-LABEL: @test29( -; CHECK-NOT: alloca [10 x float] -; CHECK: ret void +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[NUM:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[BB1:%.*]], label [[BB7:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TID:%.*]], 0 +; CHECK-NEXT: [[CONV_I:%.*]] = zext i32 [[TID]] to i64 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @array, i64 0, i64 [[CONV_I]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[ARRAYIDX5]] to i32* +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ [[NUM]], [[BB1]] ], [ [[SUB:%.*]], [[BB5:%.*]] ] +; CHECK-NEXT: br i1 [[TOBOOL]], label [[BB3:%.*]], label [[BB4:%.*]] +; CHECK: bb3: +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb4: +; CHECK-NEXT: store i32 undef, i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb5: +; CHECK-NEXT: [[SUB]] = add i32 [[I_02]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[SUB]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[BB2]], label [[BB6:%.*]] +; CHECK: bb6: +; CHECK-NEXT: br label [[BB7]] +; CHECK: bb7: +; CHECK-NEXT: ret void +; entry: %ra = alloca [10 x float], align 4 @@ -1959,86 +2250,12 @@ bb7: !59 = !{!60, !60, i64 0, i64 1} !60 = !{!2, i64 1, !"type_59"} -; CHECK-DAG: [[TAG_0]] = !{[[TYPE_0:!.*]], [[TYPE_0]], i64 0, i64 200} -; CHECK-DAG: [[TYPE_0]] = !{{{.*}}, !"type_0"} - -; CHECK-DAG: [[TAG_0_M42]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 158} -; CHECK-DAG: [[TAG_0_M43]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 157} -; CHECK-DAG: [[TAG_0_M142]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 58} -; CHECK-DAG: [[TAG_0_M158]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 42} - -; CHECK-DAG: [[TAG_59]] = !{[[TYPE_59:!.*]], [[TYPE_59]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_59]] = !{{{.*}}, !"type_59"} - -; CHECK-DAG: [[TAG_0_M20]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 180} -; CHECK-DAG: [[TAG_0_M22]] = 
!{[[TYPE_0]], [[TYPE_0]], i64 0, i64 178} -; CHECK-DAG: [[TAG_0_M23]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 177} -; CHECK-DAG: [[TAG_0_M30]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 170} -; CHECK-DAG: [[TAG_0_M40]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 160} -; CHECK-DAG: [[TAG_0_M50]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 150} -; CHECK-DAG: [[TAG_0_M52]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 148} -; CHECK-DAG: [[TAG_0_M53]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 147} -; CHECK-DAG: [[TAG_0_M60]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 140} - -; CHECK-DAG: [[TYPE_7:!.*]] = !{{{.*}}, !"type_7"} -; CHECK-DAG: [[TAG_7]] = !{[[TYPE_7]], [[TYPE_7]], i64 0, i64 1} - -; CHECK-DAG: [[TYPE_3:!.*]] = !{{{.*}}, !"type_3"} -; CHECK-DAG: [[TAG_3]] = !{[[TYPE_3]], [[TYPE_3]], i64 0, i64 1} - -; CHECK-DAG: [[TYPE_5:!.*]] = !{{{.*}}, !"type_5"} -; CHECK-DAG: [[TAG_5]] = !{[[TYPE_5]], [[TYPE_5]], i64 0, i64 1} - -; CHECK-DAG: [[TAG_0_M4]] = !{[[TYPE_0]], [[TYPE_0]], i64 0, i64 196} - - -; CHECK-DAG: [[TYPE_9:!.*]] = !{{{.*}}, !"type_9"} -; CHECK-DAG: [[TAG_9]] = !{[[TYPE_9]], [[TYPE_9]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_11:!.*]] = !{{{.*}}, !"type_11"} -; CHECK-DAG: [[TAG_11]] = !{[[TYPE_11]], [[TYPE_11]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_13:!.*]] = !{{{.*}}, !"type_13"} -; CHECK-DAG: [[TAG_13]] = !{[[TYPE_13]], [[TYPE_13]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_15:!.*]] = !{{{.*}}, !"type_15"} -; CHECK-DAG: [[TAG_15]] = !{[[TYPE_15]], [[TYPE_15]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_17:!.*]] = !{{{.*}}, !"type_17"} -; CHECK-DAG: [[TAG_17]] = !{[[TYPE_17]], [[TYPE_17]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_19:!.*]] = !{{{.*}}, !"type_19"} -; CHECK-DAG: [[TAG_19]] = !{[[TYPE_19]], [[TYPE_19]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_21:!.*]] = !{{{.*}}, !"type_21"} -; CHECK-DAG: [[TAG_21]] = !{[[TYPE_21]], [[TYPE_21]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_23:!.*]] = !{{{.*}}, !"type_23"} -; CHECK-DAG: [[TAG_23]] = !{[[TYPE_23]], [[TYPE_23]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_25:!.*]] = !{{{.*}}, !"type_25"} -; CHECK-DAG: [[TAG_25]] = !{[[TYPE_25]], [[TYPE_25]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_27:!.*]] = !{{{.*}}, !"type_27"} -; CHECK-DAG: [[TAG_27]] = !{[[TYPE_27]], [[TYPE_27]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_29:!.*]] = !{{{.*}}, !"type_29"} -; CHECK-DAG: [[TAG_29]] = !{[[TYPE_29]], [[TYPE_29]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_31:!.*]] = !{{{.*}}, !"type_31"} -; CHECK-DAG: [[TAG_31]] = !{[[TYPE_31]], [[TYPE_31]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_33:!.*]] = !{{{.*}}, !"type_33"} -; CHECK-DAG: [[TAG_33]] = !{[[TYPE_33]], [[TYPE_33]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_35:!.*]] = !{{{.*}}, !"type_35"} -; CHECK-DAG: [[TAG_35]] = !{[[TYPE_35]], [[TYPE_35]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_37:!.*]] = !{{{.*}}, !"type_37"} -; CHECK-DAG: [[TAG_37]] = !{[[TYPE_37]], [[TYPE_37]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_39:!.*]] = !{{{.*}}, !"type_39"} -; CHECK-DAG: [[TAG_39]] = !{[[TYPE_39]], [[TYPE_39]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_41:!.*]] = !{{{.*}}, !"type_41"} -; CHECK-DAG: [[TAG_41]] = !{[[TYPE_41]], [[TYPE_41]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_43:!.*]] = !{{{.*}}, !"type_43"} -; CHECK-DAG: [[TAG_43]] = !{[[TYPE_43]], [[TYPE_43]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_45:!.*]] = !{{{.*}}, !"type_45"} -; CHECK-DAG: [[TAG_45]] = !{[[TYPE_45]], [[TYPE_45]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_47:!.*]] = !{{{.*}}, !"type_47"} -; CHECK-DAG: [[TAG_47]] = !{[[TYPE_47]], [[TYPE_47]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_49:!.*]] = !{{{.*}}, !"type_49"} -; CHECK-DAG: [[TAG_49]] = !{[[TYPE_49]], [[TYPE_49]], i64 0, 
i64 1} -; CHECK-DAG: [[TYPE_51:!.*]] = !{{{.*}}, !"type_51"} -; CHECK-DAG: [[TAG_51]] = !{[[TYPE_51]], [[TYPE_51]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_53:!.*]] = !{{{.*}}, !"type_53"} -; CHECK-DAG: [[TAG_53]] = !{[[TYPE_53]], [[TYPE_53]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_55:!.*]] = !{{{.*}}, !"type_55"} -; CHECK-DAG: [[TAG_55]] = !{[[TYPE_55]], [[TYPE_55]], i64 0, i64 1} -; CHECK-DAG: [[TYPE_57:!.*]] = !{{{.*}}, !"type_57"} -; CHECK-DAG: [[TAG_57]] = !{[[TYPE_57]], [[TYPE_57]], i64 0, i64 1} + + + + + + + + + diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll index 3d76e76..7f056b8 100644 --- a/llvm/test/Transforms/SROA/vector-promotion.ll +++ b/llvm/test/Transforms/SROA/vector-promotion.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -sroa -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" @@ -5,15 +6,21 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 define i32 @test1(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 +; CHECK-NEXT: [[A_SROA_2_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 3 +; CHECK-NEXT: [[A_SROA_2_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_2_28_VEC_EXTRACT]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_2_16_VEC_EXTRACT]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP5]] +; entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca + %a = alloca [2 x <4 x i32>] %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 store <4 x i32> %x, <4 x i32>* %a.x %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 %tmp1 = load i32, i32* %a.tmp1 @@ -21,30 +28,30 @@ entry: %tmp2 = load i32, i32* %a.tmp2 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: load -; CHECK: extractelement <4 x i32> %x, i32 2 -; CHECK-NEXT: extractelement <4 x i32> %y, i32 3 -; CHECK-NEXT: extractelement <4 x i32> %y, i32 0 %tmp4 = add i32 %tmp1, %tmp2 %tmp5 = add i32 %tmp3, %tmp4 ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret } define i32 @test2(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2 +; CHECK-NEXT: [[A_SROA_2_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 3 +; CHECK-NEXT: [[A_SROA_2_16_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[A_SROA_2_16_VEC_EXTRACT]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_2_28_VEC_EXTRACT]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP5]] +; entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca + %a = alloca [2 x <4 x i32>] %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 store <4 x i32> %x, <4 x i32>* %a.x %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x 
i32>]* %a, i64 0, i64 1 store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 %tmp1 = load i32, i32* %a.tmp1 @@ -54,35 +61,33 @@ entry: %a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>* %tmp3.vec = load <2 x i32>, <2 x i32>* %a.tmp3.cast %tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0 -; CHECK-NOT: load -; CHECK: %[[extract1:.*]] = extractelement <4 x i32> %x, i32 2 -; CHECK-NEXT: %[[extract2:.*]] = extractelement <4 x i32> %y, i32 3 -; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> %y, <4 x i32> poison, <2 x i32> -; CHECK-NEXT: %[[extract4:.*]] = extractelement <2 x i32> %[[extract3]], i32 0 %tmp4 = add i32 %tmp1, %tmp2 %tmp5 = add i32 %tmp3, %tmp4 ret i32 %tmp5 -; CHECK-NEXT: %[[sum1:.*]] = add i32 %[[extract1]], %[[extract2]] -; CHECK-NEXT: %[[sum2:.*]] = add i32 %[[extract4]], %[[sum1]] -; CHECK-NEXT: ret i32 %[[sum2]] } define i32 @test3(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X:%.*]], i32 -1, i32 2 +; CHECK-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_0_8_VEC_INSERT]], i32 2 +; CHECK-NEXT: [[A_SROA_3_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> zeroinitializer, i32 3 +; CHECK-NEXT: [[A_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> zeroinitializer, i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_3_28_VEC_EXTRACT]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_3_16_VEC_EXTRACT]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP5]] +; entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca + %a = alloca [2 x <4 x i32>] %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 store <4 x i32> %x, <4 x i32>* %a.x %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store %a.y.cast = bitcast <4 x i32>* %a.y to i8* call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i1 false) -; CHECK-NOT: memset %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 %a.tmp1.cast = bitcast i32* %a.tmp1 to i8* @@ -92,36 +97,37 @@ entry: %tmp2 = load i32, i32* %a.tmp2 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: load -; CHECK: %[[insert:.*]] = insertelement <4 x i32> %x, i32 -1, i32 2 -; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2 -; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 3 -; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 0 %tmp4 = add i32 %tmp1, %tmp2 %tmp5 = add i32 %tmp3, %tmp4 ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret } define i32 @test4(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) { ; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_3_16_COPYLOAD:%.*]] = load <4 x i32>, <4 x i32>* [[Z:%.*]], align 1 +; CHECK-NEXT: [[A_SROA_0_8_Z_TMP1_CAST_SROA_IDX:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[Z]], i64 0, i64 2 +; CHECK-NEXT: [[A_SROA_0_8_COPYLOAD:%.*]] = load i32, i32* [[A_SROA_0_8_Z_TMP1_CAST_SROA_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X:%.*]], i32 [[A_SROA_0_8_COPYLOAD]], i32 2 +; CHECK-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_0_8_VEC_INSERT]], i32 2 +; CHECK-NEXT: [[A_SROA_3_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> 
[[A_SROA_3_16_COPYLOAD]], i32 3 +; CHECK-NEXT: [[A_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_3_28_VEC_EXTRACT]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_3_16_VEC_EXTRACT]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP5]] +; entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca + %a = alloca [2 x <4 x i32>] %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 store <4 x i32> %x, <4 x i32>* %a.x %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store %a.y.cast = bitcast <4 x i32>* %a.y to i8* %z.cast = bitcast <4 x i32>* %z to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i1 false) -; CHECK-NOT: memcpy %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 %a.tmp1.cast = bitcast i32* %a.tmp1 to i8* @@ -133,21 +139,10 @@ entry: %tmp2 = load i32, i32* %a.tmp2 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: memcpy -; CHECK: %[[load:.*]] = load <4 x i32>, <4 x i32>* %z -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2 -; CHECK-NEXT: %[[element_load:.*]] = load i32, i32* %[[gep]] -; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2 -; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2 -; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3 -; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0 %tmp4 = add i32 %tmp1, %tmp2 %tmp5 = add i32 %tmp3, %tmp4 ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret } declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocapture, i32, i1) nounwind @@ -155,20 +150,29 @@ declare void @llvm.memcpy.p0i8.p1i8.i32(i8* nocapture, i8 addrspace(1)* nocaptur ; Same as test4 with a different sized address space pointer source. 
define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, <4 x i32> addrspace(1)* %z) { ; CHECK-LABEL: @test4_as1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_3_16_COPYLOAD:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[Z:%.*]], align 1 +; CHECK-NEXT: [[A_SROA_0_8_Z_TMP1_CAST_SROA_IDX:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* [[Z]], i64 0, i64 2 +; CHECK-NEXT: [[A_SROA_0_8_COPYLOAD:%.*]] = load i32, i32 addrspace(1)* [[A_SROA_0_8_Z_TMP1_CAST_SROA_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X:%.*]], i32 [[A_SROA_0_8_COPYLOAD]], i32 2 +; CHECK-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_0_8_VEC_INSERT]], i32 2 +; CHECK-NEXT: [[A_SROA_3_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 3 +; CHECK-NEXT: [[A_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_3_28_VEC_EXTRACT]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_3_16_VEC_EXTRACT]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP5]] +; entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca + %a = alloca [2 x <4 x i32>] %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 store <4 x i32> %x, <4 x i32>* %a.x %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store %a.y.cast = bitcast <4 x i32>* %a.y to i8* %z.cast = bitcast <4 x i32> addrspace(1)* %z to i8 addrspace(1)* call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.y.cast, i8 addrspace(1)* %z.cast, i32 16, i1 false) -; CHECK-NOT: memcpy %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 %a.tmp1.cast = bitcast i32* %a.tmp1 to i8* @@ -180,41 +184,38 @@ entry: %tmp2 = load i32, i32* %a.tmp2 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: memcpy -; CHECK: %[[load:.*]] = load <4 x i32>, <4 x i32> addrspace(1)* %z -; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i64 0, i64 2 -; CHECK-NEXT: %[[element_load:.*]] = load i32, i32 addrspace(1)* %[[gep]] -; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2 -; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2 -; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3 -; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0 %tmp4 = add i32 %tmp1, %tmp2 %tmp5 = add i32 %tmp3, %tmp4 ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret } define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) { ; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_8_A_SROA_0_8_Z_TMP1_CAST_SROA_CAST_SROA_IDX:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[Z:%.*]], i64 0, i64 2 +; CHECK-NEXT: [[A_SROA_0_8_VEC_EXTRACT2:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 2 +; CHECK-NEXT: store i32 [[A_SROA_0_8_VEC_EXTRACT2]], i32* [[A_SROA_0_8_A_SROA_0_8_Z_TMP1_CAST_SROA_CAST_SROA_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 2 +; CHECK-NEXT: [[A_SROA_4_12_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 3 +; CHECK-NEXT: [[A_SROA_4_0_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_4_12_VEC_EXTRACT]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 
[[A_SROA_4_0_VEC_EXTRACT]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP5]] +; ; The same as the above, but with reversed source and destination for the ; element memcpy, and a self copy. entry: - %a = alloca [2 x <4 x i32>] -; CHECK-NOT: alloca + %a = alloca [2 x <4 x i32>] %a.x = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0 store <4 x i32> %x, <4 x i32>* %a.x %a.y = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1 store <4 x i32> %y, <4 x i32>* %a.y -; CHECK-NOT: store %a.y.cast = bitcast <4 x i32>* %a.y to i8* %a.x.cast = bitcast <4 x i32>* %a.x to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i1 false) -; CHECK-NOT: memcpy %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2 %a.tmp1.cast = bitcast i32* %a.tmp1 to i8* @@ -226,20 +227,10 @@ entry: %tmp2 = load i32, i32* %a.tmp2 %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0 %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: memcpy -; CHECK: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2 -; CHECK-NEXT: %[[extract:.*]] = extractelement <4 x i32> %y, i32 2 -; CHECK-NEXT: store i32 %[[extract]], i32* %[[gep]] -; CHECK-NEXT: extractelement <4 x i32> %y, i32 2 -; CHECK-NEXT: extractelement <4 x i32> %y, i32 3 -; CHECK-NEXT: extractelement <4 x i32> %y, i32 0 %tmp4 = add i32 %tmp1, %tmp2 %tmp5 = add i32 %tmp3, %tmp4 ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret } declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind @@ -247,15 +238,22 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i1) nounwind define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) { ; CHECK-LABEL: @test6( +; CHECK-NEXT: [[TMP:%.*]] = alloca { <4 x i64>, <4 x i64> }, align 32 +; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* [[TMP]], i32 0, i32 0 +; CHECK-NEXT: store <4 x i64> [[X:%.*]], <4 x i64>* [[P0]], align 32 +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* [[TMP]], i32 0, i32 1 +; CHECK-NEXT: store <4 x i64> [[Y:%.*]], <4 x i64>* [[P1]], align 32 +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* [[TMP]], i32 0, i32 0, i64 [[N:%.*]] +; CHECK-NEXT: [[RES:%.*]] = load i64, i64* [[ADDR]], align 4 +; CHECK-NEXT: ret i64 [[RES]] +; ; The old scalarrepl pass would wrongly drop the store to the second alloca. 
; PR13254 %tmp = alloca { <4 x i64>, <4 x i64> } %p0 = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0 store <4 x i64> %x, <4 x i64>* %p0 -; CHECK: store <4 x i64> %x, %p1 = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 1 store <4 x i64> %y, <4 x i64>* %p1 -; CHECK: store <4 x i64> %y, %addr = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0, i64 %n %res = load i64, i64* %addr, align 4 ret i64 %res @@ -263,225 +261,245 @@ define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) { define <4 x i32> @test_subvec_store() { ; CHECK-LABEL: @test_subvec_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> undef +; CHECK-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> [[A_0_VECBLEND]] +; CHECK-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> [[A_4_VECBLEND]] +; CHECK-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[A_8_VECBLEND]], i32 3, i32 3 +; CHECK-NEXT: ret <4 x i32> [[A_12_VEC_INSERT]] +; entry: %a = alloca <4 x i32> -; CHECK-NOT: alloca %a.gep0 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 0 %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>* store <2 x i32> , <2 x i32>* %a.cast0 -; CHECK-NOT: store -; CHECK: select <4 x i1> %a.gep1 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 1 %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>* store <2 x i32> , <2 x i32>* %a.cast1 -; CHECK-NEXT: select <4 x i1> %a.gep2 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 2 %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>* store <2 x i32> , <2 x i32>* %a.cast2 -; CHECK-NEXT: select <4 x i1> %a.gep3 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 3 store i32 3, i32* %a.gep3 -; CHECK-NEXT: insertelement <4 x i32> %ret = load <4 x i32>, <4 x i32>* %a ret <4 x i32> %ret -; CHECK-NEXT: ret <4 x i32> } define <4 x i32> @test_subvec_load() { ; CHECK-LABEL: @test_subvec_load( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_0_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[A_4_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[A_8_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> +; CHECK-NEXT: [[TMP:%.*]] = shufflevector <2 x i32> [[A_0_VEC_EXTRACT]], <2 x i32> [[A_4_VEC_EXTRACT]], <2 x i32> +; CHECK-NEXT: [[RET:%.*]] = shufflevector <2 x i32> [[TMP]], <2 x i32> [[A_8_VEC_EXTRACT]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[RET]] +; entry: %a = alloca <4 x i32> -; CHECK-NOT: alloca store <4 x i32> , <4 x i32>* %a -; CHECK-NOT: store %a.gep0 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 0 %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>* %first = load <2 x i32>, <2 x i32>* %a.cast0 -; CHECK-NOT: load -; CHECK: %[[extract1:.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> %a.gep1 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 1 %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>* %second = load <2 x i32>, <2 x i32>* %a.cast1 -; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> %a.gep2 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 2 %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>* %third = load <2 x i32>, <2 x i32>* %a.cast2 -; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> %tmp = shufflevector <2 x i32> %first, <2 x i32> %second, <2 x i32> %ret = shufflevector <2 x i32> %tmp, <2 x i32> 
%third, <4 x i32> -; CHECK-NEXT: %[[tmp:.*]] = shufflevector <2 x i32> %[[extract1]], <2 x i32> %[[extract2]], <2 x i32> -; CHECK-NEXT: %[[ret:.*]] = shufflevector <2 x i32> %[[tmp]], <2 x i32> %[[extract3]], <4 x i32> ret <4 x i32> %ret -; CHECK-NEXT: ret <4 x i32> %[[ret]] } declare void @llvm.memset.p0i32.i32(i32* nocapture, i32, i32, i1) nounwind define <4 x float> @test_subvec_memset() { ; CHECK-LABEL: @test_subvec_memset( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> undef +; CHECK-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> [[A_0_VECBLEND]] +; CHECK-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> [[A_4_VECBLEND]] +; CHECK-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[A_8_VECBLEND]], float 0x38E0E0E0E0000000, i32 3 +; CHECK-NEXT: ret <4 x float> [[A_12_VEC_INSERT]] +; entry: %a = alloca <4 x float> -; CHECK-NOT: alloca %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0 %a.cast0 = bitcast float* %a.gep0 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast0, i8 0, i32 8, i1 false) -; CHECK-NOT: store -; CHECK: select <4 x i1> %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1 %a.cast1 = bitcast float* %a.gep1 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast1, i8 1, i32 8, i1 false) -; CHECK-NEXT: select <4 x i1> %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2 %a.cast2 = bitcast float* %a.gep2 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast2, i8 3, i32 8, i1 false) -; CHECK-NEXT: select <4 x i1> %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3 %a.cast3 = bitcast float* %a.gep3 to i8* call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i1 false) -; CHECK-NEXT: insertelement <4 x float> %ret = load <4 x float>, <4 x float>* %a ret <4 x float> %ret -; CHECK-NEXT: ret <4 x float> } define <4 x float> @test_subvec_memcpy(i8* %x, i8* %y, i8* %z, i8* %f, i8* %out) { ; CHECK-LABEL: @test_subvec_memcpy( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_0_X_SROA_CAST:%.*]] = bitcast i8* [[X:%.*]] to <2 x float>* +; CHECK-NEXT: [[A_0_COPYLOAD:%.*]] = load <2 x float>, <2 x float>* [[A_0_X_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_0_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_0_COPYLOAD]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_0_VEC_EXPAND]], <4 x float> undef +; CHECK-NEXT: [[A_4_Y_SROA_CAST:%.*]] = bitcast i8* [[Y:%.*]] to <2 x float>* +; CHECK-NEXT: [[A_4_COPYLOAD:%.*]] = load <2 x float>, <2 x float>* [[A_4_Y_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_4_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_4_COPYLOAD]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_4_VEC_EXPAND]], <4 x float> [[A_0_VECBLEND]] +; CHECK-NEXT: [[A_8_Z_SROA_CAST:%.*]] = bitcast i8* [[Z:%.*]] to <2 x float>* +; CHECK-NEXT: [[A_8_COPYLOAD:%.*]] = load <2 x float>, <2 x float>* [[A_8_Z_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_8_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_8_COPYLOAD]], <2 x float> poison, <4 x i32> +; CHECK-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_8_VEC_EXPAND]], <4 x float> [[A_4_VECBLEND]] +; CHECK-NEXT: [[A_12_F_SROA_CAST:%.*]] = bitcast i8* [[F:%.*]] to float* +; CHECK-NEXT: [[A_12_COPYLOAD:%.*]] = load float, float* [[A_12_F_SROA_CAST]], align 1 +; CHECK-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[A_8_VECBLEND]], float [[A_12_COPYLOAD]], i32 3 +; 
CHECK-NEXT: [[A_8_OUT_SROA_CAST:%.*]] = bitcast i8* [[OUT:%.*]] to <2 x float>* +; CHECK-NEXT: [[A_8_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[A_12_VEC_INSERT]], <4 x float> poison, <2 x i32> +; CHECK-NEXT: store <2 x float> [[A_8_VEC_EXTRACT]], <2 x float>* [[A_8_OUT_SROA_CAST]], align 1 +; CHECK-NEXT: ret <4 x float> [[A_12_VEC_INSERT]] +; entry: %a = alloca <4 x float> -; CHECK-NOT: alloca %a.gep0 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 0 %a.cast0 = bitcast float* %a.gep0 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i1 false) -; CHECK: %[[xptr:.*]] = bitcast i8* %x to <2 x float>* -; CHECK-NEXT: %[[x:.*]] = load <2 x float>, <2 x float>* %[[xptr]] -; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: select <4 x i1> %a.gep1 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 1 %a.cast1 = bitcast float* %a.gep1 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i1 false) -; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>* -; CHECK-NEXT: %[[y:.*]] = load <2 x float>, <2 x float>* %[[yptr]] -; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: select <4 x i1> %a.gep2 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 2 %a.cast2 = bitcast float* %a.gep2 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i1 false) -; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>* -; CHECK-NEXT: %[[z:.*]] = load <2 x float>, <2 x float>* %[[zptr]] -; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> poison, <4 x i32> -; CHECK-NEXT: select <4 x i1> %a.gep3 = getelementptr <4 x float>, <4 x float>* %a, i32 0, i32 3 %a.cast3 = bitcast float* %a.gep3 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i1 false) -; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float* -; CHECK-NEXT: %[[f:.*]] = load float, float* %[[fptr]] -; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i1 false) -; CHECK-NEXT: %[[outptr:.*]] = bitcast i8* %out to <2 x float>* -; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> poison, <2 x i32> -; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]] %ret = load <4 x float>, <4 x float>* %a ret <4 x float> %ret -; CHECK-NEXT: ret <4 x float> %[[insert_f]] } define i32 @PR14212() { ; CHECK-LABEL: @PR14212( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <3 x i8> undef to i24 +; CHECK-NEXT: [[RETVAL_SROA_2_0_INSERT_EXT:%.*]] = zext i8 undef to i32 +; CHECK-NEXT: [[RETVAL_SROA_2_0_INSERT_SHIFT:%.*]] = shl i32 [[RETVAL_SROA_2_0_INSERT_EXT]], 24 +; CHECK-NEXT: [[RETVAL_SROA_2_0_INSERT_MASK:%.*]] = and i32 undef, 16777215 +; CHECK-NEXT: [[RETVAL_SROA_2_0_INSERT_INSERT:%.*]] = or i32 [[RETVAL_SROA_2_0_INSERT_MASK]], [[RETVAL_SROA_2_0_INSERT_SHIFT]] +; CHECK-NEXT: [[RETVAL_0_INSERT_EXT:%.*]] = zext i24 [[TMP0]] to i32 +; CHECK-NEXT: [[RETVAL_0_INSERT_MASK:%.*]] = and i32 [[RETVAL_SROA_2_0_INSERT_INSERT]], -16777216 +; CHECK-NEXT: [[RETVAL_0_INSERT_INSERT:%.*]] = or i32 [[RETVAL_0_INSERT_MASK]], [[RETVAL_0_INSERT_EXT]] +; CHECK-NEXT: ret i32 [[RETVAL_0_INSERT_INSERT]] +; ; This caused a crash when "splitting" the load of the i32 in order to promote ; the store of <3 x i8> properly. Heavily reduced from an OpenCL test case. 
entry: %retval = alloca <3 x i8>, align 4 -; CHECK-NOT: alloca store <3 x i8> undef, <3 x i8>* %retval, align 4 %cast = bitcast <3 x i8>* %retval to i32* %load = load i32, i32* %cast, align 4 ret i32 %load -; CHECK: ret i32 } define <2 x i8> @PR14349.1(i32 %x) { -; CHECK: @PR14349.1 +; CHECK-LABEL: @PR14349.1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[X:%.*]] to i16 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16 [[A_SROA_0_0_EXTRACT_TRUNC]] to <2 x i8> +; CHECK-NEXT: [[A_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[X]], 16 +; CHECK-NEXT: [[A_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_2_0_EXTRACT_SHIFT]] to i16 +; CHECK-NEXT: ret <2 x i8> [[TMP0]] +; ; The first testcase for broken SROA rewriting of split integer loads and ; stores due to smaller vector loads and stores. This particular test ensures ; that we can rewrite a split store of an integer to a store of a vector. entry: %a = alloca i32 -; CHECK-NOT: alloca store i32 %x, i32* %a -; CHECK-NOT: store %cast = bitcast i32* %a to <2 x i8>* %vec = load <2 x i8>, <2 x i8>* %cast -; CHECK-NOT: load ret <2 x i8> %vec -; CHECK: %[[trunc:.*]] = trunc i32 %x to i16 -; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8> -; CHECK: ret <2 x i8> %[[cast]] } define i32 @PR14349.2(<2 x i8> %x) { -; CHECK: @PR14349.2 +; CHECK-LABEL: @PR14349.2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i8> [[X:%.*]] to i16 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i16 undef to i32 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_SHIFT:%.*]] = shl i32 [[A_SROA_2_0_INSERT_EXT]], 16 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_MASK:%.*]] = and i32 undef, 65535 +; CHECK-NEXT: [[A_SROA_2_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_2_0_INSERT_MASK]], [[A_SROA_2_0_INSERT_SHIFT]] +; CHECK-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i16 [[TMP0]] to i32 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i32 [[A_SROA_2_0_INSERT_INSERT]], -65536 +; CHECK-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_EXT]] +; CHECK-NEXT: ret i32 [[A_SROA_0_0_INSERT_INSERT]] +; ; The first testcase for broken SROA rewriting of split integer loads and ; stores due to smaller vector loads and stores. This particular test ensures ; that we can rewrite a split load of an integer to a load of a vector. entry: %a = alloca i32 -; CHECK-NOT: alloca %cast = bitcast i32* %a to <2 x i8>* store <2 x i8> %x, <2 x i8>* %cast -; CHECK-NOT: store %int = load i32, i32* %a -; CHECK-NOT: load ret i32 %int -; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16 -; CHECK: %[[trunc:.*]] = zext i16 %[[cast]] to i32 -; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]] -; CHECK: ret i32 %[[insert]] } define i32 @test7(<2 x i32> %x, <2 x i32> %y) { ; Test that we can promote to vectors when the alloca doesn't mention any vector types. 
; CHECK-LABEL: @test7( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[X:%.*]], i32 1 +; CHECK-NEXT: [[A_SROA_2_12_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[Y:%.*]], i32 1 +; CHECK-NEXT: [[A_SROA_2_8_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[Y]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_4_VEC_EXTRACT]], [[A_SROA_2_12_VEC_EXTRACT]] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_2_8_VEC_EXTRACT]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP5]] +; entry: - %a = alloca [2 x i64] + %a = alloca [2 x i64] %a.cast = bitcast [2 x i64]* %a to [2 x <2 x i32>]* -; CHECK-NOT: alloca %a.x = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 0 store <2 x i32> %x, <2 x i32>* %a.x %a.y = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1 store <2 x i32> %y, <2 x i32>* %a.y -; CHECK-NOT: store %a.tmp1 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 0, i64 1 %tmp1 = load i32, i32* %a.tmp1 @@ -489,92 +507,82 @@ entry: %tmp2 = load i32, i32* %a.tmp2 %a.tmp3 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 0 %tmp3 = load i32, i32* %a.tmp3 -; CHECK-NOT: load -; CHECK: extractelement <2 x i32> %x, i32 1 -; CHECK-NEXT: extractelement <2 x i32> %y, i32 1 -; CHECK-NEXT: extractelement <2 x i32> %y, i32 0 %tmp4 = add i32 %tmp1, %tmp2 %tmp5 = add i32 %tmp3, %tmp4 ret i32 %tmp5 -; CHECK-NEXT: add -; CHECK-NEXT: add -; CHECK-NEXT: ret } define i32 @test8(<2 x i32> %x) { ; Ensure that we can promote an alloca that doesn't mention a vector type based ; on a single store with a vector type. ; CHECK-LABEL: @test8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[X:%.*]], i32 0 +; CHECK-NEXT: [[A_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[X]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_0_VEC_EXTRACT]], [[A_SROA_0_4_VEC_EXTRACT]] +; CHECK-NEXT: ret i32 [[TMP4]] +; entry: - %a = alloca i64 + %a = alloca i64 %a.vec = bitcast i64* %a to <2 x i32>* %a.i32 = bitcast i64* %a to i32* -; CHECK-NOT: alloca store <2 x i32> %x, <2 x i32>* %a.vec -; CHECK-NOT: store %tmp1 = load i32, i32* %a.i32 %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1 %tmp2 = load i32, i32* %a.tmp2 -; CHECK-NOT: load -; CHECK: extractelement <2 x i32> %x, i32 0 -; CHECK-NEXT: extractelement <2 x i32> %x, i32 1 %tmp4 = add i32 %tmp1, %tmp2 ret i32 %tmp4 -; CHECK-NEXT: add -; CHECK-NEXT: ret } define <2 x i32> @test9(i32 %x, i32 %y) { ; Ensure that we can promote an alloca that doesn't mention a vector type based ; on a single load with a vector type. 
; CHECK-LABEL: @test9( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0 +; CHECK-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[A_SROA_0_0_VEC_INSERT]], i32 [[Y:%.*]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[A_SROA_0_4_VEC_INSERT]] +; entry: - %a = alloca i64 + %a = alloca i64 %a.vec = bitcast i64* %a to <2 x i32>* %a.i32 = bitcast i64* %a to i32* -; CHECK-NOT: alloca store i32 %x, i32* %a.i32 %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1 store i32 %y, i32* %a.tmp2 -; CHECK-NOT: store -; CHECK: %[[V1:.*]] = insertelement <2 x i32> undef, i32 %x, i32 0 -; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1 %result = load <2 x i32>, <2 x i32>* %a.vec -; CHECK-NOT: load ret <2 x i32> %result -; CHECK-NEXT: ret <2 x i32> %[[V2]] } define <2 x i32> @test10(<4 x i16> %x, i32 %y) { ; If there are multiple different vector types used, we should select the one ; with the widest elements. ; CHECK-LABEL: @test10( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Y:%.*]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[A_SROA_0_4_VEC_INSERT]] +; entry: - %a = alloca i64 + %a = alloca i64 %a.vec1 = bitcast i64* %a to <2 x i32>* %a.vec2 = bitcast i64* %a to <4 x i16>* %a.i32 = bitcast i64* %a to i32* -; CHECK-NOT: alloca store <4 x i16> %x, <4 x i16>* %a.vec2 %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1 store i32 %y, i32* %a.tmp2 -; CHECK-NOT: store -; CHECK: %[[V1:.*]] = bitcast <4 x i16> %x to <2 x i32> -; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1 %result = load <2 x i32>, <2 x i32>* %a.vec1 -; CHECK-NOT: load ret <2 x i32> %result -; CHECK-NEXT: ret <2 x i32> %[[V2]] } define <2 x float> @test11(<4 x i16> %x, i32 %y) { @@ -582,44 +590,41 @@ define <2 x float> @test11(<4 x i16> %x, i32 %y) { ; pick the integer types. This isn't really important, but seems like the best ; heuristic for making a deterministic decision. 
; CHECK-LABEL: @test11( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32 [[Y:%.*]] to <2 x i16> +; CHECK-NEXT: [[A_SROA_0_4_VEC_EXPAND:%.*]] = shufflevector <2 x i16> [[TMP0]], <2 x i16> poison, <4 x i32> +; CHECK-NEXT: [[A_SROA_0_4_VECBLEND:%.*]] = select <4 x i1> , <4 x i16> [[A_SROA_0_4_VEC_EXPAND]], <4 x i16> [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[A_SROA_0_4_VECBLEND]] to <2 x float> +; CHECK-NEXT: ret <2 x float> [[TMP1]] +; entry: - %a = alloca i64 + %a = alloca i64 %a.vec1 = bitcast i64* %a to <2 x float>* %a.vec2 = bitcast i64* %a to <4 x i16>* %a.i32 = bitcast i64* %a to i32* -; CHECK-NOT: alloca store <4 x i16> %x, <4 x i16>* %a.vec2 %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1 store i32 %y, i32* %a.tmp2 -; CHECK-NOT: store -; CHECK: %[[V1:.*]] = bitcast i32 %y to <2 x i16> -; CHECK-NEXT: %[[V2:.*]] = shufflevector <2 x i16> %[[V1]], <2 x i16> poison, <4 x i32> -; CHECK-NEXT: %[[V3:.*]] = select <4 x i1> , <4 x i16> %[[V2]], <4 x i16> %x -; CHECK-NEXT: %[[V4:.*]] = bitcast <4 x i16> %[[V3]] to <2 x float> %result = load <2 x float>, <2 x float>* %a.vec1 -; CHECK-NOT: load ret <2 x float> %result -; CHECK-NEXT: ret <2 x float> %[[V4]] } define <4 x float> @test12() { ; CHECK-LABEL: @test12( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> undef to <4 x float> +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; %a = alloca <3 x i32>, align 16 -; CHECK-NOT: alloca %cast1 = bitcast <3 x i32>* %a to <4 x i32>* store <4 x i32> undef, <4 x i32>* %cast1, align 16 -; CHECK-NOT: store %cast2 = bitcast <3 x i32>* %a to <3 x float>* %cast3 = bitcast <3 x float>* %cast2 to <4 x float>* %vec = load <4 x float>, <4 x float>* %cast3 -; CHECK-NOT: load -; CHECK: %[[ret:.*]] = bitcast <4 x i32> undef to <4 x float> -; CHECK-NEXT: ret <4 x float> %[[ret]] ret <4 x float> %vec } -- 2.7.4
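Note on regeneration: the CHECK lines in this patch are autogenerated by utils/update_test_checks.py from each test's RUN lines (see the NOTE lines added at the top of both files), so they can be refreshed mechanically after an intentional change to SROA output rather than hand-edited. A minimal sketch of the invocation follows; the build directory path is an assumption for illustration and is not part of this patch:

  # Assumed local build layout; point --opt-binary at your own 'opt' binary.
  llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
      llvm/test/Transforms/SROA/basictest.ll \
      llvm/test/Transforms/SROA/vector-promotion.ll

The script re-runs each RUN line and rewrites the CHECK-NEXT blocks in place, which is why the hand-written CHECK-NOT/CHECK patterns above are replaced wholesale by the generated assertions.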