From 3c4d2a03968ccf5889bacffe02d6fa2443b0260f Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 26 Nov 2022 17:52:58 +0300 Subject: [PATCH] [SROA] `isVectorPromotionViable()`: memory intrinsics operate on vectors of bytes (take 2) This is a recommit of cf624b23bc5d5a6161706d1663def49380ff816a, which was reverted in 5cfc22cafe3f2465e0bb324f8daba82ffcabd0df, because the cut-off on the number of vector elements was not low enough, and it triggered both SDAG SDNode operand number assertions, and caused compile time explosions in some cases. Let's try with something really *REALLY* conservative first, just to get somewhere, and try to bump it (to 64/128) later. FIXME: should this respect TTI reg width * num vec regs? Original commit message: Now, there's a big caveat here - these bytes are abstract bytes, not the i8 we have in LLVM, so strictly speaking this is not exactly legal, see e.g. https://github.com/AliveToolkit/alive2/issues/860 ^ the "bytes" "could" have been a pointer, and loading it as an integer inserts an implicit ptrtoint. But at the same time, InstCombine's `InstCombinerImpl::SimplifyAnyMemTransfer()` would expand a memtransfer of 1/2/4/8 bytes into integer-typed load+store, so this isn't exactly a new problem. Note that in memory, poison is byte-wise, so we really can't widen elements, but SROA seems to be inconsistent here. Fixes #59116. --- clang/test/CodeGenOpenCL/amdgpu-nullptr.cl | 18 +- llvm/lib/Transforms/Scalar/SROA.cpp | 49 +++-- llvm/test/CodeGen/AMDGPU/v1024.ll | 2 +- llvm/test/DebugInfo/X86/sroasplit-1.ll | 6 +- llvm/test/DebugInfo/X86/sroasplit-4.ll | 20 +-- .../PhaseOrdering/instcombine-sroa-inttoptr.ll | 20 ++- llvm/test/Transforms/SROA/address-spaces.ll | 12 +- llvm/test/Transforms/SROA/alignment.ll | 24 ++- llvm/test/Transforms/SROA/alloca-address-space.ll | 12 +- llvm/test/Transforms/SROA/basictest.ll | 197 ++++++++------------- llvm/test/Transforms/SROA/pointer-offset-size.ll | 3 +- llvm/test/Transforms/SROA/slice-width.ll | 3 +- llvm/test/Transforms/SROA/tbaa-struct.ll | 3 +- llvm/test/Transforms/SROA/tbaa-struct2.ll | 3 +- llvm/test/Transforms/SROA/vector-promotion.ll | 19 +- 15 files changed, 180 insertions(+), 211 deletions(-) diff --git a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl index 65f6f2e..859e81f 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-nullptr.cl @@ -515,13 +515,17 @@ typedef struct { private char *p; } StructTy3; -// CHECK-LABEL: test_memset_private -// CHECK: call void @llvm.memset.p5i8.i64(i8 addrspace(5)* noundef align 8 {{.*}}, i8 0, i64 32, i1 false) -// CHECK: [[GEP:%.*]] = getelementptr inbounds %struct.StructTy3, %struct.StructTy3 addrspace(5)* %ptr, i32 0, i32 4 -// CHECK: store i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[GEP]] -// CHECK: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* {{.*}}, i32 36 -// CHECK: [[GEP1_CAST:%.*]] = bitcast i8 addrspace(5)* [[GEP1]] to i32 addrspace(5)* -// CHECK: store i32 0, i32 addrspace(5)* [[GEP1_CAST]], align 4 +// CHECK-LABEL: @test_memset_private( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [[STRUCT_STRUCTTY3:%.*]] addrspace(5)* [[PTR:%.*]] to i8 addrspace(5)* +// CHECK-NEXT: [[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST:%.*]] = bitcast [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]] to <32 x i8> addrspace(5)* +// CHECK-NEXT: store <32 x i8> zeroinitializer, <32 x i8> addrspace(5)* 
[[S3_SROA_0_SROA_0_0_S3_SROA_0_0__SROA_CAST2_SROA_CAST]], align 8, !tbaa.struct !9 +// CHECK-NEXT: [[S3_SROA_4_0__SROA_IDX6:%.*]] = getelementptr inbounds [[STRUCT_STRUCTTY3]], [[STRUCT_STRUCTTY3]] addrspace(5)* [[PTR]], i32 0, i32 4 +// CHECK-NEXT: store i8 addrspace(5)* addrspacecast (i8* null to i8 addrspace(5)*), i8 addrspace(5)* addrspace(5)* [[S3_SROA_4_0__SROA_IDX6]], align 8, !tbaa.struct !12 +// CHECK-NEXT: [[S3_SROA_5_0__SROA_IDX:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* [[TMP0]], i32 36 +// CHECK-NEXT: [[S3_SROA_5_0__SROA_CAST8:%.*]] = bitcast i8 addrspace(5)* [[S3_SROA_5_0__SROA_IDX]] to i32 addrspace(5)* +// CHECK-NEXT: store i32 0, i32 addrspace(5)* [[S3_SROA_5_0__SROA_CAST8]], align 4, !tbaa.struct !13 +// CHECK-NEXT: ret void void test_memset_private(private StructTy3 *ptr) { StructTy3 S3 = {0, 0, 0, 0, 0}; *ptr = S3; diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 6dcdd63..a2d7dc2 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1806,8 +1810,10 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, ? Ty->getElementType() : FixedVectorType::get(Ty->getElementType(), NumElements); - Type *SplitIntTy = - Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8); + Type *SplitIntTy = nullptr; + if (uint64_t Bitwidth = NumElements * ElementSize * 8; + Bitwidth <= IntegerType::MAX_INT_BITS) + SplitIntTy = Type::getIntNTy(Ty->getContext(), Bitwidth); Use *U = S.getUse(); @@ -1826,7 +1828,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, // Disable vector promotion when there are loads or stores of an FCA. if (LTy->isStructTy()) return false; - if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) { + if (SplitIntTy && + (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) { assert(LTy->isIntegerTy()); LTy = SplitIntTy; } @@ -1839,7 +1842,8 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, // Disable vector promotion when there are loads or stores of an FCA. if (STy->isStructTy()) return false; - if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) { + if (SplitIntTy && + (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset())) { assert(STy->isIntegerTy()); STy = SplitIntTy; } @@ -1889,7 +1893,8 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy, /// SSA value. We only can ensure this for a limited set of operations, and we /// don't want to do the rewrites unless we are confident that the result will /// be promotable, so we have an early test here. -static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { +static VectorType *isVectorPromotionViable(Partition &P, LLVMContext &Ctx, + const DataLayout &DL) { // Collect the candidate types for vector-based promotion. Also track whether // we have different element types. SmallVector<VectorType *, 4> CandidateTys; @@ -1926,6 +1931,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { } } }; + bool SeenMemTransferInst = false; // Consider any loads or stores that are the exact size of the slice. 
for (const Slice &S : P) if (S.beginOffset() == P.beginOffset() && @@ -1934,8 +1940,29 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { CheckCandidateType(LI->getType()); else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) CheckCandidateType(SI->getValueOperand()->getType()); + else if (isa<MemTransferInst>(S.getUse()->getUser())) + SeenMemTransferInst = true; } + // If we have seen a mem transfer intrinsic, + // and the partition is small enough, + // enqueue an appropriate byte vector. + // + // The "small enough" threshold is somewhat arbitrary, + // and is mostly dictated by compile-time concerns, + // but, at the same time, SDAG SDNode can't handle + // more than 65535 operands, so we should not + // produce vectors with more than ~32768 elements. + // + // Perhaps we should also take into account the TTI: + // `getNumberOfRegisters() * getRegisterBitWidth() / 8`? + // + // FIXME: byte type is sticky. If we had any op with byte-typed elements, + // then we should choose that type. + if (SeenMemTransferInst && P.size() <= 32) + CheckCandidateType( + FixedVectorType::get(IntegerType::getInt8Ty(Ctx), P.size())); + // If we didn't find a vector type, nothing to do here. if (CandidateTys.empty()) return nullptr; @@ -1992,13 +2019,6 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { CandidateTys.resize(1); } - // FIXME: hack. Do we have a named constant for this? - // SDAG SDNode can't have more than 65535 operands. - llvm::erase_if(CandidateTys, [](VectorType *VTy) { return cast<FixedVectorType>(VTy)->getNumElements() > std::numeric_limits<unsigned short>::max(); }); - for (VectorType *VTy : CandidateTys) if (checkVectorTypeForPromotion(P, VTy, DL)) return VTy; @@ -4323,8 +4343,9 @@ AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS, bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL); - VectorType *VecTy = - IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL); + VectorType *VecTy = IsIntegerPromotable + ? nullptr + : isVectorPromotionViable(P, AI.getContext(), DL); if (VecTy) SliceTy = VecTy; diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll index 1326ba4..6dbb944 100644 --- a/llvm/test/CodeGen/AMDGPU/v1024.ll +++ b/llvm/test/CodeGen/AMDGPU/v1024.ll @@ -4,7 +4,7 @@ ; GCN-LABEL: {{^}}test_v1024: ; GCN-NOT: v_accvgpr -; GCN-COUNT-32: v_mov_b32_e32 +; GCN-COUNT-10: v_mov_b32_e32 ; GCN-NOT: v_accvgpr define amdgpu_kernel void @test_v1024() { entry: diff --git a/llvm/test/DebugInfo/X86/sroasplit-1.ll b/llvm/test/DebugInfo/X86/sroasplit-1.ll index 0ec3681..5a80b56 100644 --- a/llvm/test/DebugInfo/X86/sroasplit-1.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-1.ll @@ -20,10 +20,8 @@ ; ; Verify that SROA creates a variable piece when splitting i1. 
-; CHECK: %[[I1:.*]] = alloca [12 x i8], align 4 -; CHECK: call void @llvm.dbg.declare(metadata [12 x i8]* %[[I1]], metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96)) -; CHECK: call void @llvm.dbg.value(metadata i32 %[[A:.*]], metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) -; CHECK: ret i32 %[[A]] +; CHECK: %[[I1:.*]] = load <12 x i8>, +; CHECK: call void @llvm.dbg.value(metadata <12 x i8> %[[I1]], metadata ![[VAR:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96)) ; Read Var and Piece: ; CHECK: ![[VAR]] = !DILocalVariable(name: "i1",{{.*}} line: 11, diff --git a/llvm/test/DebugInfo/X86/sroasplit-4.ll b/llvm/test/DebugInfo/X86/sroasplit-4.ll index 0d5594e..a3b35b8 100644 --- a/llvm/test/DebugInfo/X86/sroasplit-4.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-4.ll @@ -1,28 +1,28 @@ ; RUN: opt -sroa < %s -S -o - | FileCheck %s ; ; Test that recursively splitting an alloca updates the debug info correctly. -; CHECK: %[[T:.*]] = load i64, i64* @t, align 8 -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[Y:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) -; CHECK: %[[T1:.*]] = load i64, i64* @t, align 8 -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[Y]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[R:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 192, 64)) -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 256, 64)) -; +; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC:.*]], metadata ![[Y:.*]], metadata !DIExpression()) +; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC1:.*]], metadata ![[Y]], metadata !DIExpression()) +; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[R:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) +; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) +; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64)) +; CHECK: call void @llvm.dbg.value(metadata <16 x i8> %[[Y_VEC1]], metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 192, 128)) +; ; struct p { ; __SIZE_TYPE__ s; ; __SIZE_TYPE__ t; ; }; -; +; ; struct r { ; int i; ; struct p x; ; struct p y; ; }; -; +; ; extern int call_me(struct r); ; extern int maybe(); ; extern __SIZE_TYPE__ t; -; +; ; int test() { ; if (maybe()) ; return 0; diff --git a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll index 886bde2..3361ad1 100644 --- a/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll @@ -68,12 +68,13 @@ define dso_local i32* @_Z3foo1S(%0* byval(%0) align 8 %arg) { ; CHECK-LABEL: @_Z3foo1S( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I2:%.*]] = alloca [[TMP0:%.*]], align 8 -; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I5_SROA_IDX]], align 8 +; CHECK-NEXT: [[TMP0]] = bitcast %0* [[ARG:%.*]] to i64* +; CHECK-NEXT: [[I11_SROA_0_0_VEC_EXTRACT_EXTRACT:%.*]] = load i64, i64* [[TMP0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[I11_SROA_0_0_VEC_EXTRACT_EXTRACT]] to i32* ; CHECK-NEXT: 
[[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I2]], i64 0, i32 0 -; CHECK-NEXT: store i32* [[I1_SROA_0_0_COPYLOAD]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8 +; CHECK-NEXT: store i32* [[TMP1]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8 ; CHECK-NEXT: tail call void @_Z7escape01S(%0* nonnull byval([[TMP0]]) align 8 [[I2]]) -; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]] +; CHECK-NEXT: ret i32* [[TMP1]] ; bb: %i = alloca %0, align 8 @@ -107,21 +108,22 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) define dso_local i32* @_Z3bar1S(%0* byval(%0) align 8 %arg) { ; CHECK-LABEL: @_Z3bar1S( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[I1_SROA_0_0_I4_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG:%.*]], i64 0, i32 0 -; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I4_SROA_IDX]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %0* [[ARG:%.*]] to i64* +; CHECK-NEXT: [[I13_SROA_0_0_VEC_EXTRACT_EXTRACT:%.*]] = load i64, i64* [[TMP0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[I13_SROA_0_0_VEC_EXTRACT_EXTRACT]] to i32* ; CHECK-NEXT: [[I5:%.*]] = tail call i32 @_Z4condv() ; CHECK-NEXT: [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0 ; CHECK-NEXT: br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]] ; CHECK: bb7: ; CHECK-NEXT: tail call void @_Z5sync0v() -; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[I1_SROA_0_0_COPYLOAD]]) +; CHECK-NEXT: tail call void @_Z7escape0Pi(i32* [[TMP1]]) ; CHECK-NEXT: br label [[BB13:%.*]] ; CHECK: bb10: ; CHECK-NEXT: tail call void @_Z5sync1v() -; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[I1_SROA_0_0_COPYLOAD]]) +; CHECK-NEXT: tail call void @_Z7escape1Pi(i32* [[TMP1]]) ; CHECK-NEXT: br label [[BB13]] ; CHECK: bb13: -; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]] +; CHECK-NEXT: ret i32* [[TMP1]] ; bb: %i = alloca %0, align 8 diff --git a/llvm/test/Transforms/SROA/address-spaces.ll b/llvm/test/Transforms/SROA/address-spaces.ll index 70e1a68..0300e99 100644 --- a/llvm/test/Transforms/SROA/address-spaces.ll +++ b/llvm/test/Transforms/SROA/address-spaces.ll @@ -11,8 +11,8 @@ declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) nocapture, ptr addrspace(1) ; Make sure an illegal bitcast isn't introduced define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_1_1( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16 @@ -23,8 +23,8 @@ define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { define void @test_address_space_1_0(ptr addrspace(1) %a, ptr %b) { ; CHECK-LABEL: @test_address_space_1_0( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16 @@ -35,8 +35,8 @@ define void @test_address_space_1_0(ptr addrspace(1) %a, ptr %b) { define void @test_address_space_0_1(ptr %a, ptr addrspace(1) %b) { ; 
CHECK-LABEL: @test_address_space_0_1( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16 diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll index 66da09c..ba673b1 100644 --- a/llvm/test/Transforms/SROA/alignment.ll +++ b/llvm/test/Transforms/SROA/alignment.ll @@ -92,15 +92,15 @@ define void @PR13920(ptr %a, ptr %b) { ; Test that alignments on memcpy intrinsics get propagated to loads and stores. ; CHECK-LABEL: @PR13920( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; ; DEBUGLOC-LABEL: @PR13920( ; DEBUGLOC-NEXT: entry: ; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG38:![0-9]+]] -; DEBUGLOC-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2, !dbg [[DBG39:![0-9]+]] -; DEBUGLOC-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2, !dbg [[DBG40:![0-9]+]] +; DEBUGLOC-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A:%.*]], align 2, !dbg [[DBG39:![0-9]+]] +; DEBUGLOC-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr [[B:%.*]], align 2, !dbg [[DBG40:![0-9]+]] ; DEBUGLOC-NEXT: ret void, !dbg [[DBG41:![0-9]+]] ; @@ -118,21 +118,17 @@ define void @test3(ptr %x) { ; reduce the alignment. 
; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8 -; CHECK-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A_SROA_0]], ptr align 8 [[X:%.*]], i32 22, i1 false) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[B_SROA_0]], ptr align 2 [[X]], i32 18, i1 false) +; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load <22 x i8>, ptr [[X:%.*]], align 8 +; CHECK-NEXT: [[B_SROA_0_6_COPYLOAD:%.*]] = load <18 x i8>, ptr [[X]], align 2 ; CHECK-NEXT: ret void ; ; DEBUGLOC-LABEL: @test3( ; DEBUGLOC-NEXT: entry: -; DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8, !dbg [[DBG47:![0-9]+]] -; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] -; DEBUGLOC-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2, !dbg [[DBG48:![0-9]+]] -; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -; DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A_SROA_0]], ptr align 8 [[X:%.*]], i32 22, i1 false), !dbg [[DBG49:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]] +; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] +; DEBUGLOC-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load <22 x i8>, ptr [[X:%.*]], align 8, !dbg [[DBG49:![0-9]+]] ; DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50:![0-9]+]] -; DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[B_SROA_0]], ptr align 2 [[X]], i32 18, i1 false), !dbg [[DBG51:![0-9]+]] +; DEBUGLOC-NEXT: [[B_SROA_0_6_COPYLOAD:%.*]] = load <18 x i8>, ptr [[X]], align 2, !dbg [[DBG51:![0-9]+]] ; DEBUGLOC-NEXT: ret void, !dbg [[DBG52:![0-9]+]] ; diff --git a/llvm/test/Transforms/SROA/alloca-address-space.ll b/llvm/test/Transforms/SROA/alloca-address-space.ll index d4f305c..b06f269 100644 --- a/llvm/test/Transforms/SROA/alloca-address-space.ll +++ b/llvm/test/Transforms/SROA/alloca-address-space.ll @@ -10,8 +10,8 @@ declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) nocapture, ptr addrspace(1) define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_1_1( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16, addrspace(2) @@ -22,8 +22,8 @@ define void @test_address_space_1_1(ptr addrspace(1) %a, ptr addrspace(1) %b) { define void @test_address_space_1_0(ptr addrspace(1) %a, ptr addrspace(2) %b) { ; CHECK-LABEL: @test_address_space_1_0( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(1) [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(2) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(1) [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(2) [[B:%.*]], align 2 ; 
CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16, addrspace(2) @@ -34,8 +34,8 @@ define void @test_address_space_1_0(ptr addrspace(1) %a, ptr addrspace(2) %b) { define void @test_address_space_0_1(ptr addrspace(2) %a, ptr addrspace(1) %b) { ; CHECK-LABEL: @test_address_space_0_1( -; CHECK-NEXT: [[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr addrspace(2) [[A:%.*]], align 2 -; CHECK-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 +; CHECK-NEXT: [[AA_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr addrspace(2) [[A:%.*]], align 2 +; CHECK-NEXT: store <16 x i8> [[AA_SROA_0_0_COPYLOAD]], ptr addrspace(1) [[B:%.*]], align 2 ; CHECK-NEXT: ret void ; %aa = alloca <2 x i64>, align 16, addrspace(2) diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll index 5ac8ed8c..a57cd04 100644 --- a/llvm/test/Transforms/SROA/basictest.ll +++ b/llvm/test/Transforms/SROA/basictest.ll @@ -141,10 +141,7 @@ define void @test3(ptr %dst, ptr align 8 %src) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [42 x i8], align 1 ; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [99 x i8], align 1 -; CHECK-NEXT: [[A_SROA_32:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[A_SROA_15:%.*]] = alloca [42 x i8], align 1 -; CHECK-NEXT: [[A_SROA_16:%.*]] = alloca [7 x i8], align 1 -; CHECK-NEXT: [[A_SROA_235:%.*]] = alloca [7 x i8], align 1 ; CHECK-NEXT: [[A_SROA_31:%.*]] = alloca [85 x i8], align 1 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], ptr align 8 [[SRC:%.*]], i32 42, i1 false), !tbaa [[TBAA0:![0-9]+]] ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 42 @@ -152,87 +149,74 @@ define void @test3(ptr %dst, ptr align 8 %src) { ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 43 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3]], ptr align 1 [[A_SROA_3_0_SRC_SROA_IDX]], i32 99, i1 false), !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_32_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 142 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_32]], ptr align 2 [[A_SROA_32_0_SRC_SROA_IDX]], i32 16, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[A_SROA_32_0_SRC_SROA_IDX]], align 2, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_15_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 158 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15]], ptr align 2 [[A_SROA_15_0_SRC_SROA_IDX]], i32 42, i1 false), !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_16_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 200 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16]], ptr align 8 [[A_SROA_16_0_SRC_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr [[A_SROA_16_0_SRC_SROA_IDX]], align 8, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_23_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 207 ; CHECK-NEXT: [[A_SROA_23_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_23_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_235_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 208 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235]], ptr align 8 [[A_SROA_235_0_SRC_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr 
[[A_SROA_235_0_SRC_SROA_IDX]], align 8, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_31_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 215 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], ptr align 1 [[A_SROA_31_0_SRC_SROA_IDX]], i32 85, i1 false), !tbaa [[TBAA0]] -; CHECK-NEXT: store i8 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA3:![0-9]+]] -; CHECK-NEXT: store i16 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA5:![0-9]+]] -; CHECK-NEXT: store i32 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA7:![0-9]+]] -; CHECK-NEXT: store i64 1, ptr [[A_SROA_32]], align 1, !tbaa [[TBAA9:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_1_OVERLAP_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 1 -; CHECK-NEXT: store i64 2, ptr [[A_SROA_32_1_OVERLAP_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA11:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_2_OVERLAP_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 2 -; CHECK-NEXT: store i64 3, ptr [[A_SROA_32_2_OVERLAP_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA13:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_3_OVERLAP_4_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 3 -; CHECK-NEXT: store i64 4, ptr [[A_SROA_32_3_OVERLAP_4_I8_SROA_IDX]], align 1, !tbaa [[TBAA15:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_4_OVERLAP_5_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 4 -; CHECK-NEXT: store i64 5, ptr [[A_SROA_32_4_OVERLAP_5_I8_SROA_IDX]], align 1, !tbaa [[TBAA17:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_5_OVERLAP_6_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 5 -; CHECK-NEXT: store i64 6, ptr [[A_SROA_32_5_OVERLAP_6_I8_SROA_IDX]], align 1, !tbaa [[TBAA19:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_6_OVERLAP_7_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 6 -; CHECK-NEXT: store i64 7, ptr [[A_SROA_32_6_OVERLAP_7_I8_SROA_IDX]], align 1, !tbaa [[TBAA21:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_7_OVERLAP_8_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 7 -; CHECK-NEXT: store i64 8, ptr [[A_SROA_32_7_OVERLAP_8_I8_SROA_IDX]], align 1, !tbaa [[TBAA23:![0-9]+]] -; CHECK-NEXT: [[A_SROA_32_8_OVERLAP_9_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_32]], i64 8 -; CHECK-NEXT: store i64 9, ptr [[A_SROA_32_8_OVERLAP_9_I8_SROA_IDX]], align 1, !tbaa [[TBAA25:![0-9]+]] -; CHECK-NEXT: store i8 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA27:![0-9]+]] -; CHECK-NEXT: store i16 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA29:![0-9]+]] -; CHECK-NEXT: store i32 1, ptr [[A_SROA_16]], align 1, !tbaa [[TBAA31:![0-9]+]] -; CHECK-NEXT: [[A_SROA_16_1_OVERLAP2_1_1_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 1 -; CHECK-NEXT: store i32 2, ptr [[A_SROA_16_1_OVERLAP2_1_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA33:![0-9]+]] -; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 2 -; CHECK-NEXT: store i32 3, ptr [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA35:![0-9]+]] -; CHECK-NEXT: [[A_SROA_16_3_OVERLAP2_1_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 3 -; CHECK-NEXT: store i32 4, ptr [[A_SROA_16_3_OVERLAP2_1_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA37:![0-9]+]] -; CHECK-NEXT: store i32 1, ptr [[A_SROA_235]], align 1, !tbaa [[TBAA39:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX11:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 -; CHECK-NEXT: store i8 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX11]], align 1, !tbaa 
[[TBAA41:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX10:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 -; CHECK-NEXT: store i16 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX10]], align 1, !tbaa [[TBAA43:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 -; CHECK-NEXT: store i32 1, ptr [[A_SROA_235_1_OVERLAP2_2_1_I8_SROA_IDX]], align 1, !tbaa [[TBAA45:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 2 -; CHECK-NEXT: store i32 3, ptr [[A_SROA_235_2_OVERLAP2_2_2_I8_SROA_IDX]], align 1, !tbaa [[TBAA47:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 3 -; CHECK-NEXT: store i32 4, ptr [[A_SROA_235_3_OVERLAP2_2_3_I8_SROA_IDX]], align 1, !tbaa [[TBAA49:![0-9]+]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VEC_INSERT:%.*]] = insertelement <16 x i8> [[A_SROA_32_SROA_0_0_COPYLOAD]], i8 1, i32 0 +; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND27:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i16> to <2 x i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> to <2 x i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VEC_INSERT]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND25:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND27]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_0_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND25]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_1_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_0_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_2_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x 
i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_1_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_3_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_2_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_4_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_3_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_5_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_4_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_6_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef, i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_5_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_7_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x 
i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7), i8 undef>, <16 x i8> [[A_SROA_32_SROA_0_6_VECBLEND]] +; CHECK-NEXT: [[A_SROA_32_SROA_0_8_VECBLEND:%.*]] = select <16 x i1> , <16 x i8> bitcast (<1 x i64> to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (<1 x i64> to <8 x i8>), i32 7)>, <16 x i8> [[A_SROA_32_SROA_0_7_VECBLEND]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VEC_INSERT:%.*]] = insertelement <7 x i8> [[A_SROA_16_SROA_0_0_COPYLOAD]], i8 1, i32 0 +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND18:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i16> to <2 x i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> to <2 x i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VEC_INSERT]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VECBLEND18]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_1_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef, i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_0_VECBLEND]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_2_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef>, <7 x i8> [[A_SROA_16_SROA_0_1_VECBLEND]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_3_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3)>, <7 x i8> [[A_SROA_16_SROA_0_2_VECBLEND]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_0_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_0_COPYLOAD]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VEC_INSERT:%.*]] = insertelement <7 x i8> [[A_SROA_235_SROA_0_0_VECBLEND]], i8 1, i32 1 +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND13:%.*]] 
= select <7 x i1> , <7 x i8> bitcast (<1 x i16> to <2 x i8>), i32 0), i8 extractelement (<2 x i8> bitcast (<1 x i16> to <2 x i8>), i32 1), i8 undef, i8 undef, i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VEC_INSERT]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef, i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND13]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3), i8 undef>, <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_3_VECBLEND:%.*]] = select <7 x i1> , <7 x i8> bitcast (<1 x i32> to <4 x i8>), i32 0), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 1), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 2), i8 extractelement (<4 x i8> bitcast (<1 x i32> to <4 x i8>), i32 3)>, <7 x i8> [[A_SROA_235_SROA_0_2_VECBLEND]] ; CHECK-NEXT: [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_15]], i64 39 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX]], ptr align 1 [[SRC]], i32 3, i1 false), !tbaa [[TBAA51:![0-9]+]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_197_OVERLAP2_PREFIX_SROA_IDX]], ptr align 1 [[SRC]], i32 3, i1 false), !tbaa [[TBAA3:![0-9]+]] ; CHECK-NEXT: [[A_SROA_16_197_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16]], ptr align 1 [[A_SROA_16_197_SRC_SROA_IDX]], i32 5, i1 false), !tbaa [[TBAA51]] -; CHECK-NEXT: [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX12:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_16]], i64 2 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_16_2_OVERLAP2_1_2_I8_SROA_IDX12]], i8 42, i32 5, i1 false), !tbaa [[TBAA53:![0-9]+]] -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_235]], i8 42, i32 2, i1 false), !tbaa [[TBAA53]] -; CHECK-NEXT: [[A_SROA_235_209_OVERLAP2_2_1_I8_SROA_IDX8:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_209_OVERLAP2_2_1_I8_SROA_IDX8]], ptr align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA55:![0-9]+]] -; CHECK-NEXT: [[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_235]], i64 2 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_210_OVERLAP2_2_2_I8_SROA_IDX9]], ptr align 1 [[SRC]], i32 5, i1 false), !tbaa [[TBAA57:![0-9]+]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_COPYLOAD21:%.*]] = load <5 x i8>, ptr [[A_SROA_16_197_SRC_SROA_IDX]], align 1, !tbaa [[TBAA3]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <5 x i8> [[A_SROA_16_SROA_0_0_COPYLOAD21]], <5 x i8> poison, <7 x i32> +; CHECK-NEXT: [[A_SROA_16_SROA_0_0_VECBLEND22:%.*]] = select <7 x i1> , <7 x i8> [[A_SROA_16_SROA_0_0_VEC_EXPAND]], <7 x i8> [[A_SROA_16_SROA_0_3_VECBLEND]] +; CHECK-NEXT: [[A_SROA_16_SROA_0_2_VECBLEND23:%.*]] = select <7 x i1> , <7 x i8> , <7 x 
i8> [[A_SROA_16_SROA_0_0_VECBLEND22]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_0_VECBLEND11:%.*]] = select <7 x i1> , <7 x i8> , <7 x i8> [[A_SROA_235_SROA_0_3_VECBLEND]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_COPYLOAD:%.*]] = load <5 x i8>, ptr [[SRC]], align 1, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VEC_EXPAND:%.*]] = shufflevector <5 x i8> [[A_SROA_235_SROA_0_1_COPYLOAD]], <5 x i8> poison, <7 x i32> +; CHECK-NEXT: [[A_SROA_235_SROA_0_1_VECBLEND15:%.*]] = select <7 x i1> , <7 x i8> [[A_SROA_235_SROA_0_1_VEC_EXPAND]], <7 x i8> [[A_SROA_235_SROA_0_0_VECBLEND11]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_2_COPYLOAD:%.*]] = load <5 x i8>, ptr [[SRC]], align 1, !tbaa [[TBAA7:![0-9]+]] +; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VEC_EXPAND:%.*]] = shufflevector <5 x i8> [[A_SROA_235_SROA_0_2_COPYLOAD]], <5 x i8> poison, <7 x i32> +; CHECK-NEXT: [[A_SROA_235_SROA_0_2_VECBLEND16:%.*]] = select <7 x i1> , <7 x i8> [[A_SROA_235_SROA_0_2_VEC_EXPAND]], <7 x i8> [[A_SROA_235_SROA_0_1_VECBLEND15]] ; CHECK-NEXT: [[A_SROA_31_210_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 5 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], ptr align 1 [[A_SROA_31_210_SRC_SROA_IDX]], i32 3, i1 false), !tbaa [[TBAA57]] -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], ptr align 1 [[A_SROA_0]], i32 42, i1 false), !tbaa [[TBAA59:![0-9]+]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31]], ptr align 1 [[A_SROA_31_210_SRC_SROA_IDX]], i32 3, i1 false), !tbaa [[TBAA7]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], ptr align 1 [[A_SROA_0]], i32 42, i1 false), !tbaa [[TBAA9:![0-9]+]] ; CHECK-NEXT: [[A_SROA_2_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 42 -; CHECK-NEXT: store i8 0, ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA59]] +; CHECK-NEXT: store i8 0, ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 43 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_3]], i32 99, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_3]], i32 99, i1 false), !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_32_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 142 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_32_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_32]], i32 16, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: store <16 x i8> [[A_SROA_32_SROA_0_8_VECBLEND]], ptr [[A_SROA_32_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_15_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 158 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_15]], i32 42, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_15_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_15]], i32 42, i1 false), !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_16_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 200 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_16_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_16]], i32 7, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: store <7 x i8> [[A_SROA_16_SROA_0_2_VECBLEND23]], ptr [[A_SROA_16_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_23_0_DST_SROA_IDX:%.*]] = 
getelementptr inbounds i8, ptr [[DST]], i64 207 -; CHECK-NEXT: store i8 42, ptr [[A_SROA_23_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA59]] +; CHECK-NEXT: store i8 42, ptr [[A_SROA_23_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_235_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 208 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_235_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_235]], i32 7, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: store <7 x i8> [[A_SROA_235_SROA_0_2_VECBLEND16]], ptr [[A_SROA_235_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]] ; CHECK-NEXT: [[A_SROA_31_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 215 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_31]], i32 85, i1 false), !tbaa [[TBAA59]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_31]], i32 85, i1 false), !tbaa [[TBAA9]] ; CHECK-NEXT: ret void ; @@ -315,60 +299,30 @@ entry: define void @test4(ptr %dst, ptr %src) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [20 x i8], align 1 -; CHECK-NEXT: [[A_SROA_2_SROA_4:%.*]] = alloca [7 x i8], align 1 -; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [10 x i8], align 1 -; CHECK-NEXT: [[A_SROA_31_SROA_5:%.*]] = alloca [7 x i8], align 1 -; CHECK-NEXT: [[A_SROA_6_SROA_4:%.*]] = alloca [7 x i8], align 1 ; CHECK-NEXT: [[A_SROA_7:%.*]] = alloca [40 x i8], align 1 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_0]], ptr align 1 [[SRC:%.*]], i32 20, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <20 x i8>, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 20 -; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_2_SROA_3_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_SRC_SROA_IDX]], i64 2 -; CHECK-NEXT: [[A_SROA_2_SROA_3_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_2_SROA_3_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_2_SROA_4_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_SRC_SROA_IDX]], i64 3 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_2_SROA_4]], ptr align 1 [[A_SROA_2_SROA_4_0_A_SROA_2_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 30 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3]], ptr align 1 [[A_SROA_3_0_SRC_SROA_IDX]], i32 10, i1 false), !tbaa [[TBAA0]] +; CHECK-NEXT: [[A_SROA_3_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_3_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] ; CHECK-NEXT: [[A_SROA_31_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 40 -; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[A_SROA_31_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]] -; CHECK-NEXT: [[A_SROA_31_SROA_4_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_SRC_SROA_IDX]], i64 2 -; CHECK-NEXT: [[A_SROA_31_SROA_4_0_COPYLOAD:%.*]] = load i8, ptr 
[[A_SROA_31_SROA_4_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[A_SROA_31_SROA_5_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_SRC_SROA_IDX]], i64 3
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_31_SROA_5_0_A_SROA_31_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_31_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_31_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT: [[A_SROA_6_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 50
-; CHECK-NEXT: [[A_SROA_6_SROA_0_0_COPYLOAD:%.*]] = load i16, ptr [[A_SROA_6_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[A_SROA_6_SROA_3_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_SRC_SROA_IDX]], i64 2
-; CHECK-NEXT: [[A_SROA_6_SROA_3_0_COPYLOAD:%.*]] = load i8, ptr [[A_SROA_6_SROA_3_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[A_SROA_6_SROA_4_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_SRC_SROA_IDX]], i64 3
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_6_SROA_4]], ptr align 1 [[A_SROA_6_SROA_4_0_A_SROA_6_0_SRC_SROA_IDX_SROA_IDX]], i32 7, i1 false), !tbaa [[TBAA0]]
+; CHECK-NEXT: [[A_SROA_6_SROA_0_0_COPYLOAD:%.*]] = load <10 x i8>, ptr [[A_SROA_6_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT: [[A_SROA_7_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 60
 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_7]], ptr align 1 [[A_SROA_7_0_SRC_SROA_IDX]], i32 40, i1 false), !tbaa [[TBAA0]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_2_SROA_4]], i32 7, i1 false), !tbaa [[TBAA3]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5]], ptr align 1 [[A_SROA_6_SROA_4]], i32 7, i1 false), !tbaa [[TBAA5]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST:%.*]], ptr align 1 [[A_SROA_0]], i32 20, i1 false), !tbaa [[TBAA7]]
+; CHECK-NEXT: [[A_SROA_31_SROA_0_2_VEC_INSERT:%.*]] = insertelement <10 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], i8 0, i32 2
+; CHECK-NEXT: store <20 x i8> [[A_SROA_0_SROA_0_0_COPYLOAD]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA11:![0-9]+]]
 ; CHECK-NEXT: [[A_SROA_2_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 20
-; CHECK-NEXT: store i16 [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]]
-; CHECK-NEXT: [[A_SROA_2_SROA_3_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_DST_SROA_IDX]], i64 2
-; CHECK-NEXT: store i8 [[A_SROA_2_SROA_3_0_COPYLOAD]], ptr [[A_SROA_2_SROA_3_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA7]]
-; CHECK-NEXT: [[A_SROA_2_SROA_4_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_2_0_DST_SROA_IDX]], i64 3
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_2_SROA_4_0_A_SROA_2_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 [[A_SROA_2_SROA_4]], i32 7, i1 false), !tbaa [[TBAA7]]
+; CHECK-NEXT: store <10 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]]
 ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 30
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_3_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_3]], i32 10, i1 false), !tbaa [[TBAA7]]
+; CHECK-NEXT: store <10 x i8> [[A_SROA_3_SROA_0_0_COPYLOAD]], ptr [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]]
 ; CHECK-NEXT: [[A_SROA_31_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 40
-; CHECK-NEXT: store i16 [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_31_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]]
-; CHECK-NEXT: [[A_SROA_31_SROA_4_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_DST_SROA_IDX]], i64 2
-; CHECK-NEXT: store i8 [[A_SROA_6_SROA_3_0_COPYLOAD]], ptr [[A_SROA_31_SROA_4_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA7]]
-; CHECK-NEXT: [[A_SROA_31_SROA_5_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_31_0_DST_SROA_IDX]], i64 3
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_31_SROA_5_0_A_SROA_31_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 [[A_SROA_31_SROA_5]], i32 7, i1 false), !tbaa [[TBAA7]]
+; CHECK-NEXT: store <10 x i8> [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_31_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]]
 ; CHECK-NEXT: [[A_SROA_6_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 50
-; CHECK-NEXT: store i16 [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_6_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA7]]
-; CHECK-NEXT: [[A_SROA_6_SROA_3_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_DST_SROA_IDX]], i64 2
-; CHECK-NEXT: store i8 [[A_SROA_6_SROA_3_0_COPYLOAD]], ptr [[A_SROA_6_SROA_3_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX]], align 1, !tbaa [[TBAA7]]
-; CHECK-NEXT: [[A_SROA_6_SROA_4_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_6_0_DST_SROA_IDX]], i64 3
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_6_SROA_4_0_A_SROA_6_0_DST_SROA_IDX_SROA_IDX]], ptr align 1 [[A_SROA_6_SROA_4]], i32 7, i1 false), !tbaa [[TBAA7]]
+; CHECK-NEXT: store <10 x i8> [[A_SROA_6_SROA_0_0_COPYLOAD]], ptr [[A_SROA_6_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA11]]
 ; CHECK-NEXT: [[A_SROA_7_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 60
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_7_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_7]], i32 40, i1 false), !tbaa [[TBAA7]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_7_0_DST_SROA_IDX]], ptr align 1 [[A_SROA_7]], i32 40, i1 false), !tbaa [[TBAA11]]
 ; CHECK-NEXT: ret void
 ;
@@ -455,8 +409,8 @@ define void @test7(ptr %src, ptr %dst) {
 ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4
 ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_COPYLOAD]], ptr [[A_SROA_0]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA3]]
-; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA13:![0-9]+]]
+; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA13]]
 ; CHECK-NEXT: ret void
 ;
@@ -476,9 +430,9 @@ define %S2 @test8(ptr %arg) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[S2_NEXT_PTR:%.*]] = getelementptr [[S2:%.*]], ptr [[ARG:%.*]], i64 0, i32 1
 ; CHECK-NEXT: [[S2_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_PTR]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[S2_NEXT_S1:%.*]] = load ptr, ptr [[S2_NEXT]], align 8, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[S2_NEXT_S1:%.*]] = load ptr, ptr [[S2_NEXT]], align 8, !tbaa [[TBAA13]]
 ; CHECK-NEXT: [[S2_NEXT_NEXT_PTR:%.*]] = getelementptr [[S2]], ptr [[S2_NEXT]], i64 0, i32 1
-; CHECK-NEXT: [[S2_NEXT_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_NEXT_PTR]], align 8, !tbaa [[TBAA7]]
+; CHECK-NEXT: [[S2_NEXT_NEXT:%.*]] = load ptr, ptr [[S2_NEXT_NEXT_PTR]], align 8, !tbaa [[TBAA11]]
 ; CHECK-NEXT: [[RESULT1:%.*]] = insertvalue [[S2]] poison, ptr [[S2_NEXT_S1]], 0
 ; CHECK-NEXT: [[RESULT2:%.*]] = insertvalue [[S2]] [[RESULT1]], ptr [[S2_NEXT_NEXT]], 1
 ; CHECK-NEXT: ret [[S2]] [[RESULT2]]
@@ -725,7 +679,7 @@ define void @test16(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @test16(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i24, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: store i24 0, ptr [[DST:%.*]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT: store i24 0, ptr [[DST:%.*]], align 1, !tbaa [[TBAA15:![0-9]+]]
 ; CHECK-NEXT: ret void
 ;
@@ -744,7 +698,7 @@ define void @test17(ptr %src, ptr %dst) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[A:%.*]] = alloca [3 x i8], align 1
 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[A]], ptr [[SRC:%.*]], i32 4, i1 true), !tbaa [[TBAA0]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST:%.*]], ptr [[A]], i32 4, i1 true), !tbaa [[TBAA3]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST:%.*]], ptr [[A]], i32 4, i1 true), !tbaa [[TBAA13]]
 ; CHECK-NEXT: ret void
 ;
@@ -765,12 +719,12 @@ define void @test18(ptr %src, ptr %dst, i32 %size) {
 ; CHECK-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load i32, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT: [[A_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 4
 ; CHECK-NEXT: [[A_SROA_3_0_COPYLOAD:%.*]] = load i32, ptr [[A_SROA_3_0_SRC_SROA_IDX]], align 1, !tbaa [[TBAA0]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_33]], ptr [[SRC]], i32 [[SIZE:%.*]], i1 false), !tbaa [[TBAA3]]
-; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_33]], i8 42, i32 [[SIZE]], i1 false), !tbaa [[TBAA5]]
-; CHECK-NEXT: store i32 42, ptr [[DST:%.*]], align 1, !tbaa [[TBAA9]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_33]], ptr [[SRC]], i32 [[SIZE:%.*]], i1 false), !tbaa [[TBAA13]]
+; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_33]], i8 42, i32 [[SIZE]], i1 false), !tbaa [[TBAA15]]
+; CHECK-NEXT: store i32 42, ptr [[DST:%.*]], align 1, !tbaa [[TBAA17:![0-9]+]]
 ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
-; CHECK-NEXT: store i32 [[A_SROA_3_0_COPYLOAD]], ptr [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA9]]
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST]], ptr align 1 [[A_SROA_33]], i32 [[SIZE]], i1 false), !tbaa [[TBAA11]]
+; CHECK-NEXT: store i32 [[A_SROA_3_0_COPYLOAD]], ptr [[A_SROA_3_0_DST_SROA_IDX]], align 1, !tbaa [[TBAA17]]
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr [[DST]], ptr align 1 [[A_SROA_33]], i32 [[SIZE]], i1 false), !tbaa [[TBAA19:![0-9]+]]
 ; CHECK-NEXT: ret void
 ;
@@ -1005,8 +959,7 @@ define void @PR14034(ptr %ptr, ptr %ptr2) {
 ; thing is to handle empty structs gracefully.
 ; CHECK-LABEL: @PR14034(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [12 x i8], align 8
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[PTR2:%.*]], ptr align 8 [[A_SROA_0]], i32 12, i1 false)
+; CHECK-NEXT: store <12 x i8> undef, ptr [[PTR2:%.*]], align 1
 ; CHECK-NEXT: ret void
 ;
@@ -1547,8 +1500,8 @@ define void @test24(ptr %src, ptr %dst) {
 ; CHECK-NEXT: [[A:%.*]] = alloca i64, align 16
 ; CHECK-NEXT: [[A_0_COPYLOAD:%.*]] = load volatile i64, ptr [[SRC:%.*]], align 1, !tbaa [[TBAA0]]
 ; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD]], ptr [[A]], align 16, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[A_0_COPYLOAD1:%.*]] = load volatile i64, ptr [[A]], align 16, !tbaa [[TBAA3]]
-; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA3]]
+; CHECK-NEXT: [[A_0_COPYLOAD1:%.*]] = load volatile i64, ptr [[A]], align 16, !tbaa [[TBAA13]]
+; CHECK-NEXT: store volatile i64 [[A_0_COPYLOAD1]], ptr [[DST:%.*]], align 1, !tbaa [[TBAA13]]
 ; CHECK-NEXT: ret void
 ;
diff --git a/llvm/test/Transforms/SROA/pointer-offset-size.ll b/llvm/test/Transforms/SROA/pointer-offset-size.ll
index 76b5209..bf3c63c 100644
--- a/llvm/test/Transforms/SROA/pointer-offset-size.ll
+++ b/llvm/test/Transforms/SROA/pointer-offset-size.ll
@@ -8,8 +8,7 @@ target datalayout = "e-p:64:64:64:32"
 define i16 @test(ptr %ts2.i) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[S_SROA_0:%.*]] = alloca [3 x i8], align 8
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[TS2_I:%.*]], ptr align 8 [[S_SROA_0]], i32 3, i1 false)
+; CHECK-NEXT: store <3 x i8> undef, ptr [[TS2_I:%.*]], align 1
 ; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[TS2_I]], align 2
 ; CHECK-NEXT: ret i16 [[TMP0]]
 ;
diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll
index 7d2aeaa..74ddf75 100644
--- a/llvm/test/Transforms/SROA/slice-width.ll
+++ b/llvm/test/Transforms/SROA/slice-width.ll
@@ -46,8 +46,7 @@ load_i1:
 define void @memcpy_fp80_padding() {
 ; CHECK-LABEL: @memcpy_fp80_padding(
-; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca x86_fp80, align 16
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 16 [[X_SROA_0]], ptr align 16 @foo_copy_source, i32 16, i1 false)
+; CHECK-NEXT: [[X_SROA_0_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr @foo_copy_source, align 16
 ; CHECK-NEXT: [[X_SROA_1_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 16), align 16
 ; CHECK-NEXT: [[X_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr getelementptr inbounds (i8, ptr @foo_copy_source, i64 24), align 8
 ; CHECK-NEXT: store i64 [[X_SROA_1_0_COPYLOAD]], ptr @i64_sink, align 4
diff --git a/llvm/test/Transforms/SROA/tbaa-struct.ll b/llvm/test/Transforms/SROA/tbaa-struct.ll
index 3e9332c..3d55b72 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct.ll
@@ -10,7 +10,8 @@ declare <2 x float> @foo(ptr %0)
 define void @bar(ptr %y2) {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT: [[X14:%.*]] = call <2 x float> @foo(ptr [[Y2:%.*]])
-; CHECK-NEXT: store <2 x float> [[X14]], ptr [[Y2]], align 4, !tbaa.struct !0
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[X14]] to <2 x i32>
+; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[Y2]], align 4, !tbaa.struct !0
 ; CHECK-NEXT: ret void
 ;
 %x7 = alloca %vector
diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll
index 1c81fc61..e7d5f4e 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct2.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll
@@ -9,12 +9,11 @@ declare double @subcall(double %g, i32 %m)
 define double @bar(ptr %wishart) {
 ; CHECK-LABEL: @bar(
-; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4
 ; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa.struct !0
 ; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8
 ; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct !7
 ; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct !8
+; CHECK-NEXT: [[TMP_SROA_3_SROA_0_0_COPYLOAD:%.*]] = load <4 x i8>, ptr [[TMP_SROA_3_0_WISHART_SROA_IDX]], align 4, !tbaa.struct !8
 ; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]])
 ; CHECK-NEXT: ret double [[CALL]]
 ;
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index bdf50ea..569dd05 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -567,9 +567,9 @@ define <4 x float> @test12(<4 x i32> %val) {
 define void @swap-8bytes(ptr %x, ptr %y) {
 ; CHECK-LABEL: @swap-8bytes(
-; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[X:%.*]], align 1
+; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <8 x i8>, ptr [[X:%.*]], align 1
 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 8, i1 false)
-; CHECK-NEXT: store i64 [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1
+; CHECK-NEXT: store <8 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1
 ; CHECK-NEXT: ret void
 ;
 %tmp = alloca [2 x i32]
@@ -581,10 +581,9 @@ define void @swap-8bytes(ptr %x, ptr %y) {
 define void @swap-7bytes(ptr %x, ptr %y) {
 ; CHECK-LABEL: @swap-7bytes(
-; CHECK-NEXT: [[TMP:%.*]] = alloca [7 x i8], align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 7, i1 false)
+; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <7 x i8>, ptr [[X:%.*]], align 1
 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 7, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 7, i1 false)
+; CHECK-NEXT: store <7 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1
 ; CHECK-NEXT: ret void
 ;
 %tmp = alloca [7 x i8]
@@ -596,10 +595,9 @@ define void @swap-7bytes(ptr %x, ptr %y) {
 define void @swap-16bytes(ptr %x, ptr %y) {
 ; CHECK-LABEL: @swap-16bytes(
-; CHECK-NEXT: [[TMP:%.*]] = alloca [2 x i64], align 8
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 16, i1 false)
+; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <16 x i8>, ptr [[X:%.*]], align 1
 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 16, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 16, i1 false)
+; CHECK-NEXT: store <16 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1
 ; CHECK-NEXT: ret void
 ;
 %tmp = alloca [2 x i64]
@@ -611,10 +609,9 @@ define void @swap-16bytes(ptr %x, ptr %y) {
 define void @swap-15bytes(ptr %x, ptr %y) {
 ; CHECK-LABEL: @swap-15bytes(
-; CHECK-NEXT: [[TMP:%.*]] = alloca [15 x i8], align 1
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 15, i1 false)
+; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load <15 x i8>, ptr [[X:%.*]], align 1
 ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 15, i1 false)
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 15, i1 false)
+; CHECK-NEXT: store <15 x i8> [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1
 ; CHECK-NEXT: ret void
 ;
 %tmp = alloca [15 x i8]
-- 
2.7.4