From: Florian Hahn Date: Sat, 4 Feb 2023 20:42:50 +0000 (+0000) Subject: [LV] Add users for loads to make tests more robust. X-Git-Tag: upstream/17.0.6~18573 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a9ac22b501c5fd03a07a86ab65d675c84d3aa3cb;p=platform%2Fupstream%2Fllvm.git [LV] Add users for loads to make tests more robust. Update a few tests to add users to loads to avoid them being optimized out by future changes. In cases where the unused loads didn't matter for the test, remove them. --- diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll index e718bda..db97dc6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-avoid-scalarization.ll @@ -8,7 +8,7 @@ target triple = "aarch64-unknown-linux-gnu" ; the loop, preventing the gep (and consequently the loop induction ; update variable) from being classified as 'uniform'. 
-define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 { +define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0 { ; CHECK-LABEL: @test_no_scalarization( ; CHECK-NEXT: L.entry: ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[IDX:%.*]], 1 @@ -39,21 +39,26 @@ define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], <vscale x 2 x i32> [[VEC_IND]] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 2 x ptr> [[TMP12]], i32 0 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP13]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP14]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP16]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]] -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: middle.block: +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IDX]], [[INDEX]] +; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[A:%.*]], <vscale x 2 x i32> [[VEC_IND]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 2 x ptr> [[TMP13]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP15]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[B:%.*]], i32 [[TMP12]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i32 0 +; CHECK-NEXT: store <vscale x 2 x double> [[WIDE_LOAD]], ptr [[TMP17]], align 8 ; CHECK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: 
[[TMP19:%.*]] = mul i32 [[TMP18]], 2 -; CHECK-NEXT: [[TMP20:%.*]] = sub i32 [[TMP19]], 1 -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <vscale x 2 x ptr> [[TMP12]], i32 [[TMP20]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP19]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT2]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP22:%.*]] = mul i32 [[TMP21]], 2 +; CHECK-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <vscale x 2 x ptr> [[TMP13]], i32 [[TMP23]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[L_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -62,12 +67,14 @@ define void @test_no_scalarization(ptr %a, i32 %idx, i32 %n) #0 { ; CHECK: L.LoopBody: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[L_LOOPBODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[INDVAR_NEXT]] = add nsw i32 [[INDVAR]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i64, ptr [[A]], i32 [[INDVAR]] -; CHECK-NEXT: [[TMP23:%.*]] = load double, ptr [[TMP22]], align 8 -; CHECK-NEXT: [[TMP24:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[TMP24]], label [[L_LOOPBODY]], label [[L_EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i64, ptr [[A]], i32 [[INDVAR]] +; CHECK-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP25]], align 8 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr i64, ptr [[B]], i32 [[INDVAR]] +; CHECK-NEXT: store double [[TMP26]], ptr [[GEP_B]], align 8 +; CHECK-NEXT: [[TMP27:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[TMP27]], label [[L_LOOPBODY]], label [[L_EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: L.exit: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[TMP22]], 
[[L_LOOPBODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi ptr [ [[TMP25]], [[L_LOOPBODY]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: store i64 1, ptr [[DOTLCSSA]], align 8 ; CHECK-NEXT: ret void ; @@ -79,6 +86,8 @@ L.LoopBody: ; preds = %L.LoopBody, %L.entr %indvar.next = add nsw i32 %indvar, 1 %0 = getelementptr i64, ptr %a, i32 %indvar %1 = load double, ptr %0, align 8 + %gep.b = getelementptr i64, ptr %b, i32 %indvar + store double %1, ptr %gep.b %2 = icmp slt i32 %indvar.next, %n br i1 %2, label %L.LoopBody, label %L.exit diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll index b77ac10..e9198918 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr48340.ll @@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu" %0 = type { i32 } %1 = type { i64 } -define void @foo(ptr %p, ptr %p.last) unnamed_addr #0 { +define ptr @foo(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P3:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -40,6 +40,7 @@ define void @foo(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x ptr> [[WIDE_MASKED_GATHER6]], i32 3 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -50,9 +51,10 @@ define void @foo(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-NEXT: [[P_INC]] = getelementptr inbounds i64, ptr [[P2]], i64 128 ; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P2]], align 8 ; CHECK-NEXT: [[B:%.*]] = icmp eq ptr [[P_INC]], [[P_LAST]] -; CHECK-NEXT: br i1 [[B]], label [[EXIT]], label [[LOOP]], !llvm.loop 
[[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[B]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: -; CHECK-NEXT: ret void +; CHECK-NEXT: [[V_LCSSA:%.*]] = phi ptr [ [[V]], [[LOOP]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret ptr [[V_LCSSA]] ; entry: br label %loop @@ -65,10 +67,10 @@ loop: br i1 %b, label %exit, label %loop exit: - ret void + ret ptr %v } -define void @bar(ptr %p, ptr %p.last) unnamed_addr #0 { +define ptr @bar(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-LABEL: @bar( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[P3:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -101,6 +103,7 @@ define void @bar(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x ptr> [[WIDE_MASKED_GATHER6]], i32 3 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -113,7 +116,8 @@ define void @bar(ptr %p, ptr %p.last) unnamed_addr #0 { ; CHECK-NEXT: [[B:%.*]] = icmp eq ptr [[P_INC]], [[P_LAST]] ; CHECK-NEXT: br i1 [[B]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: -; CHECK-NEXT: ret void +; CHECK-NEXT: [[V_LCSSA:%.*]] = phi ptr [ [[V]], [[LOOP]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret ptr [[V_LCSSA]] ; entry: br label %loop @@ -126,7 +130,7 @@ loop: br i1 %b, label %exit, label %loop exit: - ret void + ret ptr %v } attributes #0 = { "target-cpu"="skylake" } diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index 225b1c0..8597211 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -475,9 +475,8 @@ define void @test_first_order_recurrences_and_induction(ptr %ptr) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], <i64 10, i64 10, i64 10, i64 10> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> @@ -497,7 +496,6 @@ loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %iv.next = add nuw nsw i64 %iv, 1 %gep.ptr = getelementptr inbounds i64, ptr %ptr, i64 %iv - %for.1.next = load i64, ptr %gep.ptr, align 2 %add.1 = add i64 %for.1, 10 store i64 %add.1, ptr %gep.ptr %exitcond.not = icmp eq i64 %iv.next, 1000 @@ -518,9 +516,8 @@ define void @test_first_order_recurrences_and_induction2(ptr %ptr) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6> ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 2 ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], <i64 10, i64 10, i64 10, i64 10> +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 ; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> @@ -540,7 +537,6 @@ loop: %for.1 = 
phi i64 [ 22, %entry ], [ %iv, %loop ] %iv.next = add nuw nsw i64 %iv, 1 %gep.ptr = getelementptr inbounds i64, ptr %ptr, i64 %iv - %for.1.next = load i64, ptr %gep.ptr, align 2 %add.1 = add i64 %for.1, 10 store i64 %add.1, ptr %gep.ptr %exitcond.not = icmp eq i64 %iv.next, 1000