From 7d945970488e2474f18fd78fbc9598962703692d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sun, 8 May 2022 11:40:53 +0100
Subject: [PATCH] [SLP][X86] Add test coverage for PR41892 / Issue #41237

---
 .../test/Transforms/SLPVectorizer/X86/odd_store.ll | 114 +++++++++++++++++++++
 1 file changed, 114 insertions(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
index 4f72ddb..39df2d2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
@@ -60,3 +60,117 @@ define i32 @foo(i8* noalias nocapture %A, float* noalias nocapture %B, float %T)
   ret i32 undef
 }
 
+; PR41892 - store the low 2, 3 or 4 elements of a <4 x float> argument (or a splat of its element 0) to consecutive floats starting at %p.
+define void @test_v4f32_v2f32_store(<4 x float> %f, float* %p){ ; elements 0-1 -> p[0..1]; the expected output keeps both scalar stores.
+; CHECK-LABEL: @test_v4f32_v2f32_store(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
+; CHECK-NEXT:    [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; CHECK-NEXT:    store float [[X0]], float* [[P]], align 4
+; CHECK-NEXT:    store float [[X1]], float* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <4 x float> %f, i64 0
+  %x1 = extractelement <4 x float> %f, i64 1
+  %p1 = getelementptr inbounds float, float* %p, i64 1
+  store float %x0, float* %p, align 4
+  store float %x1, float* %p1, align 4
+  ret void
+}
+
+define void @test_v4f32_v2f32_splat_store(<4 x float> %f, float* %p){ ; element 0 -> p[0..1]; the expected output keeps both scalar stores.
+; CHECK-LABEL: @test_v4f32_v2f32_splat_store(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; CHECK-NEXT:    store float [[X0]], float* [[P]], align 4
+; CHECK-NEXT:    store float [[X0]], float* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <4 x float> %f, i64 0
+  %p1 = getelementptr inbounds float, float* %p, i64 1
+  store float %x0, float* %p, align 4
+  store float %x0, float* %p1, align 4
+  ret void
+}
+
+define void @test_v4f32_v3f32_store(<4 x float> %f, float* %p){ ; elements 0-2 -> p[0..2]; the expected output keeps all three scalar stores.
+; CHECK-LABEL: @test_v4f32_v3f32_store(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
+; CHECK-NEXT:    [[X1:%.*]] = extractelement <4 x float> [[F]], i64 1
+; CHECK-NEXT:    [[X2:%.*]] = extractelement <4 x float> [[F]], i64 2
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:    store float [[X0]], float* [[P]], align 4
+; CHECK-NEXT:    store float [[X1]], float* [[P1]], align 4
+; CHECK-NEXT:    store float [[X2]], float* [[P2]], align 4
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <4 x float> %f, i64 0
+  %x1 = extractelement <4 x float> %f, i64 1
+  %x2 = extractelement <4 x float> %f, i64 2
+  %p1 = getelementptr inbounds float, float* %p, i64 1
+  %p2 = getelementptr inbounds float, float* %p, i64 2
+  store float %x0, float* %p, align 4
+  store float %x1, float* %p1, align 4
+  store float %x2, float* %p2, align 4
+  ret void
+}
+
+define void @test_v4f32_v3f32_splat_store(<4 x float> %f, float* %p){ ; element 0 -> p[0..2]; the expected output keeps all three scalar stores.
+; CHECK-LABEL: @test_v4f32_v3f32_splat_store(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds float, float* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds float, float* [[P]], i64 2
+; CHECK-NEXT:    store float [[X0]], float* [[P]], align 4
+; CHECK-NEXT:    store float [[X0]], float* [[P1]], align 4
+; CHECK-NEXT:    store float [[X0]], float* [[P2]], align 4
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <4 x float> %f, i64 0
+  %p1 = getelementptr inbounds float, float* %p, i64 1
+  %p2 = getelementptr inbounds float, float* %p, i64 2
+  store float %x0, float* %p, align 4
+  store float %x0, float* %p1, align 4
+  store float %x0, float* %p2, align 4
+  ret void
+}
+
+define void @test_v4f32_v4f32_store(<4 x float> %f, float* %p){ ; elements 0-3 -> p[0..3]; the expected output is one <4 x float> store of %f.
+; CHECK-LABEL: @test_v4f32_v4f32_store(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[F:%.*]], <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <4 x float> %f, i64 0
+  %x1 = extractelement <4 x float> %f, i64 1
+  %x2 = extractelement <4 x float> %f, i64 2
+  %x3 = extractelement <4 x float> %f, i64 3
+  %p1 = getelementptr inbounds float, float* %p, i64 1
+  %p2 = getelementptr inbounds float, float* %p, i64 2
+  %p3 = getelementptr inbounds float, float* %p, i64 3
+  store float %x0, float* %p, align 4
+  store float %x1, float* %p1, align 4
+  store float %x2, float* %p2, align 4
+  store float %x3, float* %p3, align 4
+  ret void
+}
+
+define void @test_v4f32_v4f32_splat_store(<4 x float> %f, float* %p){ ; element 0 -> p[0..3]; the expected output builds a splat (insertelement + zero-mask shufflevector) and does one <4 x float> store.
+; CHECK-LABEL: @test_v4f32_v4f32_splat_store(
+; CHECK-NEXT:    [[X0:%.*]] = extractelement <4 x float> [[F:%.*]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> poison, float [[X0]], i32 0
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[SHUFFLE]], <4 x float>* [[TMP2]], align 4
+; CHECK-NEXT:    ret void
+;
+  %x0 = extractelement <4 x float> %f, i64 0
+  %p1 = getelementptr inbounds float, float* %p, i64 1
+  %p2 = getelementptr inbounds float, float* %p, i64 2
+  %p3 = getelementptr inbounds float, float* %p, i64 3
+  store float %x0, float* %p, align 4
+  store float %x0, float* %p1, align 4
+  store float %x0, float* %p2, align 4
+  store float %x0, float* %p3, align 4
+  ret void
+}
-- 
2.7.4