From 9eb0de857389fd07758cd39c09593fce5f83f95b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 23 Mar 2019 16:14:04 +0000
Subject: [PATCH] [X86][SLP] Show example of failure to uniformly commute splats for 'alt' shuffles.

If either of the main/alt opcodes isn't commutable, we may end up with the
splats not correctly commuted to the same side.

llvm-svn: 356837
---
 .../Transforms/SLPVectorizer/X86/alternate-int.ll  | 38 ++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
index fe8ae4d..2a49864 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
@@ -533,3 +533,41 @@ define <8 x i32> @sdiv_v8i32_undefs(<8 x i32> %a) {
   %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
   ret <8 x i32> %r7
 }
+
+define <8 x i32> @add_sub_v8i32_splat(<8 x i32> %a, i32 %b) {
+; CHECK-LABEL: @add_sub_v8i32_splat(
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT:    [[R7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x i32> [[R7]]
+;
+  %a0 = extractelement <8 x i32> %a, i32 0
+  %a1 = extractelement <8 x i32> %a, i32 1
+  %a2 = extractelement <8 x i32> %a, i32 2
+  %a3 = extractelement <8 x i32> %a, i32 3
+  %a4 = extractelement <8 x i32> %a, i32 4
+  %a5 = extractelement <8 x i32> %a, i32 5
+  %a6 = extractelement <8 x i32> %a, i32 6
+  %a7 = extractelement <8 x i32> %a, i32 7
+  %ab0 = add i32 %a0, %b
+  %ab1 = add i32 %b, %a1
+  %ab2 = add i32 %a2, %b
+  %ab3 = add i32 %b, %a3
+  %ab4 = sub i32 %b, %a4
+  %ab5 = sub i32 %b, %a5
+  %ab6 = sub i32 %b, %a6
+  %ab7 = sub i32 %b, %a7
+  %r0 = insertelement <8 x i32> undef, i32 %ab0, i32 0
+  %r1 = insertelement <8 x i32> %r0, i32 %ab1, i32 1
+  %r2 = insertelement <8 x i32> %r1, i32 %ab2, i32 2
+  %r3 = insertelement <8 x i32> %r2, i32 %ab3, i32 3
+  %r4 = insertelement <8 x i32> %r3, i32 %ab4, i32 4
+  %r5 = insertelement <8 x i32> %r4, i32 %ab5, i32 5
+  %r6 = insertelement <8 x i32> %r5, i32 %ab6, i32 6
+  %r7 = insertelement <8 x i32> %r6, i32 %ab7, i32 7
+  ret <8 x i32> %r7
+}
--
2.7.4
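
Editor's note: for context, below is a minimal sketch of the output one would
expect once the splats are uniformly commuted. It is an illustration, not part
of the patch; the function name @add_sub_v8i32_splat_ideal and the value names
are hypothetical. Since 'sub' is not commutable, its splat operand is pinned to
the left-hand side; commuting the (commutable) adds to match would let both
halves share the same 8-wide operands, leaving a single lane-blend shuffle:

  define <8 x i32> @add_sub_v8i32_splat_ideal(<8 x i32> %a, i32 %b) {
    ; Splat %b across all eight lanes.
    %ins = insertelement <8 x i32> undef, i32 %b, i32 0
    %splat = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
    ; Splat uniformly on the left of both the main (add) and alt (sub) opcodes.
    %add = add <8 x i32> %splat, %a
    %sub = sub <8 x i32> %splat, %a
    ; Blend add lanes 0-3 with sub lanes 4-7 (indices 12-15 of the concatenation).
    %r = shufflevector <8 x i32> %add, <8 x i32> %sub, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
    ret <8 x i32> %r
  }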