From: Craig Topper
Date: Sun, 11 Dec 2016 00:23:50 +0000 (+0000)
Subject: [X86][InstCombine] Teach InstCombineCalls to turn pshufb intrinsic into a shufflevect...
X-Git-Tag: llvmorg-4.0.0-rc1~2411
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9a63d7ade59efdca1d27b5b3efb995bf50d7f6dc;p=platform%2Fupstream%2Fllvm.git

[X86][InstCombine] Teach InstCombineCalls to turn pshufb intrinsic into a
shufflevector if the indices are constant.

llvm-svn: 289348
---
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c79aa05..9ae6285 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -805,11 +805,11 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II,
   auto *VecTy = cast<VectorType>(II.getType());
   auto *MaskEltTy = Type::getInt32Ty(II.getContext());
   unsigned NumElts = VecTy->getNumElements();
-  assert((NumElts == 16 || NumElts == 32) &&
+  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
          "Unexpected number of elements in shuffle mask!");

   // Construct a shuffle mask from constant integers or UNDEFs.
-  Constant *Indexes[32] = {nullptr};
+  Constant *Indexes[64] = {nullptr};

   // Each byte in the shuffle control mask forms an index to permute the
   // corresponding byte in the destination operand.
@@ -2081,6 +2081,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {

   case Intrinsic::x86_ssse3_pshuf_b_128:
   case Intrinsic::x86_avx2_pshuf_b:
+  case Intrinsic::x86_avx512_pshuf_b_512:
     if (Value *V = simplifyX86pshufb(*II, *Builder))
       return replaceInstUsesWith(*II, V);
     break;
diff --git a/llvm/test/Transforms/InstCombine/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/x86-pshufb.ll
index 3ada4fbd..b37884d 100644
--- a/llvm/test/Transforms/InstCombine/x86-pshufb.ll
+++ b/llvm/test/Transforms/InstCombine/x86-pshufb.ll
@@ -19,6 +19,14 @@ define <32 x i8> @identity_test_avx2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @identity_test_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @identity_test_avx512(
+; CHECK-NEXT:    ret <64 x i8> %InVec
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
 ; Verify that instcombine is able to fold byte shuffles with zero masks.

 define <16 x i8> @fold_to_zero_vector(<16 x i8> %InVec) {
@@ -37,6 +45,14 @@ define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @fold_to_zero_vector_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_avx512(
+; CHECK-NEXT:    ret <64 x i8> zeroinitializer
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
 ; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector
 ; with a shuffle mask of all zeroes.

@@ -63,6 +79,15 @@ define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @splat_test_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @splat_test_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> zeroinitializer)
+  ret <64 x i8> %1
+}
+
 ; Each of the byte shuffles in the following tests is equivalent to a blend between
 ; vector %InVec and a vector of all zeroes.
@@ -174,6 +199,60 @@ define <32 x i8> @blend6_avx2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @blend1_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend1_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> , <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @blend2_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend2_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> , <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @blend3_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend3_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> , <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @blend4_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend4_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> , <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @blend5_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend5_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> , <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @blend6_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @blend6_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> , <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
 ; movq idiom.

 define <16 x i8> @movq_idiom(<16 x i8> %InVec) {
@@ -193,6 +272,15 @@ define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @movq_idiom_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @movq_idiom_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> , <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
 ; Vector permutations using byte shuffles.

 define <16 x i8> @permute1(<16 x i8> %InVec) {
@@ -231,6 +319,24 @@ define <32 x i8> @permute2_avx2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @permute1_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @permute1_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
+define <64 x i8> @permute2_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @permute2_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
 ; Test that instcombine correctly folds a pshufb with values that
 ; are not -128 and that are not encoded in four bits.
@@ -250,6 +356,14 @@ define <32 x i8> @identity_test_avx2_2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @identity_test_avx512_2(<64 x i8> %InVec) {
+; CHECK-LABEL: @identity_test_avx512_2(
+; CHECK-NEXT:    ret <64 x i8> %InVec
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
 define <16 x i8> @fold_to_zero_vector_2(<16 x i8> %InVec) {
 ; CHECK-LABEL: @fold_to_zero_vector_2(
 ; CHECK-NEXT:    ret <16 x i8> zeroinitializer
@@ -266,6 +380,14 @@ define <32 x i8> @fold_to_zero_vector_avx2_2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @fold_to_zero_vector_avx512_2(<64 x i8> %InVec) {
+; CHECK-LABEL: @fold_to_zero_vector_avx512_2(
+; CHECK-NEXT:    ret <64 x i8> zeroinitializer
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
 define <16 x i8> @permute3(<16 x i8> %InVec) {
 ; CHECK-LABEL: @permute3(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32>
@@ -284,6 +406,15 @@ define <32 x i8> @permute3_avx2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @permute3_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @permute3_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
 ; FIXME: Verify that instcombine is able to fold constant byte shuffles with undef mask elements.

 define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) {
@@ -304,6 +435,15 @@ define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @fold_with_undef_elts_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_undef_elts_avx512(
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> , <64 x i32>
+; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> )
+  ret <64 x i8> %1
+}
+
 define <16 x i8> @fold_with_allundef_elts(<16 x i8> %InVec) {
 ; CHECK-LABEL: @fold_with_allundef_elts(
 ; CHECK-NEXT:    ret <16 x i8> undef
@@ -320,5 +460,14 @@ define <32 x i8> @fold_with_allundef_elts_avx2(<32 x i8> %InVec) {
   ret <32 x i8> %1
 }

+define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_allundef_elts_avx512(
+; CHECK-NEXT:    ret <64 x i8> undef
+;
+  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> undef)
+  ret <64 x i8> %1
+}
+
 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
 declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
+declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
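
For reference, below is a minimal sketch of the rewrite this change performs, using the 128-bit intrinsic for brevity; the 256-bit and 512-bit forms are folded the same way, one 128-bit lane at a time. It is not part of the patch above, and the function name and control mask are made up for illustration: byte 0 of the mask has its sign bit set (-128), so pshufb writes a zero in that byte, and the remaining bytes reverse %InVec.

define <16 x i8> @pshufb_fold_sketch(<16 x i8> %InVec) {
  ; Constant control mask: byte 0 selects zero (sign bit set), bytes 1-15 reverse the input.
  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
  ret <16 x i8> %1
}

; After instcombine the call is replaced by roughly the following shufflevector,
; where indices 0-15 pick bytes of %InVec and index 16 picks a byte of the
; all-zero second operand:
;
;   %1 = shufflevector <16 x i8> %InVec, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>

declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)

When no control byte has the sign bit set, the zero operand ends up unused, which is why the permute tests above expect undef as the second shufflevector operand.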