From f4448063ccf1c2eda518ad326018276baff870d2 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 2 Apr 2020 13:44:50 -0400 Subject: [PATCH] [InstCombine] try to reduce shuffle with bitcasted operand shuf (bitcast X), undef, Mask --> bitcast X' The 'inverse shuffles' test (shuf_bitcast_operand) is a pattern in the motivating examples from PR35454: https://bugs.llvm.org/show_bug.cgi?id=35454 (see also D76727) We can deal with this class of patterns in generic instcombine because we are not creating any new shuffles, just a bitcast. Alive2 proof: http://volta.cs.utah.edu:8080/z/mwDUZf Differential Revision: https://reviews.llvm.org/D76844 --- .../InstCombine/InstCombineVectorOps.cpp | 32 ++++++++++++++++++++-- .../Transforms/InstCombine/shufflevec-bitcast.ll | 26 +++++++++++------- 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 2d72696..1e95f23 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1889,9 +1889,9 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) { Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); - if (auto *V = - SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(), - SVI.getType(), SQ.getWithInstruction(&SVI))) + SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI); + if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(), + SVI.getType(), ShufQuery)) return replaceInstUsesWith(SVI, V); // shuffle x, x, mask --> shuffle x, undef, mask' @@ -1899,6 +1899,32 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { unsigned LHSWidth = LHS->getType()->getVectorNumElements(); ArrayRef Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); + + // Peek through a bitcasted shuffle operand by scaling the mask. If the + // simulated shuffle can simplify, then this shuffle is unnecessary: + // shuf (bitcast X), undef, Mask --> bitcast X' + // TODO: This could be extended to allow length-changing shuffles and/or casts + // to narrower elements. The transform might also be obsoleted if we + // allowed canonicalization of bitcasted shuffles. + Value *X; + if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) && + X->getType()->isVectorTy() && VWidth == LHSWidth && + X->getType()->getVectorNumElements() >= VWidth) { + // Create the scaled mask constant. + Type *XType = X->getType(); + unsigned XNumElts = XType->getVectorNumElements(); + assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast"); + unsigned ScaleFactor = XNumElts / VWidth; + SmallVector ScaledMask; + scaleShuffleMask(ScaleFactor, Mask, ScaledMask); + + // If the shuffled source vector simplifies, cast that value to this + // shuffle's type. + if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType), + ScaledMask, XType, ShufQuery)) + return BitCastInst::Create(Instruction::BitCast, V, SVI.getType()); + } + if (LHS == RHS) { assert(!isa(RHS) && "Shuffle with 2 undef ops not simplified?"); // Remap any references to RHS to use LHS. diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll index 57b7f4c..5af4f50 100644 --- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll +++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll @@ -21,11 +21,12 @@ define void @test(<16 x i8> %w, i32* %o1, float* %o2) { ret void } +; Shuffle-of-bitcast-splat --> splat-bitcast + define <4 x i16> @splat_bitcast_operand(<8 x i8> %x) { ; CHECK-LABEL: @splat_bitcast_operand( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> -; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i16> [[BC]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> +; CHECK-NEXT: [[S2:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[S2]] ; %s1 = shufflevector <8 x i8> %x, <8 x i8> undef, <8 x i32> @@ -34,12 +35,14 @@ define <4 x i16> @splat_bitcast_operand(<8 x i8> %x) { ret <4 x i16> %s2 } +; Shuffle-of-bitcast-splat --> splat-bitcast + define <4 x i16> @splat_bitcast_operand_uses(<8 x i8> %x) { ; CHECK-LABEL: @splat_bitcast_operand_uses( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> ; CHECK-NEXT: call void @use(<4 x i16> [[BC]]) -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i16> [[BC]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[S2]] ; %s1 = shufflevector <8 x i8> %x, <8 x i8> undef, <8 x i32> @@ -49,11 +52,12 @@ define <4 x i16> @splat_bitcast_operand_uses(<8 x i8> %x) { ret <4 x i16> %s2 } +; Shuffle-of-bitcast-splat --> splat-bitcast + define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) { ; CHECK-LABEL: @splat_bitcast_operand_same_size_src_elt( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S2]] ; %s1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> @@ -66,9 +70,7 @@ define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) { define <4 x i32> @shuf_bitcast_operand(<16 x i8> %x) { ; CHECK-LABEL: @shuf_bitcast_operand( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> undef, <16 x i32> -; CHECK-NEXT: [[BC:%.*]] = bitcast <16 x i8> [[S1]] to <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = bitcast <16 x i8> [[X:%.*]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S2]] ; %s1 = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> @@ -77,6 +79,8 @@ define <4 x i32> @shuf_bitcast_operand(<16 x i8> %x) { ret <4 x i32> %s2 } +; TODO: Could allow fold for length-changing shuffles. + define <5 x i16> @splat_bitcast_operand_change_type(<8 x i8> %x) { ; CHECK-LABEL: @splat_bitcast_operand_change_type( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> @@ -90,6 +94,8 @@ define <5 x i16> @splat_bitcast_operand_change_type(<8 x i8> %x) { ret <5 x i16> %s2 } +; TODO: Could allow fold for cast to narrow element. + define <4 x i16> @splat_bitcast_operand_wider_src_elt(<2 x i32> %x) { ; CHECK-LABEL: @splat_bitcast_operand_wider_src_elt( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32> -- 2.7.4