From 0bab0f6161193cd0cd24b7b0fc51590a60e810d2 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 25 May 2021 07:52:48 -0400 Subject: [PATCH] [InstCombine] canonicalize cast before unary shuffle We could go either direction on this transform. VectorCombine already goes this way for bitcasts (and handles more complicated cases using the cost model), so let's try cast-first. Deferring completely to VectorCombine is another possibility. But the backend should be able to invert this easily when the vectors have the same shape, so it doesn't seem like a transform that we need to avoid. The motivating example from https://llvm.org/PR49081 has an int-to-float sandwiched between 2 shuffles, and the backend currently does not reduce that, so on x86, we get something like: pshufd $249, %xmm0, %xmm0 cvtdq2ps %xmm0, %xmm0 shufps $144, %xmm0, %xmm0 ...instead of just a single conversion instruction. Differential Revision: https://reviews.llvm.org/D103038 --- .../Transforms/InstCombine/InstCombineCasts.cpp | 20 +++++++++- .../InstCombine/X86/x86-f16c-inseltpoison.ll | 4 +- llvm/test/Transforms/InstCombine/X86/x86-f16c.ll | 4 +- .../Transforms/InstCombine/shuffle-cast-dist.ll | 4 +- .../InstCombine/shufflevec-bitcast-inseltpoison.ll | 6 +-- .../Transforms/InstCombine/shufflevec-bitcast.ll | 6 +-- llvm/test/Transforms/InstCombine/vector-casts.ll | 45 ++++++++++++++++++---- 7 files changed, 68 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 785ee24..49c6057 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -277,13 +277,13 @@ InstCombinerImpl::isEliminableCastPair(const CastInst *CI1, /// Implement the transforms common to all CastInst visitors. 
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) { Value *Src = CI.getOperand(0); + Type *Ty = CI.getType(); // Try to eliminate a cast of a cast. if (auto *CSrc = dyn_cast(Src)) { // A->B->C cast if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) { // The first cast (CSrc) is eliminable so we need to fix up or replace // the second cast (CI). CSrc will then have a good chance of being dead. - auto *Ty = CI.getType(); auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty); // Point debug users of the dying cast to the new one. if (CSrc->hasOneUse()) @@ -319,6 +319,24 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) { return NV; } + // Canonicalize a unary shuffle after the cast if neither operation changes + // the size or element size of the input vector. + // TODO: We could allow size-changing ops if that doesn't harm codegen. + // cast (shuffle X, Mask) --> shuffle (cast X), Mask + Value *X; + ArrayRef Mask; + if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) { + // TODO: Allow scalable vectors? + auto *SrcTy = dyn_cast(X->getType()); + auto *DestTy = dyn_cast(Ty); + if (SrcTy && DestTy && + SrcTy->getNumElements() == DestTy->getNumElements() && + SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) { + Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy); + return new ShuffleVectorInst(CastX, UndefValue::get(DestTy), Mask); + } + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll index bc0b6792..041d55e 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-f16c-inseltpoison.ll @@ -24,8 +24,8 @@ define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) { ; All 8 elements required. 
define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) { ; CHECK-LABEL: @demand_vcvtph2ps_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <8 x half> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x half> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> undef, <8 x i32> ; CHECK-NEXT: [[CVTPH2PS:%.*]] = fpext <8 x half> [[TMP2]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[CVTPH2PS]] ; diff --git a/llvm/test/Transforms/InstCombine/X86/x86-f16c.ll b/llvm/test/Transforms/InstCombine/X86/x86-f16c.ll index 19a850c..bc7671e 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-f16c.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-f16c.ll @@ -24,8 +24,8 @@ define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) { ; All 8 elements required. define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) { ; CHECK-LABEL: @demand_vcvtph2ps_256( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> undef, <8 x i32> -; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <8 x half> +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x half> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> undef, <8 x i32> ; CHECK-NEXT: [[CVTPH2PS:%.*]] = fpext <8 x half> [[TMP2]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[CVTPH2PS]] ; diff --git a/llvm/test/Transforms/InstCombine/shuffle-cast-dist.ll b/llvm/test/Transforms/InstCombine/shuffle-cast-dist.ll index c491b82..df0cb49 100644 --- a/llvm/test/Transforms/InstCombine/shuffle-cast-dist.ll +++ b/llvm/test/Transforms/InstCombine/shuffle-cast-dist.ll @@ -4,8 +4,8 @@ define <2 x float> @vtrn1(<2 x i32> %v) ; CHECK-LABEL: @vtrn1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[R_UNCASTED:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[R:%.*]] = bitcast <2 x i32> [[R_UNCASTED]] to <2 x 
float> +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[V:%.*]] to <2 x float> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> undef, <2 x i32> zeroinitializer ; CHECK-NEXT: ret <2 x float> [[R]] ; { diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll index 0a88370d..e1cf1cf 100644 --- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast-inseltpoison.ll @@ -56,9 +56,9 @@ define <4 x i16> @splat_bitcast_operand_uses(<8 x i8> %x) { define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) { ; CHECK-LABEL: @splat_bitcast_operand_same_size_src_elt( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[S2]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[X:%.*]] to <4 x i32> +; CHECK-NEXT: [[BC:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[BC]] ; %s1 = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> %bc = bitcast <4 x float> %s1 to <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll index 5f0ca89..ba57de3 100644 --- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll +++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll @@ -56,9 +56,9 @@ define <4 x i16> @splat_bitcast_operand_uses(<8 x i8> %x) { define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) { ; CHECK-LABEL: @splat_bitcast_operand_same_size_src_elt( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[S2]] +; CHECK-NEXT: [[TMP1:%.*]] = 
bitcast <4 x float> [[X:%.*]] to <4 x i32> +; CHECK-NEXT: [[BC:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[BC]] ; %s1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %bc = bitcast <4 x float> %s1 to <4 x i32> diff --git a/llvm/test/Transforms/InstCombine/vector-casts.ll b/llvm/test/Transforms/InstCombine/vector-casts.ll index 6bb4f45..0cabb62 100644 --- a/llvm/test/Transforms/InstCombine/vector-casts.ll +++ b/llvm/test/Transforms/InstCombine/vector-casts.ll @@ -413,8 +413,8 @@ define <2 x i64> @zext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) { define <4 x float> @sitofp_shuf(<4 x i32> %x) { ; CHECK-LABEL: @sitofp_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = sitofp <4 x i32> [[S]] to <4 x float> +; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[X:%.*]] to <4 x float> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> ; CHECK-NEXT: ret <4 x float> [[R]] ; %s = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> @@ -424,8 +424,8 @@ define <4 x float> @sitofp_shuf(<4 x i32> %x) { define <3 x half> @uitofp_shuf(<3 x i16> %x) { ; CHECK-LABEL: @uitofp_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <3 x i16> [[X:%.*]], <3 x i16> poison, <3 x i32> -; CHECK-NEXT: [[R:%.*]] = uitofp <3 x i16> [[S]] to <3 x half> +; CHECK-NEXT: [[TMP1:%.*]] = uitofp <3 x i16> [[X:%.*]] to <3 x half> +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x half> [[TMP1]], <3 x half> undef, <3 x i32> ; CHECK-NEXT: ret <3 x half> [[R]] ; %s = shufflevector <3 x i16> %x, <3 x i16> poison, <3 x i32> @@ -435,8 +435,8 @@ define <3 x half> @uitofp_shuf(<3 x i16> %x) { define <4 x i64> @fptosi_shuf(<4 x double> %x) { ; CHECK-LABEL: @fptosi_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <4 x i32> -; CHECK-NEXT: [[R:%.*]] = fptosi <4 x double> [[S]] to <4 x i64> +; 
CHECK-NEXT: [[TMP1:%.*]] = fptosi <4 x double> [[X:%.*]] to <4 x i64> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i64> [[R]] ; %s = shufflevector <4 x double> %x, <4 x double> poison, <4 x i32> @@ -446,8 +446,8 @@ define <4 x i64> @fptosi_shuf(<4 x double> %x) { define <2 x i32> @fptoui_shuf(<2 x float> %x) { ; CHECK-LABEL: @fptoui_shuf( -; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> -; CHECK-NEXT: [[R:%.*]] = fptoui <2 x float> [[S]] to <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = fptoui <2 x float> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %s = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> @@ -455,6 +455,9 @@ define <2 x i32> @fptoui_shuf(<2 x float> %x) { ret <2 x i32> %r } +; negative test +; TODO: Should we reduce the width of the shuffle? + define <4 x half> @narrowing_sitofp_shuf(<4 x i32> %x) { ; CHECK-LABEL: @narrowing_sitofp_shuf( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> @@ -466,6 +469,8 @@ define <4 x half> @narrowing_sitofp_shuf(<4 x i32> %x) { ret <4 x half> %r } +; negative test + define <4 x double> @widening_uitofp_shuf(<4 x i32> %x) { ; CHECK-LABEL: @widening_uitofp_shuf( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> @@ -477,6 +482,8 @@ define <4 x double> @widening_uitofp_shuf(<4 x i32> %x) { ret <4 x double> %r } +; negative test + define <3 x i64> @fptosi_narrowing_shuf(<4 x double> %x) { ; CHECK-LABEL: @fptosi_narrowing_shuf( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[X:%.*]], <4 x double> poison, <3 x i32> @@ -488,6 +495,9 @@ define <3 x i64> @fptosi_narrowing_shuf(<4 x double> %x) { ret <3 x i64> %r } +; negative test +; TODO: Should we reduce the width of the cast? 
+ define <3 x i32> @fptoui_widening_shuf(<2 x float> %x) { ; CHECK-LABEL: @fptoui_widening_shuf( ; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <3 x i32> @@ -499,6 +509,9 @@ define <3 x i32> @fptoui_widening_shuf(<2 x float> %x) { ret <3 x i32> %r } +; negative test +; TODO: Should we reduce the width of the cast? + define <4 x half> @narrowing_sitofp_widening_shuf(<2 x i32> %x) { ; CHECK-LABEL: @narrowing_sitofp_widening_shuf( ; CHECK-NEXT: [[S:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <4 x i32> @@ -512,6 +525,8 @@ define <4 x half> @narrowing_sitofp_widening_shuf(<2 x i32> %x) { declare void @use(<4 x i32>) +; negative test + define <4 x float> @sitofp_shuf_extra_use(<4 x i32> %x) { ; CHECK-LABEL: @sitofp_shuf_extra_use( ; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> @@ -524,3 +539,17 @@ define <4 x float> @sitofp_shuf_extra_use(<4 x i32> %x) { %r = sitofp <4 x i32> %s to <4 x float> ret <4 x float> %r } + +; negative test +; TODO: Allow scalable vectors? + +define @sitofp_shuf_scalable( %x) { +; CHECK-LABEL: @sitofp_shuf_scalable( +; CHECK-NEXT: [[S:%.*]] = shufflevector [[X:%.*]], poison, zeroinitializer +; CHECK-NEXT: [[R:%.*]] = sitofp [[S]] to +; CHECK-NEXT: ret [[R]] +; + %s = shufflevector %x, poison, zeroinitializer + %r = sitofp %s to + ret %r +} -- 2.7.4